electric-horses-infra/stacks/eh-search/app/search/static_pages.py

195 lines
6.8 KiB
Python
Raw Normal View History

"""Static page registry + tag-to-page bridge.
Hardcoded list of top-level Astro pages that aren't in Directus.
The tag bridge maps blog tags (real, curated by the team) to these pages,
turning the blog content taxonomy into a search vocabulary for the site.
"""
from dataclasses import dataclass, field
from typing import Iterable
@dataclass
class StaticPage:
    """One entry of the static page registry.

    Attributes:
        slug: URL path of the page, e.g. "/werkstatt".
        title: Display title.
        snippet: Description shown in search results.
        keywords: Direct synonyms for the page. Defaults to a fresh empty
            list per instance, so instances never share one list.
    """

    slug: str
    title: str
    snippet: str
    keywords: list[str] = field(default_factory=list)
# Registry of the top-level electric-horses.de pages that exist as Astro
# routes rather than as Directus content.
# The list order is significant: it is the tie-breaker between pages with
# equal relevance (an earlier entry is preferred over a later one).
STATIC_PAGES: list[StaticPage] = [
    StaticPage(
        slug="/werkstatt",
        title="Werkstatt",
        snippet="Spezialwerkstatt fuer E-Roller und E-Motorraeder. Inspektion, Reparatur, Akku-Service.",
        keywords=["werkstatt", "reparatur", "service", "inspektion", "wartung"],
    ),
    StaticPage(
        slug="/ersatzteile",
        title="Ersatzteile",
        snippet="Original-Ersatzteile und Zubehoer fuer E-Roller, E-Motorraeder und E-Scooter.",
        keywords=["ersatzteile", "teile", "zubehoer", "originalteile"],
    ),
    StaticPage(
        slug="/fahrzeuge",
        title="Fahrzeuge",
        snippet="Unser kompletter Bestand an E-Rollern, E-Motorraedern und E-Autos.",
        keywords=["fahrzeuge", "bestand", "angebot", "uebersicht", "alle"],
    ),
    StaticPage(
        slug="/marken",
        title="Marken",
        snippet="Alle Marken im Ueberblick: Askoll, RAY, ZERO, Energica, Nissan und mehr.",
        keywords=["marken", "hersteller", "brands"],
    ),
    StaticPage(
        slug="/vermietung",
        title="Vermietung",
        snippet="E-Motorrad-Vermietung in Wendelstein bei Nuernberg. Tagestouren, Wochenmiete.",
        keywords=["vermietung", "mieten", "leihen", "verleih", "rental"],
    ),
    StaticPage(
        slug="/kontakt",
        title="Kontakt",
        snippet="Kontakt zu Electric Horses: Telefon, E-Mail, Anfahrt, Oeffnungszeiten.",
        keywords=["kontakt", "anfahrt", "adresse", "telefon", "email", "oeffnungszeiten"],
    ),
    StaticPage(
        slug="/ueber-uns",
        title="Ueber uns",
        snippet="Electric Horses ist die E-Mobilitaets-Sparte des Autohaus Richter & Zech.",
        keywords=["ueber uns", "team", "geschichte", "richter zech", "wendelstein"],
    ),
    StaticPage(
        slug="/blog",
        title="Blog",
        snippet="News, Ratgeber und Erfahrungsberichte rund um Elektromobilitaet.",
        keywords=["blog", "news", "artikel", "ratgeber"],
    ),
]
# Tag -> page bridge.
# Maps a lowercase blog tag to the slug of the static page it should surface.
# These tags exist on real blog posts; matching one of them surfaces the
# linked page (in addition to the blog posts that carry the tag).
# Source: actual tag pool extracted from blog_posts.tags.
# Each tag must appear only once: a repeated key in a dict literal silently
# overrides the earlier entry.
TAG_TO_PAGE_BRIDGE: dict[str, str] = {
    # --- Workshop topics (technology, service, tyres, battery) ---
    "akku": "/werkstatt",
    "akkuladung": "/werkstatt",
    "ladedauer": "/werkstatt",
    "reichweite": "/werkstatt",
    "verbrauch": "/werkstatt",
    "bremsen": "/werkstatt",
    "reifen": "/werkstatt",
    "metzeler": "/werkstatt",
    "pirelli": "/werkstatt",
    "heidenau": "/werkstatt",
    "wartung": "/werkstatt",
    "service": "/werkstatt",
    "installation": "/werkstatt",
    "montage": "/werkstatt",
    "werkzeug": "/werkstatt",
    "trittbrettverbreiterung": "/werkstatt",
    "radnabenmotor": "/werkstatt",
    "propilot": "/werkstatt",
    "effizienz": "/werkstatt",
    "ledersitze": "/werkstatt",
    # --- Advice & subsidies (contact page) ---
    "versicherung": "/kontakt",
    "foerderung": "/kontakt",
    "foerderungen": "/kontakt",
    "praemie": "/kontakt",
    "preisvorteil": "/kontakt",
    "innovationspraemie": "/kontakt",
    "umweltpraemie": "/kontakt",
    "kosten": "/kontakt",
    "vergleich": "/kontakt",
    "vorfuehrer": "/kontakt",
    "preis": "/kontakt",
    "goelectric": "/kontakt",
    # --- Driving licence & legal topics (workshop section "Recht") ---
    "abe": "/werkstatt",
    "ekfv": "/werkstatt",
    "klassea1": "/werkstatt",
    "klasseb": "/werkstatt",
    "schluesselzahl196": "/werkstatt",
    "fuehrerschein": "/werkstatt",
    "strassenzulassung": "/werkstatt",
    "verkehrsregeln": "/werkstatt",
    "verordnung": "/werkstatt",
    "legal": "/werkstatt",
    "ekickroller": "/werkstatt",
    # --- Rental topics ---
    "touristik": "/vermietung",
    "freizeit": "/vermietung",
    # --- Brand indicators (link to /marken) ---
    # NOTE(review): this looks like a misspelling of "markenunabhaengig";
    # kept verbatim because the keys mirror the stored blog tags - confirm
    # against blog_posts.tags before renaming.
    "marknunabhaengig": "/marken",
}
def _score_page(page, q: str) -> float:
    """Return the strongest relevance score of *page* for the query *q*.

    *q* must already be stripped and lowercased. The rules are evaluated from
    the highest score to the lowest, so the first rule that matches is also
    the best one. A result of 0.0 means the page does not match.
    """
    title = page.title.lower()
    # Exact title match -> very high
    if title == q:
        return 10.0
    # Slug match (without leading slash)
    if page.slug.lstrip("/").lower() == q:
        return 9.0
    # Title prefix
    if title.startswith(q):
        return 7.0
    # The query is lowercased, so compare against lowercased keywords too.
    keywords = [kw.lower() for kw in page.keywords]
    # Keyword exact
    if q in keywords:
        return 6.0
    # Substring rules need >= 3 chars to avoid noise like 'ze' in 'fahrzeuge'.
    long_enough = len(q) >= 3
    # Title substring
    if long_enough and q in title:
        return 5.0
    # Keyword prefix
    if any(kw.startswith(q) for kw in keywords):
        return 4.0
    # Snippet substring (weakest)
    if long_enough and q in page.snippet.lower():
        return 2.0
    return 0.0


def search_static_pages(
    query: str,
    limit: int = 5,
    pages: "Iterable[StaticPage] | None" = None,
) -> list[dict]:
    """Search static pages by title, slug, keywords, snippet substring.

    Every page receives the score of the strongest rule it satisfies:
    exact title (10), exact slug (9), title prefix (7), exact keyword (6),
    title substring (5), keyword prefix (4), snippet substring (2).

    Args:
        query: Raw user query; surrounding whitespace and case are ignored.
            ``None`` or a blank string yields no results.
        limit: Maximum number of results; values below zero are treated as 0.
        pages: Pages to search. Defaults to the module-level ``STATIC_PAGES``.

    Returns:
        Result dicts with the keys ``type``, ``title``, ``slug``, ``snippet``,
        ``score`` and ``matched_via``, best score first. Pages with equal
        scores keep the order in which they appear in *pages*.
    """
    q = (query or "").strip().lower()
    if not q:
        return []
    candidates = STATIC_PAGES if pages is None else pages
    results = []
    for page in candidates:
        score = _score_page(page, q)
        if score > 0:
            results.append({
                "type": "page",
                "title": page.title,
                "slug": page.slug,
                "snippet": page.snippet,
                "score": score,
                "matched_via": "static_page",
            })
    # The sort is stable (also with reverse=True), so ties keep registry order.
    results.sort(key=lambda r: r["score"], reverse=True)
    return results[:max(limit, 0)]
def get_pages_for_tags(tags: Iterable[str]) -> set[str]:
    """Collect the page slugs that the given blog tags are bridged to.

    Tags are looked up case-insensitively in ``TAG_TO_PAGE_BRIDGE``. Falsy
    entries and tags without a mapping are ignored. A falsy *tags* argument
    produces an empty set.
    """
    if not tags:
        return set()
    slugs = set()
    for tag in tags:
        if not tag:
            continue
        target = TAG_TO_PAGE_BRIDGE.get(tag.lower())
        if target is not None:
            slugs.add(target)
    return slugs
def page_by_slug(slug: str) -> StaticPage | None:
    """Return the first page in ``STATIC_PAGES`` whose slug equals *slug*.

    The comparison is exact (case-sensitive, leading slash included).
    Returns ``None`` when no page has that slug.
    """
    return next((page for page in STATIC_PAGES if page.slug == slug), None)