Mirrors /opt/ai-apps/eh-search/ on the server, including the full FastAPI app (intent routing, FTS+fuzzy+substring hybrid, multi-source federation across vehicles + blog + brands + pages + static + tag bridge), SQL schema (Postgres materialized view with german_unaccent text search, pg_trgm for fuzzy), Dockerfile and compose. Sanitized the hardcoded password in sql/01_init.sql — replaced with REPLACE_ME_BEFORE_APPLYING placeholder since this repo is public. The eh-search service binds only on the private network (10.0.0.8:8200) and is reachable only via Pegasus nginx proxy at /api/search. Refs OP#1094 OP#1105 OP#1112 OP#1116 OP#1117
194 lines
6.8 KiB
Python
194 lines
6.8 KiB
Python
"""Static page registry + tag-to-page bridge.
|
|
|
|
Hardcoded list of top-level Astro pages that aren't in Directus.
|
|
The tag bridge maps blog tags (real, curated by the team) to these pages,
|
|
turning the blog content taxonomy into a search vocabulary for the site.
|
|
"""
|
|
from dataclasses import dataclass, field
|
|
from typing import Iterable
|
|
|
|
|
|
@dataclass
class StaticPage:
    """One hardcoded top-level page that can be surfaced in search results.

    Attributes:
        slug: URL path of the page, e.g. "/werkstatt".
        title: Display title.
        snippet: Description shown in search results.
        keywords: Direct synonyms for the page. Every instance gets its own
            fresh empty list when none is supplied.
    """

    slug: str
    title: str
    snippet: str
    keywords: list[str] = field(default_factory=list)
|
|
|
|
|
|
# Pages of electric-horses.de that exist only as Astro routes (they are not
# stored in Directus). The list order is significant: when two pages end up
# with the same search score, the one listed earlier is preferred.
#
# Row layout of the table below: (slug, title, snippet, keywords).
STATIC_PAGES: list[StaticPage] = [
    StaticPage(slug=slug, title=title, snippet=snippet, keywords=list(keywords))
    for slug, title, snippet, keywords in (
        (
            "/werkstatt",
            "Werkstatt",
            "Spezialwerkstatt fuer E-Roller und E-Motorraeder. Inspektion, Reparatur, Akku-Service.",
            ("werkstatt", "reparatur", "service", "inspektion", "wartung"),
        ),
        (
            "/ersatzteile",
            "Ersatzteile",
            "Original-Ersatzteile und Zubehoer fuer E-Roller, E-Motorraeder und E-Scooter.",
            ("ersatzteile", "teile", "zubehoer", "originalteile"),
        ),
        (
            "/fahrzeuge",
            "Fahrzeuge",
            "Unser kompletter Bestand an E-Rollern, E-Motorraedern und E-Autos.",
            ("fahrzeuge", "bestand", "angebot", "uebersicht", "alle"),
        ),
        (
            "/marken",
            "Marken",
            "Alle Marken im Ueberblick: Askoll, RAY, ZERO, Energica, Nissan und mehr.",
            ("marken", "hersteller", "brands"),
        ),
        (
            "/vermietung",
            "Vermietung",
            "E-Motorrad-Vermietung in Wendelstein bei Nuernberg. Tagestouren, Wochenmiete.",
            ("vermietung", "mieten", "leihen", "verleih", "rental"),
        ),
        (
            "/kontakt",
            "Kontakt",
            "Kontakt zu Electric Horses: Telefon, E-Mail, Anfahrt, Oeffnungszeiten.",
            ("kontakt", "anfahrt", "adresse", "telefon", "email", "oeffnungszeiten"),
        ),
        (
            "/ueber-uns",
            "Ueber uns",
            "Electric Horses ist die E-Mobilitaets-Sparte des Autohaus Richter & Zech.",
            ("ueber uns", "team", "geschichte", "richter zech", "wendelstein"),
        ),
        (
            "/blog",
            "Blog",
            "News, Ratgeber und Erfahrungsberichte rund um Elektromobilitaet.",
            ("blog", "news", "artikel", "ratgeber"),
        ),
    )
]
|
|
|
|
|
|
# Tag -> page bridge.
# Keys are lower-case blog tags, values are the slug of the static page that
# should be surfaced when the tag matches (in addition to the blog posts that
# carry the tag).
# Source: actual tag pool extracted from blog_posts.tags.
# Every key must appear exactly once: a repeated key in a dict literal is
# silently overwritten by its last occurrence.
TAG_TO_PAGE_BRIDGE: dict[str, str] = {
    # --- Workshop topics (technology, service, tyres, battery) ---
    "akku": "/werkstatt",
    "akkuladung": "/werkstatt",
    "ladedauer": "/werkstatt",
    "reichweite": "/werkstatt",
    "verbrauch": "/werkstatt",
    "bremsen": "/werkstatt",
    "reifen": "/werkstatt",
    "metzeler": "/werkstatt",
    "pirelli": "/werkstatt",
    "heidenau": "/werkstatt",
    "wartung": "/werkstatt",
    "service": "/werkstatt",
    "installation": "/werkstatt",
    "montage": "/werkstatt",
    "werkzeug": "/werkstatt",
    "trittbrettverbreiterung": "/werkstatt",
    "radnabenmotor": "/werkstatt",
    "propilot": "/werkstatt",
    "effizienz": "/werkstatt",
    "ledersitze": "/werkstatt",

    # --- Advice & subsidies (contact page) ---
    "versicherung": "/kontakt",
    "foerderung": "/kontakt",
    "foerderungen": "/kontakt",
    "praemie": "/kontakt",
    "preisvorteil": "/kontakt",
    "innovationspraemie": "/kontakt",
    "umweltpraemie": "/kontakt",
    "kosten": "/kontakt",
    "vergleich": "/kontakt",
    "vorfuehrer": "/kontakt",
    "preis": "/kontakt",
    "goelectric": "/kontakt",

    # --- Driving licence & legal (workshop section "Recht") ---
    "abe": "/werkstatt",
    "ekfv": "/werkstatt",
    "klassea1": "/werkstatt",
    "klasseb": "/werkstatt",
    "schluesselzahl196": "/werkstatt",
    "fuehrerschein": "/werkstatt",
    "strassenzulassung": "/werkstatt",
    "verkehrsregeln": "/werkstatt",
    "verordnung": "/werkstatt",
    "legal": "/werkstatt",
    "ekickroller": "/werkstatt",

    # --- Rental topics ---
    "touristik": "/vermietung",
    "freizeit": "/vermietung",

    # --- Brand indicators (link to /marken) ---
    # NOTE(review): this spelling looks like a typo of "markenunabhaengig";
    # it is kept verbatim because the key has to equal the tag as stored on
    # the blog posts -- confirm against blog_posts.tags before renaming.
    "marknunabhaengig": "/marken",
}
|
|
|
|
|
|
def search_static_pages(query: str, limit: int = 5) -> list[dict]:
    """Search the static page registry by title, slug, keywords and snippet.

    The query is stripped and lower-cased, then every page in ``STATIC_PAGES``
    is checked against the rules below. A page receives the score of the
    strongest rule it satisfies:

    * 10.0 -- title equals the query
    *  9.0 -- slug (without the leading slash) equals the query
    *  7.0 -- title starts with the query
    *  6.0 -- a keyword equals the query
    *  5.0 -- query is a substring of the title (queries of 3+ chars only)
    *  4.0 -- a keyword starts with the query
    *  2.0 -- query is a substring of the snippet (queries of 3+ chars only)

    Args:
        query: Raw user query. ``None`` or a blank string yields no results.
        limit: Maximum number of results to return.

    Returns:
        Dicts in the same shape as content search results (keys ``type``,
        ``title``, ``slug``, ``snippet``, ``score``, ``matched_via``), ordered
        by descending score. Pages with equal scores keep their order from
        ``STATIC_PAGES``.
    """
    needle = (query or "").strip().lower()
    if not needle:
        return []

    # Substring rules are disabled for very short queries to avoid noise such
    # as 'ze' matching 'fahrzeuge'.
    substring_ok = len(needle) >= 3

    hits = []
    for page in STATIC_PAGES:
        title = page.title.lower()
        # Rules are listed strongest first and the first satisfied one wins.
        # Evaluating them in score order guarantees that a weaker rule can
        # never hide a stronger one (e.g. an exact keyword hit of 6.0 must not
        # be reported as a 5.0 title-substring hit).
        rules = (
            (10.0, title == needle),
            (9.0, page.slug.lstrip("/").lower() == needle),
            (7.0, title.startswith(needle)),
            (6.0, needle in page.keywords),
            (5.0, substring_ok and needle in title),
            (4.0, any(kw.startswith(needle) for kw in page.keywords)),
            (2.0, substring_ok and needle in page.snippet.lower()),
        )
        score = next((points for points, matched in rules if matched), 0.0)
        if score > 0:
            hits.append({
                "type": "page",
                "title": page.title,
                "slug": page.slug,
                "snippet": page.snippet,
                "score": score,
                "matched_via": "static_page",
            })

    # list.sort is stable (also with reverse=True), so equally scored pages
    # stay in registry order, which is the intended tie-break.
    hits.sort(key=lambda hit: hit["score"], reverse=True)
    return hits[:limit]
|
|
|
|
|
|
def get_pages_for_tags(tags: Iterable[str]) -> set[str]:
    """Collect the page slugs that the given blog tags are bridged to.

    Tags are matched case-insensitively against ``TAG_TO_PAGE_BRIDGE``. Falsy
    entries and tags without a bridge entry are ignored. A falsy ``tags``
    argument yields an empty set.
    """
    slugs: set[str] = set()
    if not tags:
        return slugs

    for tag in tags:
        if not tag:
            continue
        key = tag.lower()
        if key in TAG_TO_PAGE_BRIDGE:
            slugs.add(TAG_TO_PAGE_BRIDGE[key])
    return slugs
|
|
|
|
|
|
def page_by_slug(slug: str) -> StaticPage | None:
    """Return the first entry of ``STATIC_PAGES`` whose slug equals ``slug``.

    The comparison is exact (case-sensitive, leading slash included).
    Returns ``None`` when no page matches.
    """
    return next((page for page in STATIC_PAGES if page.slug == slug), None)
|