feat(backlog_setlists v2): explicit ends, label-strip, leet fallback + tests

- ANCHORS support (slug, end_line) to bound bleeders (mephisteuf/opal-2025/38c3-toilet) into clean blocks — no more swallowing neighbour setlists.
- clean_track_line: strip leading set-position labels (Intro:/Finale:/Sunset); for 'Label: Name' keep Name, for 'Name: gloss' drop the gloss; recovers the LIVE-Noctambule pattern.
- leet_fold fallback (B00K→book, T01l3ts→toilets, G00D→good) — resolves only after a straight miss so genuine digits (sept1) still match first.
- 9 gap gigs recovered (opal-2024 14/14, algolia-fdlm 26/26, 39c3 13/14…).
- 6 mechanical tests (leet, label, gloss, bpm/transition, ascii/comment skip, real-backlog coverage guard). 59 pass.

feat(backlog_setlists v2): explicit ends, label-strip, leet fallback + tests
- ANCHORS support (slug, end_line) to bound bleeders (mephisteuf/opal-2025/38c3-toilet) into clean blocks — no more swallowing neighbour setlists.
- clean_track_line: strip leading set-position labels (Intro:/Finale:/Sunset); for 'Label: Name' keep Name, for 'Name: gloss' drop the gloss; recovers the LIVE-Noctambule pattern.
- leet_fold fallback (B00K→book, T01l3ts→toilets, G00D→good) — resolves only after a straight miss so genuine digits (sept1) still match first.
- 9 gap gigs recovered (opal-2024 14/14, algolia-fdlm 26/26, 39c3 13/14…).
- 6 mechanical tests (leet, label, gloss, bpm/transition, ascii/comment skip, real-backlog coverage guard). 59 pass.
9a428b6e · PLN (Algolia) · 21d003dd · 9a428b6e · 9a428b6e · 9a428b6e
Commit 9a428b6e authored Jun 06, 2026 by PLN (Algolia)
Showing with 93 additions and 13 deletions

backlog_setlists.json armada/tide-table/backlog_setlists.json +0 -0

backlog_setlists.py armada/tide-table/backlog_setlists.py +44 -13

test_backlog_setlists.py armada/tide-table/tests/test_backlog_setlists.py +49 -0

No files found.
--- a/armada/tide-table/backlog_setlists.json
+++ b/armada/tide-table/backlog_setlists.json
--- a/armada/tide-table/backlog_setlists.py
+++ b/armada/tide-table/backlog_setlists.py
@@ -30,22 +30,26 @@ AS_OF = "2026-06-06"
 # ── Authored gig anchors: (1-based line of the header in backlog.md) → site slug ──
 # `confirm:`-prefixed slugs are NOT yet trusted (ambiguous header / needs PLN's ear);
 # they are parsed and reported but withheld from the emitted artifact.
+# value = slug  OR  (slug, end_line) to bound a block that would otherwise bleed
+# into the next densely-packed setlist (the backlog has no blank-gap between them).
 ANCHORS = {
    1441: "2025/ensad",                    # "# Set @ENSAD 2025"
    1816: "2026/le-vortex",                # "# VORTEX26"
    1031: "2024/opal-festival-2024",       # "## Opal <3🦉" / OPAL 2024
-    1632: "2025/opal-festival-2025",       # "# OPAL 2025"
+    1632: ("2025/opal-festival-2025", 1667),  # "# OPAL 2025" (bleeds into Latin Heritage)
-    710:  "2023/mephisteuf",               # "## Live @MephisTeuf"
+    710:  ("2023/mephisteuf", 735),        # "## Live @MephisTeuf" (commented block = cut)
    1194: "2024/38c3-house-of-tea",        # "#### Day 2 - HOUSE OF Tea" (under 38C3)
-    1242: "2024/38c3-toilet",              # "#### Day 3 - Hardcore in the Toilet Club"
+    1242: ("2024/38c3-toilet", 1253),      # cross-check only (already has tracks.json)
    1786: "2025/39c3-house-of-tea",        # "# CCC 39c3 ALGORAVE ... Pour Un Thé Dansant"
    1169: "2024/toplap-solstice-2024",     # "### TopLap Solstice Stream 2024"
    988:  "confirm:2024/algolia-fdlm",     # "## Algolia FDLM2024" — already has tracks.json
    1550: "confirm:2025/algorave-lyon",    # "GZ 2025 LYON ALGORAVE" then "# LABENNE LIVE" (?)
    1831: "confirm:2026/ete-surprise",     # "## SURPRISE IT'S A DANCEFLOOR" — slug guess
 }
-# Gigs still unanchored (no clear backlog block found): 2022/algolia-fdlm-2022,
+# Still unanchored (gaps for #66, need PLN's ear / SC confirmation): 2022/algolia-fdlm-2022,
-# 2024/38c3-chaos-music-club, 2025/39c3-toilet-rave, 2025/toplap-fromscratch-2025.
+# 2024/38c3-chaos-music-club, 2025/39c3-toilet-rave, 2025/toplap-fromscratch-2025. The
+# backlog also holds many UNMAPPED setlists (Latin Heritage, NON-ANNIVERSAIRE, MICROLIVE,
+# SovieTeuf, CCC Day-1, RAISE afterparty, Garibaldi…) whose slugs need confirming.
 # Lines that are decoration / structure, never tracks.
 _ASCII_ART = re.compile(r"[⠀-⣿▓▒░█▀▄■◆◢◣◤◥╔╗╚╝║═]")
@@ -65,6 +69,15 @@ def norm_name(s: str) -> str:
    return re.sub(r"[^a-z0-9]+", "", deaccent(s).lower())
+_LEET = str.maketrans({"0": "o", "1": "i", "3": "e", "4": "a", "5": "s", "7": "t"})
+def leet_fold(s: str) -> str:
+    """Fold ParVagues' leetspeak (B00K, T01l3ts, G00D, L1VEC0DE) → letters. Used only
+    as a FALLBACK so genuine digits (sept1, 165) still match straight first."""
+    return s.translate(_LEET)
 def build_name_index():
    """norm(name) → .tidal path, from the catalog's existing alias set (DRY)."""
    cv = json.load(open(CATALOG_VIEW))
@@ -102,7 +115,12 @@ def clean_track_line(raw: str):
    body = re.sub(r"\([^)]*\)", "", body)            # (Chloe cover) etc
    body = re.sub(r"\bfeat\b.*$", "", body, flags=re.I)
    body = body.replace("*", "").replace("_", "").replace("`", "")
-    body = re.sub(r":\s*.*$", "", body)              # "Café tiède: note" → drop gloss after colon
+    # strip a leading set-position label ("Intro: X", "Finale: X", "Sunset X")
+    body = _LABEL.sub("", body)
+    # remaining colon: "Label: Name" → keep Name; "Name: gloss" → keep Name
+    if ":" in body:
+        left, right = (p.strip() for p in body.split(":", 1))
+        body = right if right and (right[:1].isupper() and len(right.split()) <= 5) else left
    # remove emoji / symbol chars, keep letters/digits/space/'-/&
    body = "".join(ch for ch in body
                   if ch.isalnum() or ch in " '-/&." or unicodedata.category(ch).startswith("L"))
@@ -118,14 +136,21 @@ def clean_track_line(raw: str):
    return body, bpm, transition, True
-def block_span(lines, start_idx):
+def _slug_end(val):
+    """ANCHORS value → (slug, end_line|None)."""
+    return val if isinstance(val, tuple) else (val, None)
+def block_span(lines, start_idx, end_line=None):
    """Collect a gig block. Headers in backlog.md are DECORATIVE (stylized banners,
    repeated `# PARVAGUES`, sub-labels) not hierarchical, so we don't scope by level.
-    Instead: from the anchor, walk to the next ANCHOR, but terminate early once the
+    From the anchor, walk to the next ANCHOR (or an explicit end_line), terminating
-    setlist proper ends — a run of ≥3 blank lines after we've collected ≥2 items
+    early once the setlist proper ends — a run of ≥3 blank lines after we've collected
-    (journal prose resumes). Decorative headers are kept as section context, not breaks."""
+    ≥2 items (journal prose resumes). Decorative headers are kept as section context."""
    anchor_lines = sorted(k - 1 for k in ANCHORS)
    hard_end = next((a for a in anchor_lines if a > start_idx), len(lines))
+    if end_line is not None:
+        hard_end = min(hard_end, end_line - 1)
    out, items, blanks = [], 0, 0
    for raw in lines[start_idx + 1:hard_end]:
        if not raw.strip():
@@ -142,18 +167,23 @@ def block_span(lines, start_idx):
 # transition tails appended to a track name: "-> 5+7", " 0 - / 11", "- 9"
 _TRANS_TAIL = re.compile(r"\s*(->|[-–])\s*[\d\s./+,]*$|\s+[\d][\d\s./+,>-]*$")
+# leading set-position label to strip ("Intro: ", "Finale ", "Sunset — ", "Day 1 ")
+_LABEL = re.compile(
+    r"^\s*(intro|outro|final[e]?|bonus|rappel|sunset|sunrise|darkness|pause|"
+    r"d[eé]but|fin|end|day\s*\d+)\b\s*[:.\-–]?\s*", re.I)
 def parse(lines, name_idx):
    gigs = {}
-    for ln1, slug in sorted(ANCHORS.items()):
+    for ln1, val in sorted(ANCHORS.items()):
+        slug, end_line = _slug_end(val)
        confirm = slug.startswith("confirm:")
        real_slug = slug.split(":", 1)[1] if confirm else slug
        i = ln1 - 1
        header = lines[i].strip()
        section = None
        tracks, unmatched = [], []
-        for raw in block_span(lines, i):
+        for raw in block_span(lines, i, end_line):
            hm = re.match(r"\s*#{1,6}\s+(.+)$", raw)
            if hm:                                          # subsection label
                section = re.sub(r"[^\w\s'-]", "", deaccent(hm.group(1))).strip() or None
@@ -163,7 +193,8 @@ def parse(lines, name_idx):
                continue
            if _SECTION_HINT.match(name) and norm_name(name) not in name_idx:
                continue
-            track = name_idx.get(norm_name(name))
+            nm = norm_name(name)
+            track = name_idx.get(nm) or name_idx.get(leet_fold(nm))
            entry = {"raw": name, "track": track, "bpm": bpm, "transition": transition, "section": section}
            (tracks if track else unmatched).append(entry)
            if not track:

--- a/armada/tide-table/tests/test_backlog_setlists.py
+++ b/armada/tide-table/tests/test_backlog_setlists.py
+"""Backlog setlist recovery — mechanical guards (parsers-over-copy).
+The parser cleans noisy backlog lines and resolves to canonical .tidal via the
+catalog alias index. Test the load-bearing bits (leet fold, label strip, transition
+tails) on synthetic lines, plus a coverage regression guard on the real backlog."""
+import backlog_setlists as B
+def test_leet_fold():
+    assert B.leet_fold(B.norm_name("Burn this B00K")) == "burnthisbook"
+    assert B.leet_fold(B.norm_name("Ghosts in the T01l3ts")) == "ghostsinthetoilets"
+    assert B.leet_fold(B.norm_name("So G00D")) == "sogood"
+def test_clean_strips_labels_and_keeps_track():
+    # "Intro: X" / "Finale: X" → keep X, not the label
+    name, *_ , ok = B.clean_track_line("- Intro    _🚪 Contre-Visite     👁️_")
+    assert ok and name == "Contre-Visite"
+    name, *_, ok = B.clean_track_line("- Finale: Lady Perplexity          [138bpm]")
+    assert ok and name == "Lady Perplexity"
+def test_clean_drops_gloss_after_colon():
+    # "Name: gloss" (lowercase gloss) → keep Name
+    name, *_, ok = B.clean_track_line("- Café tiède: au plus chaud")
+    assert ok and name.lower().startswith("café tiède")
+def test_clean_captures_bpm_and_transition():
+    name, bpm, trans, ok = B.clean_track_line("- Quand on Décolle               [120] {11+12->11}")
+    assert ok and bpm == 120 and trans == "11+12->11"
+def test_clean_skips_ascii_and_comments():
+    assert B.clean_track_line("⣿-----⣿")[-1] is False
+    assert B.clean_track_line("<!-- - It's about Time -->")[-1] is False
+def test_real_backlog_coverage_does_not_regress():
+    """Guard: the cleanly-recovered gigs keep resolving. If the backlog or parser
+    changes and these drop, that's a regression to look at (not a silent loss)."""
+    out = B.build()
+    by = {**out["gigs"], **out["pending"]}
+    assert out["n_gigs"] >= 8
+    # opal-2024 is a clean, fully-resolvable block — it must stay at parity
+    assert by["2024/opal-festival-2024"]["n_resolved"] >= 13
+    # no recovered gig should be empty
+    for slug, g in out["gigs"].items():
+        assert g["n_tracks"] >= 1, slug