feat(corpus-viz): 'by the numbers' scrollytelling dataviz (task #64)

Phase-2 infodesign of the tide_eda corpus EDA: a standalone, scroll-driven data essay (corpus.html) on the Ship's Bridge dark instrument language. Six curated stories, vanilla SVG (zero npm deps), measured-width responsive: 1. the slow climb — studio vs club tempo, dual median lines + track cloud + histogram + felt-vs-written 2x inset; studio<->club lens is first-class 2. 2024 breakout — vocab burst + gig cadence on a shared time axis, magenta reserved for the one earned 2024 accent 3. palette — 12 sample families as a proportion bar, fleet colors + glyphs 4. the accent — signature gMask/gMute idioms (f*16 in 62/73) 5. collab fingerprint — per-collab bpm range + distinctive samples (raph fast) 6. set-staples — recurrence, Cafe trilogy highlighted - tide_eda: add stage_tempo_by_year (gig-date x track-tempo) for the club lens - build_corpus.py + 'tide.py corpus': inline-bundle to one self-contained file for deploy to me.nech.pl/parvagues/viz (dist/ gitignored) - IntersectionObserver reveal/lazy-draw, reduced-motion + mobile reflow, load-fail banner; lens auto-disables on single-lens sections (honest) - 59 tests green

feat(corpus-viz): 'by the numbers' scrollytelling dataviz (task #64)
Phase-2 infodesign of the tide_eda corpus EDA: a standalone, scroll-driven data essay (corpus.html) on the Ship's Bridge dark instrument language. Six curated stories, vanilla SVG (zero npm deps), measured-width responsive: 1. the slow climb — studio vs club tempo, dual median lines + track cloud + histogram + felt-vs-written 2x inset; studio<->club lens is first-class 2. 2024 breakout — vocab burst + gig cadence on a shared time axis, magenta reserved for the one earned 2024 accent 3. palette — 12 sample families as a proportion bar, fleet colors + glyphs 4. the accent — signature gMask/gMute idioms (f*16 in 62/73) 5. collab fingerprint — per-collab bpm range + distinctive samples (raph fast) 6. set-staples — recurrence, Cafe trilogy highlighted - tide_eda: add stage_tempo_by_year (gig-date x track-tempo) for the club lens - build_corpus.py + 'tide.py corpus': inline-bundle to one self-contained file for deploy to me.nech.pl/parvagues/viz (dist/ gitignored) - IntersectionObserver reveal/lazy-draw, reduced-motion + mobile reflow, load-fail banner; lens auto-disables on single-lens sections (honest) - 59 tests green
33d49591 · PLN (Algolia) · 830d2dd1 · 33d49591 · 33d49591 · 33d49591
Commit 33d49591 authored Jun 06, 2026 by PLN (Algolia)
6 changed files
--- a/armada/tide-table/.gitignore
+++ b/armada/tide-table/.gitignore
+dist/
+__pycache__/
--- a/armada/tide-table/build_corpus.py
+++ b/armada/tide-table/build_corpus.py
+#!/usr/bin/env python3
+"""Inline-bundle corpus.html → a single portable file for the public deploy.
+The dev/serve version of corpus.html fetches eda_report.json + tokens.json over
+HTTP. For deploy (me.nech.pl/parvagues/viz) we want ONE self-contained file with
+no fetches and no relative-asset fragility, so we embed the two JSONs into the
+empty <script id="eda-data"> / <script id="tok-data"> blocks the page already
+reads first (it only falls back to fetch when those are empty).
+    python3 build_corpus.py            # → dist/viz.html
+    python3 build_corpus.py -o out.html
+parsers-over-copy: the source of truth stays corpus.html + the generated JSONs;
+this is a pure mechanical bundle step, re-runnable after any `tide_eda.py` run.
+"""
+import json
+import sys
+from pathlib import Path
+HERE = Path(__file__).resolve().parent
+def _embed(html: str, script_id: str, payload: str) -> str:
+    """Replace the body of a JSON ``<script>`` block in *html* with *payload*.
+
+    The block is located by its exact opening tag,
+    ``<script id="{script_id}" type="application/json">``; everything between
+    that tag and the next ``</script>`` is replaced.
+
+    Args:
+        html: Full page source.
+        script_id: ``id`` attribute of the target ``<script>`` element.
+        payload: JSON text to place inside the element.
+
+    Returns:
+        The page source with the payload embedded.
+
+    Raises:
+        SystemExit: if the opening tag or its closing ``</script>`` is missing.
+    """
+    open_tag = f'<script id="{script_id}" type="application/json">'
+    start = html.find(open_tag)
+    if start < 0:
+        raise SystemExit(f"corpus.html missing <script id={script_id!r}> block")
+    body_start = start + len(open_tag)
+    end = html.find("</script>", body_start)
+    if end < 0:
+        # find() returns -1 here; slicing with it would keep only the last
+        # character of the page after the payload and silently corrupt it.
+        raise SystemExit(f"corpus.html has unterminated <script id={script_id!r}> block")
+    # A literal "</" inside the JSON could close the element early; "<\/" is an
+    # equivalent JSON string escape, so the parsed data is unchanged.
+    safe = payload.replace("</", "<\\/")
+    return html[:body_start] + safe + html[end:]
+def build(out: Path) -> Path:
+    """Write a self-contained copy of corpus.html with its JSON data inlined.
+
+    Reads ``corpus.html``, ``eda_report.json`` and, if it exists,
+    ``tokens.json`` from the directory ``HERE``. A missing ``tokens.json`` is
+    embedded as JSON ``null``. Both JSON documents are re-serialised compactly
+    before being embedded into the ``eda-data`` / ``tok-data`` script blocks.
+
+    Args:
+        out: Destination file; parent directories are created as needed.
+
+    Returns:
+        *out*, after the bundle has been written.
+    """
+    # Pin UTF-8 for every read and write: the JSON is dumped with
+    # ensure_ascii=False, so relying on the locale's default encoding could
+    # garble non-ASCII text or raise UnicodeEncodeError on non-UTF-8 systems.
+    html = (HERE / "corpus.html").read_text(encoding="utf-8")
+    eda = (HERE / "eda_report.json").read_text(encoding="utf-8")
+    tok_path = HERE / "tokens.json"
+    tok = tok_path.read_text(encoding="utf-8") if tok_path.exists() else "null"
+    # minify the JSON a touch (re-dump without the indent)
+    eda = json.dumps(json.loads(eda), ensure_ascii=False, separators=(",", ":"))
+    tok = json.dumps(json.loads(tok), ensure_ascii=False, separators=(",", ":"))
+    html = _embed(html, "eda-data", eda)
+    html = _embed(html, "tok-data", tok)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text(html, encoding="utf-8")
+    return out
+def main():
+    """CLI entry point: bundle corpus.html and report where the result landed.
+
+    Usage: ``build_corpus.py [-o|--out PATH]``. Without ``-o`` the bundle is
+    written to ``dist/viz.html`` under ``HERE``.
+
+    Raises:
+        SystemExit: if ``-o``/``--out`` is given without a path.
+    """
+    out = HERE / "dist" / "viz.html"
+    args = sys.argv[1:]
+    if args and args[0] in ("-o", "--out"):
+        if len(args) < 2:
+            # Indexing args[1] without this check dies with a bare IndexError.
+            raise SystemExit(f"{args[0]} requires an output path")
+        out = Path(args[1])
+    p = build(out)
+    kb = p.stat().st_size / 1024
+    print(f"⛵ bundled → {p}  ({kb:.0f} KB, self-contained, no fetch)")
+    print("   deploy: copy to me.nech.pl/parvagues/viz/index.html")
+if __name__ == "__main__":
+    main()
--- a/armada/tide-table/corpus.html
+++ b/armada/tide-table/corpus.html
--- a/armada/tide-table/eda_report.json
+++ b/armada/tide-table/eda_report.json
@@ -475,6 +475,13 @@
   "2025": 122,
   "2026": 126
  },
+  "stage_tempo_by_year": {
+   "2022": 115,
+   "2023": 112,
+   "2024": 124,
+   "2025": 120,
+   "2026": 124
+  },
  "by_creation": [
   {
    "name": "CBOW",
@@ -945,12 +952,12 @@
   "bpm_max": 142.0,
   "distinctive_samples": [
    {
-     "sound": "moogBass",
+     "sound": "risers",
     "lift": 4.6,
     "n": 2
    },
    {
-     "sound": "risers",
+     "sound": "moogBass",
     "lift": 4.6,
     "n": 2
    },

--- a/armada/tide-table/tide.py
+++ b/armada/tide-table/tide.py
@@ -109,20 +109,28 @@ def cmd_serve(args):
    if args and args[0].isdigit():
        port = int(args[0])
    serve_py = HERE.parent / "serve.py"        # armada/serve.py (Range-capable)
-    url = f"http://127.0.0.1:{port}/triangle.html"
+    base = f"http://127.0.0.1:{port}"
-    print(f"⛵ tide serve — open the triangle at:\n    {url}\n   (Ctrl-C to stop)\n")
+    print(f"⛵ tide serve — open:\n    {base}/triangle.html   (catalog)"
+          f"\n    {base}/corpus.html     (by-the-numbers dataviz)\n   (Ctrl-C to stop)\n")
    try:
        subprocess.run([sys.executable, str(serve_py), "--dir", str(HERE), "--port", str(port)])
    except KeyboardInterrupt:
        print("\n✓ stopped")
+def cmd_corpus(args):
+    """Run build_corpus.py in a child interpreter and exit with its status.
+
+    *args* is forwarded unchanged to the script's command line.
+    """
+    bundler = HERE / "build_corpus.py"
+    completed = subprocess.run([sys.executable, str(bundler), *args])
+    sys.exit(completed.returncode)
 def cmd_list():
    print("⛵ tide pipeline (dependency order):\n")
    for name, desc, _ in STEPS:
        print(f"  {name:<22} {desc}")
    print("\n  test                   run the pytest suite")
-    print("  serve [port]           serve the triangle viz (default :8731)")
+    print("  serve [port]           serve the triangle + corpus viz (default :8731)")
+    print("  corpus [-o out.html]   bundle corpus.html → self-contained deploy file")
 def main():
@@ -134,6 +142,8 @@ def main():
        cmd_test()
    elif cmd == "serve":
        cmd_serve(args[1:])
+    elif cmd == "corpus":
+        cmd_corpus(args[1:])
    elif cmd in ("list", "ls"):
        cmd_list()
    else:

--- a/armada/tide-table/tide_eda.py
+++ b/armada/tide-table/tide_eda.py
@@ -264,6 +264,21 @@ def build():
        creation_year_bpm[x["created"][:4]].append(x["bpm"])
    creation_tempo = {y: round(st.median(v)) for y, v in sorted(creation_year_bpm.items())}
+    # stage-BPM story: tempo of what was actually PERFORMED each year (club lens).
+    # Attribute a track's score-tempo to every gig-year it appears in (one vote per
+    # performance), then take the year median. The "in the club" counterpart to the
+    # studio (creation) line above — same tracks, weighted by when they hit a stage.
+    stage_year_bpm = defaultdict(list)
+    for t in T:
+        tp = tempo.get(t["track"])
+        if not tp:
+            continue
+        for g in t.get("gigs", []):
+            gdate = gigs.get(g)
+            if gdate:
+                stage_year_bpm[gdate[:4]].append(tp["bpm"])
+    stage_tempo = {y: round(st.median(v)) for y, v in sorted(stage_year_bpm.items())}
    # palette + families
    snd = Counter()
    for t in T:
@@ -308,6 +323,7 @@ def build():
            "histogram": dict(sorted(Counter(int(b // 10) * 10 for b in bpms).items())),
            "ac_delta": sorted(ac_delta, key=lambda x: -abs(x["delta"])),
            "creation_tempo_by_year": creation_tempo,
+            "stage_tempo_by_year": stage_tempo,
            "by_creation": by_creation,
        },
        "collab_fingerprint": collab_fingerprint(T, tempo),