Commit 33d49591 by PLN (Algolia)

feat(corpus-viz): 'by the numbers' scrollytelling dataviz (task #64)

Phase-2 infodesign of the tide_eda corpus EDA: a standalone, scroll-driven
data essay (corpus.html) on the Ship's Bridge dark instrument language.

Six curated stories, vanilla SVG (zero npm deps), measured-width responsive:
1. the slow climb — studio vs club tempo, dual median lines + track cloud +
   histogram + felt-vs-written 2x inset; studio<->club lens is first-class
2. 2024 breakout — vocab burst + gig cadence on a shared time axis, magenta
   reserved for the one earned 2024 accent
3. palette — 12 sample families as a proportion bar, fleet colors + glyphs
4. the accent — signature gMask/gMute idioms (f*16 in 62/73)
5. collab fingerprint — per-collab bpm range + distinctive samples (raph fast)
6. set-staples — recurrence, Cafe trilogy highlighted

- tide_eda: add stage_tempo_by_year (gig-date x track-tempo) for the club lens
- build_corpus.py + 'tide.py corpus': inline-bundle to one self-contained file
  for deploy to me.nech.pl/parvagues/viz (dist/ gitignored)
- IntersectionObserver reveal/lazy-draw, reduced-motion + mobile reflow,
  load-fail banner; lens auto-disables on single-lens sections (honest)
- 59 tests green
parent 830d2dd1
#!/usr/bin/env python3
"""Inline-bundle corpus.html → a single portable file for the public deploy.
The dev/serve version of corpus.html fetches eda_report.json + tokens.json over
HTTP. For deploy (me.nech.pl/parvagues/viz) we want ONE self-contained file with
no fetches and no relative-asset fragility, so we embed the two JSONs into the
empty <script id="eda-data"> / <script id="tok-data"> blocks the page already
reads first (it only falls back to fetch when those are empty).
python3 build_corpus.py # → dist/viz.html
python3 build_corpus.py -o out.html
parsers-over-copy: the source of truth stays corpus.html + the generated JSONs;
this is a pure mechanical bundle step, re-runnable after any `tide_eda.py` run.
"""
import json
import sys
from pathlib import Path
HERE = Path(__file__).resolve().parent
def _embed(html: str, script_id: str, payload: str) -> str:
    """Replace the (empty) <script id=...></script> body with JSON payload.

    Args:
        html: Full page markup containing the placeholder block.
        script_id: ``id`` attribute of the ``application/json`` script block.
        payload: Serialized JSON text to place inside the block. The escaping
            below is only lossless for valid JSON, where ``<`` can occur
            solely inside string literals.

    Returns:
        The page with everything between the opening tag and its closing
        ``</script>`` replaced by the escaped payload.

    Raises:
        SystemExit: If the opening tag or its closing ``</script>`` is missing.
    """
    open_tag = f'<script id="{script_id}" type="application/json">'
    i = html.find(open_tag)
    if i < 0:
        raise SystemExit(f"corpus.html missing <script id={script_id!r}> block")
    body_start = i + len(open_tag)
    j = html.find("</script>", body_start)
    if j < 0:
        # Without this guard find() returns -1 and html[-1:] would silently
        # drop everything after the payload except the page's last character.
        raise SystemExit(f"corpus.html <script id={script_id!r}> block is never closed")
    # "</" inside JSON would end the script element early. "<!--" followed by
    # "<script" can stop the real </script> from closing the element. Both
    # replacements are valid JSON string escapes, so the parsed value is
    # unchanged.
    safe = payload.replace("</", "<\\/").replace("<!--", "<\\u0021--")
    return html[:body_start] + safe + html[j:]
def build(out: Path) -> Path:
    """Bundle corpus.html and its JSON inputs into one self-contained file.

    Reads ``corpus.html`` and ``eda_report.json`` (plus ``tokens.json`` when it
    exists, otherwise JSON ``null``) from this script's directory, re-dumps the
    JSON compactly, embeds it into the page's ``eda-data`` / ``tok-data``
    script blocks and writes the result to *out*.

    Args:
        out: Destination file; missing parent directories are created.

    Returns:
        The path that was written (the same object as *out*).
    """
    # Every read and write is pinned to UTF-8: the JSON is re-dumped with
    # ensure_ascii=False, so falling back to the locale encoding could fail
    # or garble non-ASCII text on systems whose default is not UTF-8.
    html = (HERE / "corpus.html").read_text(encoding="utf-8")
    eda = (HERE / "eda_report.json").read_text(encoding="utf-8")
    tok_path = HERE / "tokens.json"
    tok = tok_path.read_text(encoding="utf-8") if tok_path.exists() else "null"
    # minify the JSON a touch (re-dump without the indent)
    eda = json.dumps(json.loads(eda), ensure_ascii=False, separators=(",", ":"))
    tok = json.dumps(json.loads(tok), ensure_ascii=False, separators=(",", ":"))
    html = _embed(html, "eda-data", eda)
    html = _embed(html, "tok-data", tok)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(html, encoding="utf-8")
    return out
def main():
    """Command-line entry point: bundle the page and report where it went.

    Usage:
        build_corpus.py            -> writes dist/viz.html next to this script
        build_corpus.py -o OUT     -> writes OUT (``--out`` is accepted too)

    Raises:
        SystemExit: If ``-o``/``--out`` is given without a destination path.
    """
    args = sys.argv[1:]
    if args and args[0] in ("-o", "--out"):
        if len(args) < 2:
            # Previously this fell through to args[1] and died with IndexError.
            raise SystemExit(f"usage: build_corpus.py [-o OUT.html]  ({args[0]} needs a path)")
        out = Path(args[1])
    else:
        out = HERE / "dist" / "viz.html"
    p = build(out)
    kb = p.stat().st_size / 1024
    print(f"⛵ bundled → {p} ({kb:.0f} KB, self-contained, no fetch)")
    print(" deploy: copy to me.nech.pl/parvagues/viz/index.html")


# Run the bundler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
...@@ -475,6 +475,13 @@ ...@@ -475,6 +475,13 @@
"2025": 122, "2025": 122,
"2026": 126 "2026": 126
}, },
"stage_tempo_by_year": {
"2022": 115,
"2023": 112,
"2024": 124,
"2025": 120,
"2026": 124
},
"by_creation": [ "by_creation": [
{ {
"name": "CBOW", "name": "CBOW",
...@@ -945,12 +952,12 @@ ...@@ -945,12 +952,12 @@
"bpm_max": 142.0, "bpm_max": 142.0,
"distinctive_samples": [ "distinctive_samples": [
{ {
"sound": "moogBass", "sound": "risers",
"lift": 4.6, "lift": 4.6,
"n": 2 "n": 2
}, },
{ {
"sound": "risers", "sound": "moogBass",
"lift": 4.6, "lift": 4.6,
"n": 2 "n": 2
}, },
......
...@@ -109,20 +109,28 @@ def cmd_serve(args): ...@@ -109,20 +109,28 @@ def cmd_serve(args):
if args and args[0].isdigit(): if args and args[0].isdigit():
port = int(args[0]) port = int(args[0])
serve_py = HERE.parent / "serve.py" # armada/serve.py (Range-capable) serve_py = HERE.parent / "serve.py" # armada/serve.py (Range-capable)
url = f"http://127.0.0.1:{port}/triangle.html" base = f"http://127.0.0.1:{port}"
print(f"⛵ tide serve — open the triangle at:\n {url}\n (Ctrl-C to stop)\n") print(f"⛵ tide serve — open:\n {base}/triangle.html (catalog)"
f"\n {base}/corpus.html (by-the-numbers dataviz)\n (Ctrl-C to stop)\n")
try: try:
subprocess.run([sys.executable, str(serve_py), "--dir", str(HERE), "--port", str(port)]) subprocess.run([sys.executable, str(serve_py), "--dir", str(HERE), "--port", str(port)])
except KeyboardInterrupt: except KeyboardInterrupt:
print("\n✓ stopped") print("\n✓ stopped")
def cmd_corpus(args):
    """Run build_corpus.py as a child process and exit with its status.

    *args* is forwarded unchanged to the bundler script, which lives in the
    same directory as this file.
    """
    bundler = HERE / "build_corpus.py"
    command = [sys.executable, str(bundler)]
    command.extend(args)
    completed = subprocess.run(command)
    sys.exit(completed.returncode)
def cmd_list(): def cmd_list():
print("⛵ tide pipeline (dependency order):\n") print("⛵ tide pipeline (dependency order):\n")
for name, desc, _ in STEPS: for name, desc, _ in STEPS:
print(f" {name:<22} {desc}") print(f" {name:<22} {desc}")
print("\n test run the pytest suite") print("\n test run the pytest suite")
print(" serve [port] serve the triangle viz (default :8731)") print(" serve [port] serve the triangle + corpus viz (default :8731)")
print(" corpus [-o out.html] bundle corpus.html → self-contained deploy file")
def main(): def main():
...@@ -134,6 +142,8 @@ def main(): ...@@ -134,6 +142,8 @@ def main():
cmd_test() cmd_test()
elif cmd == "serve": elif cmd == "serve":
cmd_serve(args[1:]) cmd_serve(args[1:])
elif cmd == "corpus":
cmd_corpus(args[1:])
elif cmd in ("list", "ls"): elif cmd in ("list", "ls"):
cmd_list() cmd_list()
else: else:
......
...@@ -264,6 +264,21 @@ def build(): ...@@ -264,6 +264,21 @@ def build():
creation_year_bpm[x["created"][:4]].append(x["bpm"]) creation_year_bpm[x["created"][:4]].append(x["bpm"])
creation_tempo = {y: round(st.median(v)) for y, v in sorted(creation_year_bpm.items())} creation_tempo = {y: round(st.median(v)) for y, v in sorted(creation_year_bpm.items())}
# stage-BPM story: tempo of what was actually PERFORMED each year (club lens).
# Attribute a track's score-tempo to every gig-year it appears in (one vote per
# performance), then take the year median. The "in the club" counterpart to the
# studio (creation) line above — same tracks, weighted by when they hit a stage.
stage_year_bpm = defaultdict(list)
for t in T:
tp = tempo.get(t["track"])
if not tp:
continue
for g in t.get("gigs", []):
gdate = gigs.get(g)
if gdate:
stage_year_bpm[gdate[:4]].append(tp["bpm"])
stage_tempo = {y: round(st.median(v)) for y, v in sorted(stage_year_bpm.items())}
# palette + families # palette + families
snd = Counter() snd = Counter()
for t in T: for t in T:
...@@ -308,6 +323,7 @@ def build(): ...@@ -308,6 +323,7 @@ def build():
"histogram": dict(sorted(Counter(int(b // 10) * 10 for b in bpms).items())), "histogram": dict(sorted(Counter(int(b // 10) * 10 for b in bpms).items())),
"ac_delta": sorted(ac_delta, key=lambda x: -abs(x["delta"])), "ac_delta": sorted(ac_delta, key=lambda x: -abs(x["delta"])),
"creation_tempo_by_year": creation_tempo, "creation_tempo_by_year": creation_tempo,
"stage_tempo_by_year": stage_tempo,
"by_creation": by_creation, "by_creation": by_creation,
}, },
"collab_fingerprint": collab_fingerprint(T, tempo), "collab_fingerprint": collab_fingerprint(T, tempo),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment