Commit 9731a386 by PLN (Algolia)

feat(build_eda): per-take EDA emitter + TakeEDA/OrbitEDA/EDAStatus models

Measures level/register/time-activity per orbit from local Ardour stems → eda_{take}.json
(silent/empty flags); ledger scans interchange → eda_status.json. Finding: 61 takes
locally reachable (not the 9 assumed) → most of the catalog groundable offline.
parent 3ce8bba8
#!/usr/bin/env python3
"""build_eda — per-take EDA emitter (corner-B grounding, the mastering first step).
Measures each orbit's LEVEL / REGISTER / TIME-ACTIVITY from the LOCAL Ardour
interchange stems (no freebox) → eda_{take}.json, validated against models.TakeEDA.
Never infers role from a name; flags silent orbits and empty takes (stem files
present ≠ real audio). eda_index() auto-discovers the emitted files, so the
triangle's corner-B coverage climbs with no wiring (feedback_mastering_eda).
python3 build_eda.py ledger # Stage 0: reachability → eda_status.json
python3 build_eda.py run Take70 # Stage 1: emit eda_Take70.json
python3 build_eda.py all [--limit N] [--force] # emit for every local take
Each orbit stem is decoded ONCE: a coarse 2 s-bin envelope gives activity + level
over the whole take, and the loudest ~20 s window is profiled (centroid/bands) — so
a 40-min SET take costs one decode per orbit, not a full-length FFT.
"""
import json
import sys
from datetime import date
from pathlib import Path
import numpy as np
import audio_lens as AL
import models as M
# Directory holding this script; eda_{take}.json and eda_status.json are written here.
HERE = Path(__file__).resolve().parent
LIT_FLOOR = -52.0  # dB; a 2 s envelope bin whose RMS level is above this is "lit" (Judge UI convention)
SILENT_PEAK = -55.0  # dB; an orbit whose sample peak over the whole decoded stem is below this is silent
WIN_S = 20.0  # seconds; length of the spectral-profile window taken around the loudest bin
def take_orbits(take):
    """Return the ascending orbit numbers (1..12) of `take` that have a stem.

    An orbit counts as present when the first element returned by
    ``AL.orbit_files(take, orbit)`` is not ``None``.
    """
    present = []
    for orbit in range(1, 13):
        first_stem = AL.orbit_files(take, orbit)[0]
        if first_stem is None:
            continue
        present.append(orbit)
    return present
def local_takes():
    """Scan the interchange once and map every take to its sorted orbit numbers.

    Only files under ``AL.IX`` matching ``*_Tidal *-1%L.wav`` are considered.
    The take name is everything before the first ``"_Tidal "``; the orbit
    number is the integer between that marker and ``"-1%L.wav"``.
    """
    marker = "_Tidal "
    suffix = "-1%L.wav"
    found = {}
    for stem in AL.IX.glob("*_Tidal *-1%L.wav"):
        fname = stem.name
        cut = fname.find(marker)
        take = fname[:cut]
        if take not in found:
            found[take] = set()
        orbit = int(fname[cut + len(marker):fname.index(suffix)])
        found[take].add(orbit)
    return {take: sorted(orbits) for take, orbits in found.items()}
def _orbit_eda(take, o):
    """Decode orbit `o` of `take` once → ``(OrbitEDA, duration_seconds)``.

    The stem(s) returned by ``AL.orbit_files`` are decoded; when a second stem
    is present the two are averaged over their common length. A coarse 2 s-bin
    RMS envelope yields the activity percentage, and a ``WIN_S``-second window
    around the loudest bin is handed to ``AL.profile`` for the spectral fields.

    A stem that decodes to zero samples is reported as a silent orbit with a
    duration of 0.0 instead of raising, so one empty export cannot abort the
    whole take.
    """
    L, R = AL.orbit_files(take, o)
    sig = AL.decode(L)
    if R is not None:
        r = AL.decode(R)
        n = min(len(sig), len(r))
        sig = (sig[:n] + r[:n]) / 2
    role = AL.ROLE.get(o)

    if len(sig) == 0:
        # Stem file present but no audio samples: np.max() on an empty array
        # raises ValueError, so flag the orbit as silent explicitly.
        return M.OrbitEDA(
            orbit=o, role=role, active_pct=0.0, peak_db=-120.0, rms_db=-120.0,
            centroid=None, family=None, bands={}, silent=True,
        ), 0.0

    dur = len(sig) / AL.SR

    # Coarse 2 s-bin envelope (activity + overall level), cheap. A trailing
    # remainder shorter than one bin is not measured unless it is the only bin.
    per = int(2.0 * AL.SR)
    nb = max(1, len(sig) // per)
    env = np.array([
        20 * np.log10(np.sqrt(np.mean(sig[b * per:(b + 1) * per] ** 2)) + 1e-9)
        for b in range(nb)
    ])
    active_pct = 100.0 * float(np.mean(env > LIT_FLOOR))
    # Sample peak over the whole stem; 1e-9 keeps log10 finite on digital silence.
    peak_db = float(20 * np.log10(np.abs(sig).max() + 1e-9))

    # Profile a bounded window around the start of the loudest bin
    # (full-SR spectrum, but never the full take length).
    pk = int(np.argmax(env)) * per
    half = int(WIN_S * AL.SR / 2)
    prof = AL.profile(sig[max(0, pk - half): pk + half])
    fam, cen = AL.classify_family(role, prof)

    return M.OrbitEDA(
        orbit=o, role=role, active_pct=round(active_pct, 1),
        peak_db=round(peak_db, 1),
        # AL.profile may return a falsy value; fall back to the model defaults.
        rms_db=round(prof["rms_db"], 1) if prof else -120.0,
        centroid=round(cen, 1) if cen is not None else None,
        family=fam,
        bands={k: round(v, 1) for k, v in prof["bands"].items()} if prof else {},
        # Hand the model a plain Python bool rather than a NumPy boolean.
        silent=bool(peak_db < SILENT_PEAK),
    ), dur
def build_take(take):
    """Measure every present orbit of `take` and assemble its TakeEDA.

    Returns ``None`` when the take has no orbit stems. Otherwise the take
    duration is the longest orbit duration, ``n_active`` counts the orbits not
    flagged silent, and the take peak is the loudest orbit peak.
    """
    orbit_ids = take_orbits(take)
    if not orbit_ids:
        return None

    measured = [_orbit_eda(take, orbit_id) for orbit_id in orbit_ids]
    per_orbit = [eda for eda, _ in measured]
    longest = max(0.0, *(secs for _, secs in measured))

    audible = len([eda for eda in per_orbit if not eda.silent])
    loudest = max((eda.peak_db for eda in per_orbit), default=-120.0)

    provenance = M.Provenance(
        source=M.Source.derived,
        locator="build_eda: local Ardour interchange stems",
        as_of=date.today(),
    )
    return M.TakeEDA(
        take=take,
        dur_s=round(longest, 1),
        n_orbits=len(per_orbit),
        n_active=audible,
        peak_db=round(loudest, 1),
        empty=(audible == 0),
        orbits=per_orbit,
        provenance=provenance,
    )
def out_path(take):
    """Return the path of the EDA file for `take`: ``eda_{take}.json`` under HERE."""
    filename = "eda_{}.json".format(take)
    return HERE.joinpath(filename)
def cmd_run(take, force=True):
    """Stage 1: build the EDA for `take` and write it to ``eda_{take}.json``.

    With ``force=False`` an existing output file is left untouched. A take
    without local stems is reported and nothing is written. The JSON is written
    as UTF-8 explicitly so the file does not depend on the machine's locale.
    """
    target = out_path(take)
    if not force and target.exists():
        print(f" {take}: already grounded (use --force to redo)")
        return
    eda = build_take(take)
    if eda is None:
        print(f" {take}: no local stems")
        return
    target.write_text(eda.model_dump_json(indent=1, by_alias=True),
                      encoding="utf-8")
    tag = " ⚠ EMPTY" if eda.empty else ""
    print(f"✓ eda_{take}.json — {eda.dur_s:.0f}s, {eda.n_active}/{eda.n_orbits} active, "
          f"peak {eda.peak_db:.1f}dB{tag}")
def cmd_all(limit=None, force=False):
    """Emit an EDA file for every locally reachable take, in sorted name order.

    `limit` caps how many takes are processed; ``None`` means no cap, and ``0``
    processes nothing. `force` is passed through to ``cmd_run``, so already
    grounded takes are skipped unless it is true.
    """
    takes = sorted(local_takes())
    # Compare against None: a plain truthiness test would treat `--limit 0`
    # as "no limit" and process every take.
    if limit is not None:
        takes = takes[:limit]
    print(f"⛵ build_eda over {len(takes)} local takes (force={force})\n")
    for t in takes:
        cmd_run(t, force=force)
def cmd_ledger():
    """Stage 0: classify reachability per take, count grounded, emit eda_status.json.

    Every take found by ``local_takes()`` gets one row with reach
    ``"local-stems"``; a take is "grounded" when its ``eda_{take}.json`` already
    exists. Only local takes are scanned, so ``n_takes`` and ``n_local`` are the
    same count. The ledger is written as UTF-8 explicitly so the file does not
    depend on the machine's locale.
    """
    loc = local_takes()
    rows = [
        {"take": t, "reach": "local-stems", "n_orbits": len(orbits),
         "grounded": out_path(t).exists()}
        for t, orbits in sorted(loc.items())
    ]
    grounded = sum(1 for r in rows if r["grounded"])
    led = M.EDAStatus(as_of=date.today().isoformat(), n_takes=len(rows),
                      n_local=len(rows), n_grounded=grounded, takes=rows)
    (HERE / "eda_status.json").write_text(
        led.model_dump_json(indent=1, by_alias=True), encoding="utf-8")
    print(f"✓ eda_status.json — {led.n_takes} takes, {led.n_local} local-reachable, "
          f"{led.n_grounded} already grounded ({led.n_takes - led.n_grounded} to go)")
def main():
    """Command-line entry point: dispatch ``ledger`` / ``run <Take>`` / ``all``.

    With no arguments the ledger is built. Anything unrecognised, ``run``
    without a take, or ``all --limit`` with a missing or non-integer value
    exits with the usage message instead of a traceback.
    """
    usage = "usage: build_eda.py [ledger | run <Take> | all [--limit N] [--force]]"
    args = sys.argv[1:]
    cmd = args[0] if args else "ledger"
    if cmd == "ledger":
        cmd_ledger()
    elif cmd == "run" and len(args) > 1:
        cmd_run(args[1])
    elif cmd == "all":
        limit = None
        if "--limit" in args:
            try:
                limit = int(args[args.index("--limit") + 1])
            except (IndexError, ValueError):
                # `--limit` was the last argument or its value is not an integer.
                sys.exit(usage)
        cmd_all(limit=limit, force="--force" in args)
    else:
        sys.exit(usage)


if __name__ == "__main__":
    main()
{
"schema": "EDA reachability ledger",
"as_of": "2026-06-07",
"n_takes": 61,
"n_local": 61,
"n_grounded": 0,
"takes": [
{
"take": "Take14",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take15",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take16",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take17",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take18",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take19",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take20",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take21",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take22",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take24",
"reach": "local-stems",
"n_orbits": 10,
"grounded": false
},
{
"take": "Take28",
"reach": "local-stems",
"n_orbits": 8,
"grounded": false
},
{
"take": "Take29",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take3",
"reach": "local-stems",
"n_orbits": 11,
"grounded": false
},
{
"take": "Take30",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take32",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take33",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take34",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take35",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take36",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take37",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take38",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take39",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take4",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take40",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take42",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take45",
"reach": "local-stems",
"n_orbits": 11,
"grounded": false
},
{
"take": "Take49",
"reach": "local-stems",
"n_orbits": 11,
"grounded": false
},
{
"take": "Take5",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take53",
"reach": "local-stems",
"n_orbits": 11,
"grounded": false
},
{
"take": "Take54",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take55",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take58",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take59",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take61",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take62",
"reach": "local-stems",
"n_orbits": 11,
"grounded": false
},
{
"take": "Take63",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take64",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take65",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take66",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take67",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take68",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take70",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take71",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take72",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take73",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take74",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take75",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take76",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take77",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take79",
"reach": "local-stems",
"n_orbits": 10,
"grounded": false
},
{
"take": "Take8",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take80",
"reach": "local-stems",
"n_orbits": 1,
"grounded": false
},
{
"take": "Take82",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take83",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take84",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take85",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take86",
"reach": "local-stems",
"n_orbits": 8,
"grounded": false
},
{
"take": "Take87",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take88",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take89",
"reach": "local-stems",
"n_orbits": 12,
"grounded": false
},
{
"take": "Take90",
"reach": "local-stems",
"n_orbits": 9,
"grounded": false
}
]
}
\ No newline at end of file
......@@ -696,3 +696,51 @@ class Presence(BaseModel):
as_of: str
profiles: list[PresenceLink] = Field(default_factory=list) # artist-level identities
releases: list[Release] = Field(default_factory=list)
# ── per-take EDA (corner-B grounding; build_eda.py emits eda_{take}.json) ──────
# The "required first step of mastering": measure each orbit's LEVEL, REGISTER and
# TIME-ACTIVITY from the LOCAL Ardour stems — never infer a sound's role from its
# name (feedback_mastering_eda: meth_bass is a wobble, but its register is decided
# by measured centroid, not the word "bass"). Stem files present ≠ real audio, so the
# EDA flags silent orbits / empty takes (the Take63/64 'export but no audio' case).
# eda_index() auto-discovers eda_*.json → triangle corner-B coverage climbs. Each
# file validated against TakeEDA on emit (parsers-over-copy, [[feedback_parsers_over_copy]]).
class OrbitEDA(BaseModel):
    """Measured level, register and time-activity of one orbit stem in a take.

    build_eda.py fills one instance per orbit that has a stem file present.
    """
    orbit: int  # orbit number N (the "dN" index)
    role: Optional[str] = None  # generic ROLE label (fallback, NOT authoritative)
    active_pct: float = 0.0  # % of the stem's 2 s envelope bins above the lit floor (−52 dB)
    peak_db: float = -120.0  # sample peak over the whole decoded stem, in dB
    rms_db: float = -120.0  # "rms_db" of the profiled loudest window; −120 when no profile was produced
    centroid: Optional[float] = None  # Hz of the loudest window (None when no centroid was measured, e.g. silent)
    family: Optional[str] = None  # audio_lens.classify_family (measurement-aware)
    bands: dict[str, float] = Field(default_factory=dict)  # band-energy % of the window; empty when no profile
    silent: bool = False  # near-silent orbit (empty export / unused): peak_db below the silent threshold
class TakeEDA(BaseModel):
    """Audio-grounded EDA for one take. The corner-B truth audio_lens measures.

    Serialized by build_eda.py to eda_{take}.json with ``by_alias=True``, so the
    ``schema_`` field appears under the key ``"schema"``.
    """
    model_config = ConfigDict(populate_by_name=True)  # accept both "schema" and "schema_" on input
    schema_: str = Field("per-take EDA", alias="schema")  # fixed document-type tag
    take: str  # take name as it appears in the stem file names
    gig: Optional[str] = None  # site slug (a pointer, never an audio fact)
    dur_s: float = 0.0  # seconds; longest orbit duration in the take
    n_orbits: int = 0  # orbits with a stem file present
    n_active: int = 0  # orbits that are not silent
    peak_db: float = -120.0  # loudest orbit (take-level proxy)
    empty: bool = False  # whole take near-silent (sketch / empty export): n_active == 0
    orbits: list[OrbitEDA] = Field(default_factory=list)  # one entry per orbit with a stem, ascending
    provenance: Optional[Provenance] = None  # where and when the measurement was derived
class EDAStatus(BaseModel):
    """Stage-0 reachability ledger: per take, can we EDA it and from what?
    reach ∈ {local-stems | local-proxy | freebox-stems | none}. Drives the fetch
    manifest (seek-MINIMIZING) and shows what coverage is one `build_eda all` away.

    NOTE(review): build_eda.py's ledger command currently emits only
    "local-stems" rows, so n_takes and n_local carry the same count there.
    """
    model_config = ConfigDict(populate_by_name=True)  # accept both "schema" and "schema_" on input
    schema_: str = Field("EDA reachability ledger", alias="schema")  # fixed document-type tag
    as_of: str  # ISO date string of the scan
    n_takes: int = 0  # number of rows in `takes`
    n_local: int = 0  # takes reachable from local stems (no fetch)
    n_grounded: int = 0  # takes with an eda_*.json already emitted
    takes: list[dict] = Field(default_factory=list)  # {take, reach, n_orbits, grounded}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment