Commit 78f564ff by PLN (Algolia)

feat(audio_lens): reusable spectral "ears" + self-verifying stem mixer

audio_lens.py consolidates the spectral toolbox (profile / stemmap / mix) so the
machine catches OBJECTIVE audio defects autonomously, reserving PLN's ears for
subjective feel.

- profile(file): band-energy % + centroid → is_broadband() gate (centroid >140Hz
  and >18% energy above 150Hz). A pure-kick mix is centroid ~111Hz, mid<12% → FAIL.
- mix(take, --orbits): EXPLICIT numpy sum of local interchange stems (no amix black
  box, no freebox), peak-normalized, then SELF-VERIFIES the render is broadband and
  exits non-zero on FAIL. Caught + fixed the kick-clone bug that shipped twice.
- stemmap(take): per-orbit activity (driving/sparse/silent) for arrangement reads.

Take35 mix rebuilt via the tool (v3): cen 168Hz, mid present, PASS — replaces the
two broken kick-clone mixes (deleted). Lesson: measure your own output; extend the
toolbox, no /tmp throwaways.

EDL: seed_edl_take89.py + master_edl_take89.json (9 typed edits, 3 bad_cut labels)
from PLN's WH-1000XM5 review, logged in performance_notes.md. tasks/011 captures the
TF-IDF + verify-your-output story. #25 reframed: auto-detect boundary sample-leak.
parent cb010b96
#!/usr/bin/env python3
"""audio_lens — reusable spectral "ears" for the salvage toolbox.
Claude can't hear, so this is how it listens: band-energy profiling, per-orbit
activity maps, and — crucially — **self-verification of rendered audio** so a mix
is never served without checking it actually contains what it should. Born from a
bug where a hand-rolled `amix` silently produced a pure-kick "mix" that got served
twice before PLN's ears caught it. Lesson encoded: measure your own output.
Reads the LOCAL Ardour interchange (no freebox). Needs numpy and an `ffmpeg` binary on PATH:
PY=/home/pln/Work/Sound/tidal-ears/.venv312/bin/python
$PY audio_lens.py profile FILE [--window 110:130] # what's in this file
$PY audio_lens.py stemmap Take35 # per-orbit activity grid
$PY audio_lens.py mix Take35 --orbits 1,2,3,4,5,8,9 --out mix.flac # sum+verify
Library: profile(), load_window(), orbit_envelope(), mix_take() — reuse in the EDL
renderer (#36), Judge mix path (#33), and locate-matrix L3 fingerprint (#34).
"""
from __future__ import annotations
import argparse, subprocess, sys
from pathlib import Path
import numpy as np
# Directory that orbit_files() searches for "<take>_Tidal <orbit>-1%L/R.wav" stems.
IX = Path("/home/pln/Work/Sound/Ardour/Tidal Multi/interchange/Tidal Multi/audiofiles")
# Working sample rate in Hz: decode() asks ffmpeg to resample to it (-ar), and it
# converts seconds to sample counts and FFT bins to frequencies everywhere else.
SR = 44100
# Orbit number -> human-readable label, used only when printing per-orbit lines.
ROLE = {1:"kick",2:"snare",3:"hats",4:"acid/bass",5:"cpluck",6:"meth_bass",
7:"d7",8:"break",9:"moog sub",10:"d10",11:"atmos",12:"d12"}
# bands (Hz) — kick/sub live <150; cpluck/acid-mid/break live 150-2k; air >2k
# Each entry is (label, lo, hi); profile() counts FFT bins with lo <= f < hi.
BANDS = [("<80",0,80), ("80-150",80,150), ("150-500",150,500),
("500-2k",500,2000), ("2k+",2000,SR/2)]
# ── decode helpers (ffmpeg → numpy, no soundfile dep) ─────────────────────────
def decode(path, ac=1, ss=None, t=None):
    """Decode an audio file to a float64 numpy array by piping it through ffmpeg.

    Args:
        path: input file (anything ffmpeg can read).
        ac: channel count requested from ffmpeg (``-ac``). The result is always
            a flat 1-D array, so with ``ac > 1`` the samples are interleaved.
        ss: optional start offset in seconds (``-ss``, given before ``-i``).
        t: optional duration in seconds (``-t``, given before ``-i``).

    Returns:
        1-D float64 array of samples at ``SR`` Hz.

    Raises:
        RuntimeError: if ffmpeg exits non-zero (e.g. missing or unreadable
            input). Without this check a failed decode looked exactly like an
            empty file and only surfaced later as an unrelated error.
    """
    cmd = ["ffmpeg", "-v", "error"]
    if ss is not None:
        cmd += ["-ss", str(ss)]
    if t is not None:
        cmd += ["-t", str(t)]
    cmd += ["-i", str(path), "-ac", str(ac), "-ar", str(SR), "-f", "f32le", "-"]
    proc = subprocess.run(cmd, capture_output=True)
    if proc.returncode != 0:
        detail = proc.stderr.decode("utf-8", "replace").strip()
        if not detail:
            detail = f"exit status {proc.returncode}"
        raise RuntimeError(f"ffmpeg could not decode {path}: {detail}")
    # The stream is requested as f32le, so read it as little-endian explicitly
    # rather than relying on the host's native float32 byte order.
    return np.frombuffer(proc.stdout, dtype="<f4").astype(np.float64)
def orbit_files(take, o):
    """Locate the left/right stem WAVs for orbit ``o`` of ``take`` under ``IX``.

    Returns a ``(left, right)`` tuple; each entry is the file's Path when it
    exists on disk and None otherwise.
    """
    found = []
    for side in ("L", "R"):
        candidate = IX / f"{take}_Tidal {o}-1%{side}.wav"
        found.append(candidate if candidate.exists() else None)
    return tuple(found)
# ── the lens: spectral profile ────────────────────────────────────────────────
def profile(sig):
    """Summarise a signal: RMS level, spectral centroid and per-band energy share.

    Returns None when ``sig`` holds fewer than ``SR // 2`` samples. Otherwise
    returns a dict with:
      - "rms_db":   20*log10 of the RMS of the raw (un-windowed) samples,
      - "centroid": power-weighted mean frequency in Hz,
      - "bands":    {label: percentage of total spectral power} for each BANDS entry.
    """
    if sig.size < SR // 2:
        return None
    # One Hann-windowed FFT over the whole signal; work on the power spectrum.
    taper = np.hanning(len(sig))
    power = np.abs(np.fft.rfft(sig * taper)) ** 2
    freqs = np.fft.rfftfreq(len(sig), 1 / SR)
    total = power.sum() + 1e-20  # epsilon keeps an all-zero signal from dividing by 0
    shares = {}
    for label, lo, hi in BANDS:
        in_band = (freqs >= lo) & (freqs < hi)
        shares[label] = 100 * power[in_band].sum() / total
    rms = np.sqrt(np.mean(sig**2))
    return {
        "rms_db": 20 * np.log10(rms + 1e-9),
        "centroid": float((freqs * power).sum() / total),
        "bands": shares,
    }
def is_broadband(p):
    """Gate: does profile ``p`` look like a full mix rather than kick/sub only?

    Passes only when more than 18% of the energy sits in the bands above 150 Hz
    AND the spectral centroid is above 140 Hz. A missing/empty profile fails.
    """
    if not p:
        return False
    shares = p["bands"]
    mid_high_pct = sum(shares[label] for label in ("150-500", "500-2k", "2k+"))
    return mid_high_pct > 18 and p["centroid"] > 140
def load_window(path, t0, t1, ac=1):
    """Decode the slice of ``path`` that starts at ``t0`` seconds and lasts ``t1 - t0``."""
    duration = t1 - t0
    return decode(path, ac=ac, ss=t0, t=duration)
# ── per-orbit activity envelope (for stem maps + fingerprints) ────────────────
def orbit_envelope(take, o, bin_s=2.0):
    """RMS level in dB for each consecutive ``bin_s``-second bin of one orbit.

    Returns None when the orbit has no left stem. When a right stem exists the
    two channels are averaged to mono over their common length. Trailing samples
    that do not fill a whole bin are dropped. A bin whose RMS is not above 1e-7
    is reported as -120.0 dB.
    """
    left_path, right_path = orbit_files(take, o)
    if left_path is None:
        return None
    mono = decode(left_path)
    if right_path is not None:
        right = decode(right_path)
        common = min(len(mono), len(right))
        mono = (mono[:common] + right[:common]) / 2
    hop = int(bin_s * SR)
    n_bins = len(mono) // hop

    def _bin_db(chunk):
        # RMS of one bin converted to dB, floored for (near-)silent bins.
        level = float(np.sqrt(np.mean(chunk**2))) if chunk.size else 0.0
        return 20 * np.log10(level) if level > 1e-7 else -120.0

    return np.array([_bin_db(mono[i * hop:(i + 1) * hop]) for i in range(n_bins)])
# ── transparent mix + SELF-VERIFY ─────────────────────────────────────────────
def mix_take(take, orbits, out, target_tp=-1.0, window=(110, 130)):
    """Explicit numpy sum of orbit stems -> peak-normalized stereo file, then verify.

    Args:
        take: take name used to locate stems via orbit_files().
        orbits: iterable of orbit numbers to sum.
        out: output path handed to ffmpeg (container chosen by its extension).
        target_tp: peak level in dBFS that the summed mix is normalized to.
        window: (start_s, end_s) span of the mix that is profiled for the check.

    Returns:
        ``(out, p, ok)`` where ``p`` is the profile() of the mono fold-down of
        the window (None if the window holds under SR // 2 samples) and ``ok``
        is is_broadband(p).

    Raises:
        SystemExit: if no requested orbit yielded any audio.
        subprocess.CalledProcessError: if the ffmpeg encode fails.

    Notes:
        NEVER uses amix. The profile is taken from the in-memory normalized sum
        that was sent to ffmpeg, not by re-decoding the written file. The mix is
        as long as the shortest stem that was summed.
    """
    acc_l = acc_r = None
    for o in orbits:
        left_path, right_path = orbit_files(take, o)
        if left_path is None:
            print(f" ! orbit-{o:02d} missing, skipped", file=sys.stderr)
            continue
        l = decode(left_path)
        # A stem with no right file is treated as mono: left feeds both sides.
        r = decode(right_path) if right_path is not None else l
        n = min(len(l), len(r))
        if n == 0:
            # An empty stem would truncate the whole mix to zero samples and
            # then crash on max() of an empty array; skip it like a missing one.
            print(f" ! orbit-{o:02d} decoded to 0 samples, skipped", file=sys.stderr)
            continue
        l, r = l[:n], r[:n]
        if acc_l is None:
            acc_l, acc_r = np.zeros(n), np.zeros(n)
        m = min(len(acc_l), n)
        acc_l, acc_r = acc_l[:m] + l[:m], acc_r[:m] + r[:m]
        print(f" + orbit-{o:02d} {ROLE.get(o,''):<10} peak {20*np.log10(np.abs(l).max()+1e-9):.1f}dB")
    if acc_l is None:
        raise SystemExit("no orbits loaded")

    # One common gain for both channels so the louder one lands on target_tp.
    peak = max(np.abs(acc_l).max(), np.abs(acc_r).max())
    gain = 10 ** (target_tp / 20) / (peak + 1e-12)
    acc_l *= gain
    acc_r *= gain

    # Interleave L/R as little-endian float32 to match the "-f f32le" input spec.
    interleaved = np.empty(len(acc_l) * 2, dtype="<f4")
    interleaved[0::2], interleaved[1::2] = acc_l, acc_r
    subprocess.run(["ffmpeg", "-y", "-v", "error", "-f", "f32le", "-ar", str(SR), "-ac", "2",
                    "-i", "pipe:0", str(out)], input=interleaved.tobytes(), check=True)

    # Self-check on the mono fold-down of the verification window.
    lo, hi = int(window[0] * SR), int(window[1] * SR)
    seg = (acc_l[lo:hi] + acc_r[lo:hi]) / 2
    p = profile(seg)
    ok = is_broadband(p)
    return out, p, ok
# ── CLI ───────────────────────────────────────────────────────────────────────
def _fmt(p):
    """Render a profile dict as one line: centroid, RMS, then each band's percentage."""
    shares = p["bands"]
    head = f"cen={p['centroid']:.0f}Hz rms={p['rms_db']:.1f}dB | "
    band_bits = [f"{label}={shares[label]:.0f}" for label, _lo, _hi in BANDS]
    return head + " ".join(band_bits)
def cmd_profile(a):
    """CLI handler for ``profile``: print the spectral profile of one file window.

    ``a.window`` is an optional "start:end" string in seconds; without it the
    first 20 seconds are profiled.

    Raises:
        SystemExit: when the window yields too little audio for profile() to
            analyse (it needs at least SR // 2 samples, i.e. half a second).
    """
    if a.window:
        t0, t1 = map(float, a.window.split(":"))
    else:
        t0, t1 = 0, 20
    p = profile(load_window(a.file, t0, t1))
    if p is None:
        # profile() returns None for short input; formatting it would crash.
        raise SystemExit(f"{Path(a.file).name} [{t0:.0f}-{t1:.0f}s]: "
                         "too little audio decoded to profile (need at least 0.5s)")
    print(f"{Path(a.file).name} [{t0:.0f}-{t1:.0f}s]: {_fmt(p)}")
    print(f" broadband? {'✓ yes' if is_broadband(p) else '✗ NO — kick/sub-only'}")
def cmd_stemmap(a):
    """CLI handler for ``stemmap``: print a per-orbit activity table for a take.

    Orbits 1-12 are probed; each one that has a stem is summarised by its peak
    bin level and the percentage of bins louder than -45 dB, then tagged
    driving (>50%), sparse (>5%) or ~silent. All envelopes are cut to the
    shortest one so the percentages share a common length.

    Raises:
        SystemExit: if the take has no stems, or the shortest stem does not
            fill a single bin (there would be nothing to tabulate).
    """
    envs = {}
    for o in range(1, 13):
        env = orbit_envelope(a.take, o)
        if env is not None:
            envs[o] = env
    if not envs:
        raise SystemExit(f"{a.take}: no orbit stems found under {IX}")
    n = min(len(e) for e in envs.values())
    if n == 0:
        raise SystemExit(f"{a.take}: shortest stem does not fill one 2s bin, nothing to map")
    print(f"\n{a.take}: {n} bins×2s, {len(envs)} orbits\n{'orbit':<14}{'peak':>6}{'%aud':>6}")
    HEAR = -45  # bins above this level (dB) count as audible
    for o, e in envs.items():
        e = e[:n]
        pct = 100 * (e > HEAR).sum() / n
        tag = "driving" if pct > 50 else "sparse" if pct > 5 else "~silent"
        print(f"{o:>2} {ROLE.get(o,''):<11}{e.max():>6.1f}{pct:>5.0f}% {tag}")
def cmd_mix(a):
    """CLI handler for ``mix``: render the requested orbits and report the self-check.

    ``a.orbits`` is a comma-separated list of orbit numbers; empty entries
    (e.g. from a trailing comma) are ignored. Exits with status 3 when the
    render fails the broadband check, including the case where the mix is too
    short for the verification window to be profiled at all.
    """
    orbits = [int(x) for x in a.orbits.split(",") if x.strip()]
    out, p, ok = mix_take(a.take, orbits, a.out)
    if p is None:
        # mix_take could not profile the window; report it instead of crashing
        # in _fmt so the FAIL exit code is still delivered.
        print("\nRENDER: no profile — under 0.5s of audio in the verification window")
        print(" → ✗ FAIL — render could not be verified, DO NOT serve")
    else:
        print(f"\nRENDER: {_fmt(p)}")
        print(f" → {'✓ PASS broadband' if ok else '✗ FAIL — still kick/sub-only, DO NOT serve'}")
    print(f"wrote {out}")
    if not ok:
        sys.exit(3)
def main():
    """Build the profile / stemmap / mix command line and run the chosen handler."""
    parser = argparse.ArgumentParser(description="reusable spectral ears for the toolbox")
    commands = parser.add_subparsers(dest="cmd", required=True)

    profile_cmd = commands.add_parser("profile")
    profile_cmd.add_argument("file")
    profile_cmd.add_argument("--window")
    profile_cmd.set_defaults(func=cmd_profile)

    stemmap_cmd = commands.add_parser("stemmap")
    stemmap_cmd.add_argument("take")
    stemmap_cmd.set_defaults(func=cmd_stemmap)

    mix_cmd = commands.add_parser("mix")
    mix_cmd.add_argument("take")
    mix_cmd.add_argument("--orbits", required=True)
    mix_cmd.add_argument("--out", required=True)
    mix_cmd.set_defaults(func=cmd_mix)

    args = parser.parse_args()
    args.func(args)


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment