Commit c32db821 by PLN (Algolia)

perf(resolve): batch the resolver's audio path into one CLAP forward/folder

resolve_folder used to call classify_file once per file, which cost one CLAP
forward pass per opaque file (a file whose name yields no family). Now it runs L1
(filename parsing) for all files first, then classifies ONLY the opaque ones in a
single batched forward pass. classify_files() is extracted as the one place where
per-file audio prediction lives (CLAP batched, PANNs per-file); classify_file
and classify_folder both route through it so batching can't be lost by accident.
Validated on the jazz kit (per-index BD/HH/SN via filename, CB/P1/P2 via audio).
parent 16937a78
...@@ -150,38 +150,16 @@ def clap_vectors(paths): ...@@ -150,38 +150,16 @@ def clap_vectors(paths):
return out return out
def classify_file(path): def classify_files(files):
"""One sample → (family, confidence) or None, per the active METHOD: """Batched per-file classification → list of (family, conf)|None aligned to `files`.
clap (fine/coarse) · panns (AudioSet) · ensemble (mean of both vectors).""" CLAP runs as ONE forward across all files (the expensive tower is shared); PANNs,
vecs = [] when in play, stays per-file (AudioSet model has no batched path here). This is the
if METHOD in ("clap", "ensemble"): one place per-file audio prediction lives — classify_file and the resolver both
v = clap_vector(path) route through it so batching is never accidentally lost."""
if v:
vecs.append(v)
if METHOD in ("panns", "ensemble"):
import sample_panns
v = sample_panns.family_vector(path)
if v:
vecs.append(v)
if not vecs:
return None
keys = ONT.FAMILIES
avg = {k: sum(v.get(k, 0.0) for v in vecs) / len(vecs) for k in keys}
f = max(avg, key=avg.get)
return f, round(avg[f], 3)
def classify_folder(name):
"""Aggregate per-file predictions for a folder → distribution + dominant.
CLAP is batched across the folder's files (one forward); PANNs stays per-file."""
files = folder_files(name)
if not files:
return None
files = files[:MAX_FILES]
clap_vs = clap_vectors(files) if METHOD in ("clap", "ensemble") else [None] * len(files) clap_vs = clap_vectors(files) if METHOD in ("clap", "ensemble") else [None] * len(files)
if METHOD in ("panns", "ensemble"): if METHOD in ("panns", "ensemble"):
import sample_panns import sample_panns
dist, confs = Counter(), [] out = []
for i, f in enumerate(files): for i, f in enumerate(files):
vecs = [] vecs = []
if clap_vs[i]: if clap_vs[i]:
...@@ -191,11 +169,34 @@ def classify_folder(name): ...@@ -191,11 +169,34 @@ def classify_folder(name):
if pv: if pv:
vecs.append(pv) vecs.append(pv)
if not vecs: if not vecs:
out.append(None)
continue continue
avg = {k: sum(v.get(k, 0.0) for v in vecs) / len(vecs) for k in ONT.FAMILIES} avg = {k: sum(v.get(k, 0.0) for v in vecs) / len(vecs) for k in ONT.FAMILIES}
fam = max(avg, key=avg.get) fam = max(avg, key=avg.get)
out.append((fam, round(avg[fam], 3)))
return out
def classify_file(path):
"""One sample → (family, confidence) or None, per the active METHOD:
clap (fine/coarse) · panns (AudioSet) · ensemble (mean of both vectors)."""
return classify_files([path])[0]
def classify_folder(name):
"""Aggregate per-file predictions for a folder → distribution + dominant.
CLAP is batched across the folder's files (one forward); PANNs stays per-file."""
files = folder_files(name)
if not files:
return None
files = files[:MAX_FILES]
dist, confs = Counter(), []
for r in classify_files(files):
if not r:
continue
fam, conf = r
dist[fam] += 1 dist[fam] += 1
confs.append(round(avg[fam], 3)) confs.append(conf)
n = sum(dist.values()) n = sum(dist.values())
if not n: if not n:
return None return None
......
...@@ -51,9 +51,20 @@ def resolve_folder(name, do_audio=True): ...@@ -51,9 +51,20 @@ def resolve_folder(name, do_audio=True):
if not files: if not files:
return None return None
files = files[:CLF.MAX_FILES] files = files[:CLF.MAX_FILES]
# L1 first for every file; collect the opaque ones and classify their AUDIO in a
# single batched forward (one CLAP pass for the whole folder, not one per file).
fams = [META.parse_name(f.stem)["family"] for f in files]
srcs_per = ["filename" if fam else None for fam in fams]
if do_audio:
opaque = [i for i, fam in enumerate(fams) if not fam]
if opaque:
audio = CLF.classify_files([files[i] for i in opaque])
for i, r in zip(opaque, audio):
if r:
fams[i], srcs_per[i] = r[0], "audio"
per, dist, srcs = [], Counter(), Counter() per, dist, srcs = [], Counter(), Counter()
for i, f in enumerate(files): for i, f in enumerate(files):
fam, src = resolve_file(f, do_audio=do_audio) fam, src = fams[i], srcs_per[i]
per.append({"index": i, "name": f.stem, "family": fam, "source": src}) per.append({"index": i, "name": f.stem, "family": fam, "source": src})
if fam: if fam:
dist[fam] += 1 dist[fam] += 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment