Commit 9134398c by PLN (Algolia)

fix(resolve): a folder is loose — kits read as kits, not a 'dominant' verdict

PLN: each SAMPLE is classified individually; a folder is only a loose grouping, often a
heterogeneous KIT. Added kind=single|dominant|kit. For kits, folder_agrees=None and
the run prints KIT [fam+fam] instead of a misleading dominant family plus a flag. The
folder-name flag now fires only for folders claiming ~one family (cpluck->synth is still
flagged). Regenerated palette.
parent 01b88ed5
...@@ -76,17 +76,28 @@ def resolve_folder(name, do_audio=True): ...@@ -76,17 +76,28 @@ def resolve_folder(name, do_audio=True):
folder_fam = M.classify_sample_family(name) # L2 cross-check folder_fam = M.classify_sample_family(name) # L2 cross-check
label_source = ("filename" if not srcs.get("audio") else label_source = ("filename" if not srcs.get("audio") else
"audio" if not srcs.get("filename") else "mixed") "audio" if not srcs.get("filename") else "mixed")
homogeneous = dn / n >= 0.6
kit_like = len(dist) >= 2
# A folder is a LOOSE grouping; per_index is the ground truth. `kind` says how to
# READ this folder: single (one family), dominant (one family ≥60% + minority),
# kit (heterogeneous — jazz/glitch/electro1: no single family, use per_index).
kind = "single" if not kit_like else "dominant" if homogeneous else "kit"
# The folder-name disagreement only MEANS something when the folder really claims
# ~one family (the cpluck='keys'-but-sounds-synth / 808hc lesson). For a true kit
# there's nothing single to disagree with → N/A (None), never a flagged error.
folder_agrees = (folder_fam is None or folder_fam == dominant) if homogeneous else None
return { return {
"n": len(files), "n": len(files),
"n_resolved": n, "n_resolved": n,
"by_family": dict(dist.most_common()), "by_family": dict(dist.most_common()),
"dominant": dominant, "kind": kind, # single | dominant | kit
"dominant": dominant, # modal family (read with kind!)
"conf": round(dn / n, 3), # fraction agreeing "conf": round(dn / n, 3), # fraction agreeing
"homogeneous": dn / n >= 0.6, # else a kit / mixed "homogeneous": homogeneous, # dominant ≥60%
"kit_like": len(dist) >= 2, "kit_like": kit_like, # ≥2 families present
"label_source": label_source, # filename | audio | mixed "label_source": label_source, # filename | audio | mixed
"folder_name_family": folder_fam, # L2 (or None) "folder_name_family": folder_fam, # L2 (or None)
"folder_agrees": folder_fam is None or folder_fam == dominant, "folder_agrees": folder_agrees, # None for kits (N/A)
"per_index": per, "per_index": per,
} }
...@@ -111,12 +122,17 @@ def cmd_run(all_folders=False, limit=None): ...@@ -111,12 +122,17 @@ def cmd_run(all_folders=False, limit=None):
out[name] = r out[name] = r
if r["label_source"] in ("audio", "mixed"): if r["label_source"] in ("audio", "mixed"):
n_audio_folders += 1 n_audio_folders += 1
if not r["folder_agrees"]: if r["folder_agrees"] is False: # only single/dominant folders
flags += 1 flags += 1
tag = "" if r["folder_agrees"] else f" ⚠ folder='{r['folder_name_family']}'≠{r['dominant']}" if r["kind"] == "kit": # per_index is the truth here
kit = " kit" if r["kit_like"] and not r["homogeneous"] else "" fams = "+".join(dict(r["by_family"]))
print(f" [{i}/{len(names)}] {name:<24} KIT [{fams}] "
f"src={r['label_source']}", flush=True)
else:
tag = ("" if r["folder_agrees"] is not False
else f" ⚠ folder='{r['folder_name_family']}'≠{r['dominant']}")
print(f" [{i}/{len(names)}] {name:<24} {r['dominant']:<6} conf={r['conf']} " print(f" [{i}/{len(names)}] {name:<24} {r['dominant']:<6} conf={r['conf']} "
f"src={r['label_source']}{kit}{tag}", flush=True) f"src={r['label_source']}{tag}", flush=True)
prov = M.Provenance(source=M.Source.derived, prov = M.Provenance(source=M.Source.derived,
locator=f"sample_resolve: L1 filename + L3 {method}", locator=f"sample_resolve: L1 filename + L3 {method}",
as_of=date.today()).model_dump(mode="json") as_of=date.today()).model_dump(mode="json")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment