Commit 9134398c by PLN (Algolia)

fix(resolve): a folder is loose — kits read as kits, not a 'dominant' verdict

PLN: each SAMPLE is classified individually; a folder is only a loose grouping, often a
heterogeneous KIT. Added kind=single|dominant|kit; for kits, folder_agrees=None and
the run prints KIT [fam+fam] instead of a misleading dominant label plus flag. The folder-name
flag now fires only for folders claiming ~one family (cpluck->synth is still flagged). Regenerated palette.
parent 01b88ed5
......@@ -76,17 +76,28 @@ def resolve_folder(name, do_audio=True):
folder_fam = M.classify_sample_family(name) # L2 cross-check
label_source = ("filename" if not srcs.get("audio") else
"audio" if not srcs.get("filename") else "mixed")
homogeneous = dn / n >= 0.6
kit_like = len(dist) >= 2
# A folder is a LOOSE grouping; per_index is the ground truth. `kind` says how to
# READ this folder: single (one family), dominant (one family ≥60% + minority),
# kit (heterogeneous — jazz/glitch/electro1: no single family, use per_index).
kind = "single" if not kit_like else "dominant" if homogeneous else "kit"
# The folder-name disagreement only MEANS something when the folder really claims
# ~one family (the cpluck='keys'-but-sounds-synth / 808hc lesson). For a true kit
# there's nothing single to disagree with → N/A (None), never a flagged error.
folder_agrees = (folder_fam is None or folder_fam == dominant) if homogeneous else None
return {
"n": len(files),
"n_resolved": n,
"by_family": dict(dist.most_common()),
"dominant": dominant,
"kind": kind, # single | dominant | kit
"dominant": dominant, # modal family (read with kind!)
"conf": round(dn / n, 3), # fraction agreeing
"homogeneous": dn / n >= 0.6, # else a kit / mixed
"kit_like": len(dist) >= 2,
"homogeneous": homogeneous, # dominant ≥60%
"kit_like": kit_like, # ≥2 families present
"label_source": label_source, # filename | audio | mixed
"folder_name_family": folder_fam, # L2 (or None)
"folder_agrees": folder_fam is None or folder_fam == dominant,
"folder_agrees": folder_agrees, # None for kits (N/A)
"per_index": per,
}
......@@ -111,12 +122,17 @@ def cmd_run(all_folders=False, limit=None):
out[name] = r
if r["label_source"] in ("audio", "mixed"):
n_audio_folders += 1
if not r["folder_agrees"]:
if r["folder_agrees"] is False: # only single/dominant folders
flags += 1
tag = "" if r["folder_agrees"] else f" ⚠ folder='{r['folder_name_family']}'≠{r['dominant']}"
kit = " kit" if r["kit_like"] and not r["homogeneous"] else ""
if r["kind"] == "kit": # per_index is the truth here
fams = "+".join(dict(r["by_family"]))
print(f" [{i}/{len(names)}] {name:<24} KIT [{fams}] "
f"src={r['label_source']}", flush=True)
else:
tag = ("" if r["folder_agrees"] is not False
else f" ⚠ folder='{r['folder_name_family']}'≠{r['dominant']}")
print(f" [{i}/{len(names)}] {name:<24} {r['dominant']:<6} conf={r['conf']} "
f"src={r['label_source']}{kit}{tag}", flush=True)
f"src={r['label_source']}{tag}", flush=True)
prov = M.Provenance(source=M.Source.derived,
locator=f"sample_resolve: L1 filename + L3 {method}",
as_of=date.today()).model_dump(mode="json")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment