Commit 6351e0ae by PLN (Algolia)

feat(unwrapped): seeded vibe-search + find-similar + semantic vibe-map (#82,#87)

- build_unwrapped: 2D PCA of the CLAP embeds → per-sample 'vibe map' coords +
  16 seed-vibe chips (PLN's own words — the on-ramp, since users don't know what
  to type). vibe0/vibe1 join the axis picker as a semantic-space lens.
- unwrapped.html: VIBE SEARCH box (free text → /vibe) + clickable seed chips;
  results highlight on the map (dim misses, size hits by similarity, ring the top,
  faint-violet→magenta ramp) and auto-reveal the vibe map; result strip auditions.
  Shift-click any dot → /similar (nearest neighbours in embedding space). Null-safe
  plotting for vibe/raw axes. Graceful banner when the endpoint is absent.
parent ac4643d0
...@@ -34,9 +34,20 @@ import sample_meta as META ...@@ -34,9 +34,20 @@ import sample_meta as META
HERE = Path(__file__).resolve().parent HERE = Path(__file__).resolve().parent
TOKENS = HERE.parent / "ui" / "src" / "tokens.json" TOKENS = HERE.parent / "ui" / "src" / "tokens.json"
FAMILIES = HERE / "sample_families.json" FAMILIES = HERE / "sample_families.json"
EMBEDS = HERE / "semantics_embeds.npz" # CLAP per-sample embeds (vibe map + search)
OUT = HERE / "unwrapped.json" OUT = HERE / "unwrapped.json"
SAMPLES_LINK = HERE / "_samples" # relative symlink → Dirt-Samples (gitignored) SAMPLES_LINK = HERE / "_samples" # relative symlink → Dirt-Samples (gitignored)
# Seed vibe-search chips — PLN's OWN words (users don't know what to type). Drawn from
# the sample_semantics ontology but phrased as natural queries. These are the on-ramp;
# the box takes any free text via the /vibe endpoint.
# 16 free-text phrases; exported unchanged under the "seed_vibes" key of the payload
# assembled in main().
SEED_VIBES = [
"warm dusty rhodes", "lush rich pad", "shadowy ténébreux synth", "californian g-funk",
"nu jazz keys", "boom bap drum break", "acid bassline", "ethereal dreamy texture",
"punchy tight kick", "gritty distorted reese", "airy breathy vocal", "deep sub bass",
"glassy bell melody", "jungle amen break", "soulful brass stab", "hypnotic techno stab",
]
# Raw features worth exposing as direct axes (music-aficionado language, not just PCs). # Raw features worth exposing as direct axes (music-aficionado language, not just PCs).
RAW_AXES = [ RAW_AXES = [
("spectral_centroid", "Brightness", "spectral centroid (Hz) — dull → bright", "Hz"), ("spectral_centroid", "Brightness", "spectral centroid (Hz) — dull → bright", "Hz"),
...@@ -110,6 +121,21 @@ def main(): ...@@ -110,6 +121,21 @@ def main():
fam_pal = {f["key"]: {"label": f["label"], "color": f["base"], "glyph": f["glyph"]} fam_pal = {f["key"]: {"label": f["label"], "color": f["base"], "glyph": f["glyph"]}
for f in tok["sample"]} for f in tok["sample"]}
# ── semantic vibe-map: 2D PCA of the CLAP embeddings (where SOUNDS cluster, not
# features). Joined by "folder/stem"; samples without an embed get no "vibe" field. ──
vibe_xy = {}
if EMBEDS.exists():
z = np.load(EMBEDS, allow_pickle=True)
Memb, enames = z["embeds"].astype(float), [str(x) for x in z["names"]]
vp = PCA(n_components=2).fit(Memb)
proj2 = vp.transform(Memb)
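# orient: flip any component whose mean is negative.
# NOTE(review): PCA centres its input, so each projected column's mean is ~0 and this
# sign test rides on float round-off — confirm, or key the flip on something sturdier.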
for j in range(2):
if proj2[:, j].mean() < 0:
proj2[:, j] *= -1
vibe_xy = {enames[i]: [round(float(proj2[i, 0]), 3), round(float(proj2[i, 1]), 3)]
for i in range(len(enames))}
# ── per-folder kind / agreement + real .wav filenames ────────────────────── # ── per-folder kind / agreement + real .wav filenames ──────────────────────
famdoc = json.loads(FAMILIES.read_text())["families"] famdoc = json.loads(FAMILIES.read_text())["families"]
folder_meta = {name: {"kind": v["kind"], "dominant": v["dominant"], folder_meta = {name: {"kind": v["kind"], "dominant": v["dominant"],
...@@ -136,6 +162,9 @@ def main(): ...@@ -136,6 +162,9 @@ def main():
"feat": {kk: round(float(X[i, fidx[kk]]), 4) "feat": {kk: round(float(X[i, fidx[kk]]), 4)
for kk in RAW_KEEP if kk in fidx}, for kk in RAW_KEEP if kk in fidx},
} }
vk = f"{r['folder']}/{r['file']}"
if vk in vibe_xy:
rec["vibe"] = vibe_xy[vk]
if wav: if wav:
rec["wav"] = f"_samples/{r['folder']}/{wav}" rec["wav"] = f"_samples/{r['folder']}/{wav}"
samples.append(rec) samples.append(rec)
...@@ -199,6 +228,12 @@ def main(): ...@@ -199,6 +228,12 @@ def main():
"pc_axes": pc_axes, "pc_axes": pc_axes,
"raw_axes": [{"key": kk, "label": lbl, "desc": desc, "unit": unit} "raw_axes": [{"key": kk, "label": lbl, "desc": desc, "unit": unit}
for kk, lbl, desc, unit in RAW_AXES if kk in fidx], for kk, lbl, desc, unit in RAW_AXES if kk in fidx],
"vibe_axes": ([{"key": "vibe0", "label": "Vibe ① (semantic)",
"lo": "one timbral pole", "hi": "the other"},
{"key": "vibe1", "label": "Vibe ② (semantic)",
"lo": "one timbral pole", "hi": "the other"}] if vibe_xy else []),
"seed_vibes": SEED_VIBES,
"n_vibe": len(vibe_xy),
"rf_importance": rf, "rf_importance": rf,
"correlation": correlation, "correlation": correlation,
"contingency": grid, "contingency": grid,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment