Commit e9bba22c by PLN (Algolia)

fix(classifier): refuse to guess kits + source-named folders; fix sept1 morpher

PLN-flagged chain of labeling errors, traced to the SSOT classifier:
- 'jazz' was matched to BREAK, but jazz is a multisample KIT (jazz:0=kick,
  :1=snare/hat…). A folder name is not a reliable family signal: it may be one
  family, a heterogeneous kit, or a demucs grab named after a SOURCE song
  (wap, take5, the_revolution, xplosive, rample*). classify_sample_family now
  fires ONLY on names that lexically encode an instrument; everything else is
  None (= needs per-sample analysis). No 'kit registry' (that's name-guessing too).
- removed over-reaching genre/source tokens: jazz, dnb, jungle, loop from break;
  drum from perc. This also FIXES jungle_pads (→pad, was break) and
  jungle_vocals (→vox). amen kept (amencutup genuinely is the Amen break).
- tempo: strip Tidal '--' line comments before parsing cps (ton_numero's
  commented-out morpher no longer counts); a track with a live 'cps (range …)'
  is now flagged morph even when it also declares a fixed setcps. morphing=1
  (Septembre 1er, 60→180), was 0.
- report: + stage_tempo_by_year, sources/roadmap, recurrence gig_slugs,
  classified/unclassified coverage (21% of palette uses need analysis, honest).
- tests: classifier refuses kit/source names; jungle_pads→pad guard. 60 green.
parent 3f2863d4
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
"min": 80.0, "min": 80.0,
"max": 170.0, "max": 170.0,
"median": 120.0, "median": 120.0,
"mean": 120.2, "mean": 120.5,
"morphing_tracks": 0, "morphing_tracks": 1,
"histogram": { "histogram": {
"80": 8, "80": 8,
"90": 7, "90": 7,
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
"110": 11, "110": 11,
"120": 23, "120": 23,
"130": 4, "130": 4,
"140": 7, "140": 6,
"150": 2, "150": 2,
"160": 7, "160": 8,
"170": 1 "170": 1
}, },
"ac_delta": [ "ac_delta": [
...@@ -53,6 +53,12 @@ ...@@ -53,6 +53,12 @@
"delta": 60.0 "delta": 60.0
}, },
{ {
"track": "You My Sunshine",
"score_bpm": 166.0,
"meta_bpm": 144,
"delta": 22.0
},
{
"track": "Contre visite", "track": "Contre visite",
"score_bpm": 90.0, "score_bpm": 90.0,
"meta_bpm": 80, "meta_bpm": 80,
...@@ -77,12 +83,6 @@ ...@@ -77,12 +83,6 @@
"delta": 0.0 "delta": 0.0
}, },
{ {
"track": "Septembre 1er",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Blue Gold", "track": "Blue Gold",
"score_bpm": 140.0, "score_bpm": 140.0,
"meta_bpm": 140, "meta_bpm": 140,
...@@ -323,12 +323,6 @@ ...@@ -323,12 +323,6 @@
"delta": 0.0 "delta": 0.0
}, },
{ {
"track": "You My Sunshine",
"score_bpm": 144.0,
"meta_bpm": 144,
"delta": 0.0
},
{
"track": "Nouveau Soleil", "track": "Nouveau Soleil",
"score_bpm": 110.0, "score_bpm": 110.0,
"meta_bpm": 110, "meta_bpm": 110,
...@@ -747,7 +741,7 @@ ...@@ -747,7 +741,7 @@
"track": "live/collab/baba/sept1.tidal", "track": "live/collab/baba/sept1.tidal",
"created": "2024-09-06", "created": "2024-09-06",
"bpm": 120.0, "bpm": 120.0,
"morph": false "morph": true
}, },
{ {
"name": "Drifting Soul", "name": "Drifting Soul",
...@@ -900,7 +894,7 @@ ...@@ -900,7 +894,7 @@
"name": "You My Sunshine", "name": "You My Sunshine",
"track": "live/midi/nova/dnb/liquid/you_my_sunshine.tidal", "track": "live/midi/nova/dnb/liquid/you_my_sunshine.tidal",
"created": "2026-03-18", "created": "2026-03-18",
"bpm": 144.0, "bpm": 166.0,
"morph": false "morph": false
}, },
{ {
...@@ -1080,7 +1074,7 @@ ...@@ -1080,7 +1074,7 @@
"n": 45, "n": 45,
"bpm_median": 117, "bpm_median": 117,
"bpm_min": 80.0, "bpm_min": 80.0,
"bpm_max": 165.0, "bpm_max": 166.0,
"tracks": [ "tracks": [
{ {
"name": "Sessions Break", "name": "Sessions Break",
...@@ -1239,10 +1233,6 @@ ...@@ -1239,10 +1233,6 @@
"bpm": 138.0 "bpm": 138.0
}, },
{ {
"name": "You My Sunshine",
"bpm": 144.0
},
{
"name": "Nuit Agitée", "name": "Nuit Agitée",
"bpm": 160.0 "bpm": 160.0
}, },
...@@ -1261,6 +1251,10 @@ ...@@ -1261,6 +1251,10 @@
{ {
"name": "Break the Loop", "name": "Break the Loop",
"bpm": 165.0 "bpm": 165.0
},
{
"name": "You My Sunshine",
"bpm": 166.0
} }
], ],
"distinctive_samples": [ "distinctive_samples": [
...@@ -1453,23 +1447,28 @@ ...@@ -1453,23 +1447,28 @@
"sn": 5 "sn": 5
}, },
"families": { "families": {
"break": 112,
"synth": 85, "synth": 85,
"snare": 72, "snare": 72,
"bass": 64, "break": 67,
"bass": 65,
"hat": 39, "hat": 39,
"keys": 37, "keys": 37,
"kick": 27, "kick": 27,
"vox": 20, "vox": 24,
"fx": 18, "fx": 18,
"perc": 9, "pad": 7,
"pad": 5,
"lead": 4 "lead": 4
}, },
"classified_uses": 445,
"unclassified_uses": 118,
"unclassified_names": 64,
"unclassified_top": { "unclassified_top": {
"jazz": 37,
"drum": 5,
"armora": 3, "armora": 3,
"dr": 3, "dr": 3,
"90s_matrix": 3, "90s_matrix": 3,
"drums_atari": 2,
"praise": 2, "praise": 2,
"fsynth": 2, "fsynth": 2,
"superfork": 2, "superfork": 2,
...@@ -1478,10 +1477,7 @@ ...@@ -1478,10 +1477,7 @@
"supersiren": 2, "supersiren": 2,
"nujazz_beats120": 2, "nujazz_beats120": 2,
"ifdrums": 1, "ifdrums": 1,
"rhadamanthe_melo": 1, "rhadamanthe_melo": 1
"ccc": 1,
"ghost": 1,
"jane_wang": 1
}, },
"idioms_top": [ "idioms_top": [
{ {
...@@ -1554,102 +1550,274 @@ ...@@ -1554,102 +1550,274 @@
{ {
"name": "Sunny Side Up", "name": "Sunny Side Up",
"gigs": 11, "gigs": 11,
"track": "live/midi/nova/lounge/sunny_side_up.tidal" "track": "live/midi/nova/lounge/sunny_side_up.tidal",
"gig_slugs": [
"2024/algolia-last-all-hands",
"2024/la-french-stack",
"2025/air-elementeuf",
"2025/algolia-rko",
"2025/bunker",
"2025/cosmicfest",
"2025/fairyteuf",
"2025/la-french-stack",
"2025/raise",
"2025/val-thorens",
"2026/montreuil-algorave"
]
}, },
{ {
"name": "Café Tiède", "name": "Café Tiède",
"gigs": 10, "gigs": 10,
"track": "live/midi/nova/nujazz/cafe_tiede.tidal" "track": "live/midi/nova/nujazz/cafe_tiede.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/la-french-stack",
"2024/toplap-20-years",
"2025/air-elementeuf",
"2025/algolia-rko",
"2025/la-french-stack",
"2025/raise"
]
}, },
{ {
"name": "Contre visite", "name": "Contre visite",
"gigs": 9, "gigs": 9,
"track": "live/midi/nova/ambient/contre_visite.tidal" "track": "live/midi/nova/ambient/contre_visite.tidal",
"gig_slugs": [
"2022/bazurto",
"2023/cmny-2",
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/divin-live",
"2024/la-french-stack",
"2024/velociteuf",
"2025/algolia-rko",
"2025/la-french-stack"
]
}, },
{ {
"name": "Force Motrice", "name": "Force Motrice",
"gigs": 9, "gigs": 9,
"track": "live/midi/nova/dnb/force_motrice.tidal" "track": "live/midi/nova/dnb/force_motrice.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/la-french-stack",
"2024/toplap-20-years",
"2025/bunker",
"2025/la-french-stack",
"2025/raise"
]
}, },
{ {
"name": "Nuit Agitée", "name": "Nuit Agitée",
"gigs": 8, "gigs": 8,
"track": "live/midi/nova/breaks/nuit_agitee.tidal" "track": "live/midi/nova/breaks/nuit_agitee.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/toplap-20-years",
"2025/algolia-rko",
"2025/raise",
"2025/val-thorens"
]
}, },
{ {
"name": "Salut Nu", "name": "Salut Nu",
"gigs": 8, "gigs": 8,
"track": "live/midi/nova/nujazz/salut_nu.tidal" "track": "live/midi/nova/nujazz/salut_nu.tidal",
"gig_slugs": [
"2023/toplap-solstice",
"2024/algolia-fdlm",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/la-french-stack",
"2025/air-elementeuf",
"2025/la-french-stack",
"2025/raise"
]
}, },
{ {
"name": "Permanence", "name": "Permanence",
"gigs": 7, "gigs": 7,
"track": "live/collab/raph/permanence.tidal" "track": "live/collab/raph/permanence.tidal",
"gig_slugs": [
"2023/toplap-solstice",
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/la-french-stack",
"2024/toplap-20-years",
"2025/algolia-rko",
"2025/la-french-stack"
]
}, },
{ {
"name": "Invoque l'ete", "name": "Invoque l'ete",
"gigs": 7, "gigs": 7,
"track": "live/midi/nova/lounge/invoque_ete.tidal" "track": "live/midi/nova/lounge/invoque_ete.tidal",
"gig_slugs": [
"2022/bazurto",
"2023/cmny-2",
"2023/devcon23",
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2024/divin-live",
"2025/algolia-rko"
]
}, },
{ {
"name": "Café Glacé", "name": "Café Glacé",
"gigs": 7, "gigs": 7,
"track": "live/midi/nova/nujazz/cafe_glace.tidal" "track": "live/midi/nova/nujazz/cafe_glace.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/toplap-20-years",
"2025/air-elementeuf",
"2025/raise",
"2025/val-thorens"
]
}, },
{ {
"name": "Septembre 1er", "name": "Septembre 1er",
"gigs": 6, "gigs": 6,
"track": "live/collab/baba/sept1.tidal" "track": "live/collab/baba/sept1.tidal",
"gig_slugs": [
"2025/air-elementeuf",
"2025/bunker",
"2025/cosmicfest",
"2025/raise",
"2025/val-thorens",
"2026/montreuil-algorave"
]
}, },
{ {
"name": "L'or Bleu", "name": "L'or Bleu",
"gigs": 6, "gigs": 6,
"track": "live/collab/mousquetaires/blue_gold.tidal" "track": "live/collab/mousquetaires/blue_gold.tidal",
"gig_slugs": [
"2024/38c3-toilet",
"2024/algolia-last-all-hands",
"2024/cookie-collective-compilation",
"2025/raise",
"2025/val-thorens",
"2026/montreuil-algorave"
]
}, },
{ {
"name": "Alerte Verte", "name": "Alerte Verte",
"gigs": 6, "gigs": 6,
"track": "live/midi/nova/dnb/alerte_verte.tidal" "track": "live/midi/nova/dnb/alerte_verte.tidal",
"gig_slugs": [
"2022/bazurto",
"2023/cmny-2",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2024/divin-live",
"2025/val-thorens"
]
}, },
{ {
"name": "Café Bouillant", "name": "Café Bouillant",
"gigs": 6, "gigs": 6,
"track": "live/midi/nova/nujazz/cafe_bouillant.tidal" "track": "live/midi/nova/nujazz/cafe_bouillant.tidal",
"gig_slugs": [
"2023/toplap-solstice",
"2024/algolia-fdlm",
"2024/algolia-last-all-hands",
"2025/air-elementeuf",
"2025/algolia-rko",
"2025/val-thorens"
]
}, },
{ {
"name": "Acidule", "name": "Acidule",
"gigs": 5, "gigs": 5,
"track": "live/collab/raph/acidule.tidal" "track": "live/collab/raph/acidule.tidal",
"gig_slugs": [
"2024/38c3-toilet",
"2024/ccc-live",
"2024/cookie-collective-compilation",
"2025/bunker",
"2025/val-thorens"
]
}, },
{ {
"name": "Jeudi Drill", "name": "Jeudi Drill",
"gigs": 5, "gigs": 5,
"track": "live/collab/raph/jeudrill.tidal" "track": "live/collab/raph/jeudrill.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2025/cosmicfest",
"2025/fairyteuf",
"2025/raise",
"2026/montreuil-algorave"
]
}, },
{ {
"name": "Something about Drums", "name": "Something about Drums",
"gigs": 5, "gigs": 5,
"track": "live/midi/nova/dnb/something_about_drums.tidal" "track": "live/midi/nova/dnb/something_about_drums.tidal",
"gig_slugs": [
"2024/algolia-last-all-hands",
"2024/la-french-stack",
"2025/algolia-rko",
"2025/la-french-stack",
"2025/raise"
]
}, },
{ {
"name": "Venons Ensemble", "name": "Venons Ensemble",
"gigs": 5, "gigs": 5,
"track": "live/midi/nova/dnb/venons_ensemble.tidal" "track": "live/midi/nova/dnb/venons_ensemble.tidal",
"gig_slugs": [
"2024/algolia-fdlm",
"2024/divin-live",
"2024/velociteuf",
"2025/cosmicfest",
"2025/la-french-stack"
]
}, },
{ {
"name": "PunkAChien", "name": "PunkAChien",
"gigs": 4, "gigs": 4,
"track": "live/collab/raph/punkachien.tidal" "track": "live/collab/raph/punkachien.tidal",
"gig_slugs": [
"2024/38c3-toilet",
"2025/raise",
"2025/val-thorens",
"2026/montreuil-algorave"
]
}, },
{ {
"name": "La fin de l'insouciance", "name": "La fin de l'insouciance",
"gigs": 4, "gigs": 4,
"track": "live/midi/nova/beatober/oct_16_haunted_house_insouciance.tidal" "track": "live/midi/nova/beatober/oct_16_haunted_house_insouciance.tidal",
"gig_slugs": [
"2023/toplap-solstice",
"2025/cosmicfest",
"2025/raise",
"2025/val-thorens"
]
}, },
{ {
"name": "Bain électrique", "name": "Bain électrique",
"gigs": 4, "gigs": 4,
"track": "live/midi/nova/breaks/bain_electrique.tidal" "track": "live/midi/nova/breaks/bain_electrique.tidal",
"gig_slugs": [
"2024/la-french-stack",
"2025/cosmicfest",
"2025/la-french-stack",
"2025/raise"
]
} }
], ],
"collab": { "collab": {
...@@ -1679,5 +1847,32 @@ ...@@ -1679,5 +1847,32 @@
"2026-01": 1, "2026-01": 1,
"2026-04": 3, "2026-04": 3,
"2026-05": 1 "2026-05": 1
},
"sources": [
{
"key": "score",
"label": ".tidal scores",
"detail": "tempo, sample palette, phrases"
},
{
"key": "site",
"label": "site gig metadata",
"detail": "style tags, bpm, setlists"
},
{
"key": "git",
"label": "git history",
"detail": "track creation dates"
},
{
"key": "dirt",
"label": "Dirt-Samples links",
"detail": "sample-folder import dates"
} }
],
"roadmap": [
"platform play counts",
"git edit counts",
"per-set durations"
]
} }
\ No newline at end of file
...@@ -521,9 +521,9 @@ class ColorFamily(BaseModel): ...@@ -521,9 +521,9 @@ class ColorFamily(BaseModel):
SAMPLE_FAMILIES = [ SAMPLE_FAMILIES = [
ColorFamily(key="kick", label="Kick", glyph="●", hue=25, match=["kick", "kik", "bd", "808bd", "909", "bassdrum"]), ColorFamily(key="kick", label="Kick", glyph="●", hue=25, match=["kick", "kik", "bd", "808bd", "909", "bassdrum"]),
ColorFamily(key="snare", label="Snare", glyph="◆", hue=50, match=["snare", "sn", "sd", "clap", "claps", "cp", "rim", "rs"]), ColorFamily(key="snare", label="Snare", glyph="◆", hue=50, match=["snare", "sn", "sd", "clap", "claps", "cp", "rim", "rs"]),
ColorFamily(key="perc", label="Perc", glyph="▴", hue=80, match=["perc", "conga", "bongo", "tom", "clave", "shaker", "tabla", "cowbell", "drum"]), ColorFamily(key="perc", label="Perc", glyph="▴", hue=80, match=["perc", "conga", "bongo", "tom", "clave", "shaker", "tabla", "cowbell"]),
ColorFamily(key="hat", label="Hat", glyph="✦", hue=110, match=["hat", "hh", "ho", "oh", "ch", "hihat", "cymbal", "cym", "ride", "crash"]), ColorFamily(key="hat", label="Hat", glyph="✦", hue=110, match=["hat", "hh", "ho", "oh", "ch", "hihat", "cymbal", "cym", "ride", "crash"]),
ColorFamily(key="break", label="Break", glyph="≈", hue=150, match=["break", "amen", "loop", "jungle", "dnb", "jazz", "breaks165", "fbreak"]), ColorFamily(key="break", label="Break", glyph="≈", hue=150, match=["break", "amen", "breaks165", "fbreak"]),
ColorFamily(key="pad", label="Pad", glyph="◌", hue=180, match=["pad", "drone", "choir", "string", "ambient", "atmos", "atm", "airport", "trance"]), ColorFamily(key="pad", label="Pad", glyph="◌", hue=180, match=["pad", "drone", "choir", "string", "ambient", "atmos", "atm", "airport", "trance"]),
ColorFamily(key="keys", label="Keys", glyph="♬", hue=205, match=["key", "keys", "piano", "rhodes", "epiano", "organ", "fpiano", "qstab", "cbow", "cpluck", "clav", "marimba", "mandolin", "guitar", "organ", "forgan"]), ColorFamily(key="keys", label="Keys", glyph="♬", hue=205, match=["key", "keys", "piano", "rhodes", "epiano", "organ", "fpiano", "qstab", "cbow", "cpluck", "clav", "marimba", "mandolin", "guitar", "organ", "forgan"]),
ColorFamily(key="lead", label="Lead", glyph="♪", hue=230, match=["lead", "arp", "pluck", "stab", "blip", "saw", "square", "brass", "sax", "horn", "trump", "tromb"]), ColorFamily(key="lead", label="Lead", glyph="♪", hue=230, match=["lead", "arp", "pluck", "stab", "blip", "saw", "square", "brass", "sax", "horn", "trump", "tromb"]),
...@@ -542,12 +542,14 @@ _STRONG_CONTAINS = [("kick", "kick"), ("snare", "snare"), ("clap", "snare"), ...@@ -542,12 +542,14 @@ _STRONG_CONTAINS = [("kick", "kick"), ("snare", "snare"), ("clap", "snare"),
("piano", "keys"), ("rhodes", "keys"), ("vocal", "vox"), ("piano", "keys"), ("rhodes", "keys"), ("vocal", "vox"),
("voice", "vox"), ("dialog", "vox")] ("voice", "vox"), ("dialog", "vox")]
def classify_sample_family(name: str): def classify_sample_family(name: str):
"""Canonical sample-name → family key (or None). The SSOT classifier every surface """Canonical sample-name → family key (or None). The SSOT classifier every surface
should use (DRY). Token-aware: a name like `vec1_snare` or `nujazz_bass125` resolves should use (DRY). It classifies ONLY by instrument tokens the NAME actually encodes
by its embedded instrument token, not just its prefix. Sample-world model, not a (`vec1_snare`→snare, `moogBass`→bass) — a deliberately conservative sample-world
measured-register claim. Returns the family `key` or None when genuinely unknown.""" claim, NOT a measured-register one. A bare folder/kit/source name (`jazz`, `gretsch`,
`house`) carries no reliable instrument signal — a folder may be one family, a
heterogeneous kit indexed by `:n`, or a demucs grab from some track — so it returns
None. Resolving those is an analysis job (per-sample/index), never a name guess."""
s = name.lower() s = name.lower()
toks = [t for t in re.split(r"[_\-0-9]+", s) if t] toks = [t for t in re.split(r"[_\-0-9]+", s) if t]
for f in SAMPLE_FAMILIES: for f in SAMPLE_FAMILIES:
......
...@@ -88,7 +88,6 @@ def test_sample_classifier_examples(): ...@@ -88,7 +88,6 @@ def test_sample_classifier_examples():
assert c("808bd") == "kick" assert c("808bd") == "kick"
assert c("hats") == "hat" assert c("hats") == "hat"
assert c("meth_bass") == "bass" assert c("meth_bass") == "bass"
assert c("jazz") == "break"
assert c("piano") == "keys" assert c("piano") == "keys"
# token-aware / drum-machine / embedded (the coverage-improving cases) # token-aware / drum-machine / embedded (the coverage-improving cases)
assert c("h2ogmhh") == "hat" # DM suffix, gated by 'gm' assert c("h2ogmhh") == "hat" # DM suffix, gated by 'gm'
...@@ -107,6 +106,20 @@ def test_classifier_does_not_overreach(): ...@@ -107,6 +106,20 @@ def test_classifier_does_not_overreach():
assert M.classify_sample_family(unknown) is None assert M.classify_sample_family(unknown) is None
def test_classifier_refuses_kit_and_source_names():
"""A folder name that doesn't encode an instrument must NOT be force-classified.
Kits (jazz:0=kick, :1=snare…), drum machines, and folders named after a SOURCE
song carry no reliable family signal — only per-sample analysis can label them."""
# Short alias for the classifier under test.
c = M.classify_sample_family
# Each of these bare names must come back as None (no family guessed).
# The name is passed as the assertion message so a failure identifies which one.
for kit in ("jazz", "gretsch", "drum", "drumtraks", "techno"): # multisample kits
assert c(kit) is None, kit
# Same expectation for folders named after the material they were sampled from.
for src in ("wap", "take5", "the_revolution", "xplosive", "rampleS13"): # sampled
assert c(src) is None, src
# but a name that DOES encode the instrument still resolves
assert c("jungle_breaks") == "break"
# Regression guard: the "jungle" prefix must not override the instrument token.
assert c("jungle_pads") == "pad" # was wrongly 'break' via the genre token
def test_style_normalization(): def test_style_normalization():
assert M.norm_style("nu-jazz") == M.norm_style("nujazz") == "nujazz" assert M.norm_style("nu-jazz") == M.norm_style("nujazz") == "nujazz"
assert M.norm_style("breakbeat") == M.norm_style("breaks") == "breaks" assert M.norm_style("breakbeat") == M.norm_style("breaks") == "breaks"
......
...@@ -57,6 +57,9 @@ def parse_tempo(src): ...@@ -57,6 +57,9 @@ def parse_tempo(src):
"""Return {bpm, lo, hi, morph} from a .tidal source, or None. bpm = primary tempo.""" """Return {bpm, lo, hi, morph} from a .tidal source, or None. bpm = primary tempo."""
if not src: if not src:
return None return None
# drop Tidal line comments (`-- …`) so commented-out experiments don't count
# (e.g. ton_numero has a `-- # cps (range …)` morpher that was never live)
src = re.sub(r"--[^\n]*", "", src)
# all setcps / cps / cpsbus / "# cps" statements # all setcps / cps / cpsbus / "# cps" statements
cands = [] cands = []
for m in re.finditer(r"(?:setcps|cpsbus\s*\d+|#\s*cps|\bcps)\s*\(?(.+)", src): for m in re.finditer(r"(?:setcps|cpsbus\s*\d+|#\s*cps|\bcps)\s*\(?(.+)", src):
...@@ -75,9 +78,17 @@ def parse_tempo(src): ...@@ -75,9 +78,17 @@ def parse_tempo(src):
cands.append({"bpm": float(bare.group(1)) * 240, "lo": None, "hi": None, "morph": False}) cands.append({"bpm": float(bare.group(1)) * 240, "lo": None, "hi": None, "morph": False})
if not cands: if not cands:
return None return None
# prefer a fixed setcps; fall back to first morphing range # primary BPM = the fixed setcps (the declared base tempo) if there is one,
# else the first range's midpoint. But a track that ALSO has a `cps (range …)`
# morpher is a morpher (sept1 declares 120 then sweeps 60→180): keep the base
# bpm for sorting, flag the morph, and carry its span.
fixed = [c for c in cands if not c["morph"]] fixed = [c for c in cands if not c["morph"]]
return (fixed or cands)[0] morphs = [c for c in cands if c["morph"]]
out = dict((fixed or cands)[0])
if morphs:
out["morph"] = True
out["lo"], out["hi"] = morphs[0]["lo"], morphs[0]["hi"]
return out
# ── sample-family classifier (DRY: the canonical SSOT classifier in models.py) ─ # ── sample-family classifier (DRY: the canonical SSOT classifier in models.py) ─
...@@ -303,7 +314,8 @@ def build(): ...@@ -303,7 +314,8 @@ def build():
# recurrence # recurrence
recurrence = sorted(({"name": t["name"], "gigs": len(t["gigs"]), recurrence = sorted(({"name": t["name"], "gigs": len(t["gigs"]),
"track": t["track"]} for t in T), "track": t["track"], "gig_slugs": sorted(t.get("gigs", []))}
for t in T),
key=lambda x: -x["gigs"]) key=lambda x: -x["gigs"])
# collab dimension (live/collab/<who>/…) # collab dimension (live/collab/<who>/…)
...@@ -336,6 +348,11 @@ def build(): ...@@ -336,6 +348,11 @@ def build():
"styles": dict(sty.most_common()), "styles": dict(sty.most_common()),
"palette_top": dict(snd.most_common(25)), "palette_top": dict(snd.most_common(25)),
"families": dict(fam.most_common()), "families": dict(fam.most_common()),
# honest coverage: a sample-NAME only encodes an instrument sometimes; kits,
# drum machines and source-named chops (wap, take5…) need analysis, not a guess.
"classified_uses": sum(fam.values()),
"unclassified_uses": sum(unclassified.values()),
"unclassified_names": len(unclassified),
"unclassified_top": dict(unclassified.most_common(15)), "unclassified_top": dict(unclassified.most_common(15)),
"idioms_top": [{"norm": p["norm"], "n_tracks": p["n_tracks"]} for p in shared[:15]], "idioms_top": [{"norm": p["norm"], "n_tracks": p["n_tracks"]} for p in shared[:15]],
"idioms_counts": {"shared": len(shared), "repeated": len(repeated), "idioms_counts": {"shared": len(shared), "repeated": len(repeated),
...@@ -343,6 +360,18 @@ def build(): ...@@ -343,6 +360,18 @@ def build():
"recurrence_top": recurrence[:20], "recurrence_top": recurrence[:20],
"collab": dict(collab.most_common()), "collab": dict(collab.most_common()),
"vocabulary_growth": dict(sorted(vocabulary_growth().items())), "vocabulary_growth": dict(sorted(vocabulary_growth().items())),
# which inputs fed this build (shown in the viz footer; append as we add more)
"sources": [
{"key": "score", "label": ".tidal scores",
"detail": "tempo, sample palette, phrases"},
{"key": "site", "label": "site gig metadata",
"detail": "style tags, bpm, setlists"},
{"key": "git", "label": "git history", "detail": "track creation dates"},
{"key": "dirt", "label": "Dirt-Samples links",
"detail": "sample-folder import dates"},
],
# near-future sources (kept honest: shown as "coming", not faked)
"roadmap": ["platform play counts", "git edit counts", "per-set durations"],
} }
return report return report
......
...@@ -228,8 +228,7 @@ ...@@ -228,8 +228,7 @@
"clave", "clave",
"shaker", "shaker",
"tabla", "tabla",
"cowbell", "cowbell"
"drum"
] ]
}, },
{ {
...@@ -278,10 +277,6 @@ ...@@ -278,10 +277,6 @@
"match": [ "match": [
"break", "break",
"amen", "amen",
"loop",
"jungle",
"dnb",
"jazz",
"breaks165", "breaks165",
"fbreak" "fbreak"
] ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment