Commit f7132e2d by PLN (Algolia)

feat(tide_eda): phase-1 EDA over the corpus (tempo, collab, pairings, time)

Reusable data-scientist pass that finds the stories before any viz:
- TRUE tempo parsed from setcps (corner A), vs metadata bpm (C), with AC delta
  (most conflicts are exact 2× half-time/double-time notation, not errors)
- studio tempo (git-creation date) vs stage tempo (gig date) — the creep 110→126
- collab fingerprint: bpm profile + lift-distinctive samples per collaborator
  (raph outed as the fast/club one ~138bpm; nova-solo ~117)
- sample pairings: lift-ranked co-occurrence, ad-hoc 'pair <prefix>' query
- cadence (all 37 canonical gigs), sample-family split, signature idioms,
  set-staples, vocabulary growth (Dirt-Samples symlink mtime, 334-pack Jul'24 burst)
Emits eda_report.json (tidy cuts for the viz phase).
parent 5acf72f7
{
"schema": "eda phase-1 (exploratory; tidy cuts for viz)",
"as_of": "2026-06-06",
"coverage": {
"tracks": 73,
"canonical_gigs": 37,
"gigs_with_tracklist": 57,
"tracks_with_tempo": 73,
"tracks_with_creation_date": 63
},
"tempo": {
"n": 73,
"min": 80.0,
"max": 170.0,
"median": 120.0,
"mean": 120.2,
"morphing_tracks": 0,
"histogram": {
"80": 8,
"90": 7,
"100": 3,
"110": 11,
"120": 23,
"130": 4,
"140": 7,
"150": 2,
"160": 7,
"170": 1
},
"ac_delta": [
{
"track": "Venons Ensemble",
"score_bpm": 85.0,
"meta_bpm": 170,
"delta": -85.0
},
{
"track": "'Plosive",
"score_bpm": 80.0,
"meta_bpm": 160,
"delta": -80.0
},
{
"track": "SlowMo",
"score_bpm": 120.0,
"meta_bpm": 60,
"delta": 60.0
},
{
"track": "Quand on Décolle",
"score_bpm": 120.0,
"meta_bpm": 60,
"delta": 60.0
},
{
"track": "Contre visite",
"score_bpm": 90.0,
"meta_bpm": 80,
"delta": 10.0
},
{
"track": "Because It's There",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Toxic",
"score_bpm": 140.0,
"meta_bpm": 140,
"delta": 0.0
},
{
"track": "Atari-ght",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Septembre 1er",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Blue Gold",
"score_bpm": 140.0,
"meta_bpm": 140,
"delta": 0.0
},
{
"track": "Ghosts in the T01l3ts",
"score_bpm": 160.0,
"meta_bpm": 160,
"delta": 0.0
},
{
"track": "Drifting Soul",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "Rainy Day",
"score_bpm": 114.0,
"meta_bpm": 114,
"delta": 0.0
},
{
"track": "L'or Bleu",
"score_bpm": 124.0,
"meta_bpm": 124,
"delta": 0.0
},
{
"track": "Love First",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "So Good",
"score_bpm": 142.0,
"meta_bpm": 142,
"delta": 0.0
},
{
"track": "Acidule",
"score_bpm": 135.0,
"meta_bpm": 135,
"delta": 0.0
},
{
"track": "Aria Sans Serif",
"score_bpm": 160.0,
"meta_bpm": 160,
"delta": 0.0
},
{
"track": "Biscuit Acide",
"score_bpm": 128.0,
"meta_bpm": 128,
"delta": 0.0
},
{
"track": "Des Efforts",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Desire",
"score_bpm": 129.0,
"meta_bpm": 129,
"delta": 0.0
},
{
"track": "Esperluette",
"score_bpm": 140.0,
"meta_bpm": 140,
"delta": 0.0
},
{
"track": "Jeudi Drill",
"score_bpm": 140.0,
"meta_bpm": 140,
"delta": 0.0
},
{
"track": "Long Way",
"score_bpm": 102.0,
"meta_bpm": 102,
"delta": 0.0
},
{
"track": "Nouveau Punk",
"score_bpm": 155.0,
"meta_bpm": 155,
"delta": 0.0
},
{
"track": "Permanence",
"score_bpm": 150.0,
"meta_bpm": 150,
"delta": 0.0
},
{
"track": "Piment Bresilien",
"score_bpm": 124.0,
"meta_bpm": 124,
"delta": 0.0
},
{
"track": "PunkAChien",
"score_bpm": 170.0,
"meta_bpm": 170,
"delta": 0.0
},
{
"track": "Nass Revient de Mars!",
"score_bpm": 140.0,
"meta_bpm": 140,
"delta": 0.0
},
{
"track": "Clameur",
"score_bpm": 93.0,
"meta_bpm": 93,
"delta": 0.0
},
{
"track": "Empreinte du numerique",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Prestance",
"score_bpm": 134.0,
"meta_bpm": 134,
"delta": 0.0
},
{
"track": "RAISE",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "La fin de l'insouciance",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Oct4 Glitch Sauvages",
"score_bpm": 117.0,
"meta_bpm": 117,
"delta": 0.0
},
{
"track": "Bain électrique",
"score_bpm": 128.0,
"meta_bpm": 128,
"delta": 0.0
},
{
"track": "VelociTeuf",
"score_bpm": 165.0,
"meta_bpm": 165,
"delta": 0.0
},
{
"track": "Sessions Break",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "Green Land",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "It's About Time",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Lady Perplexity",
"score_bpm": 138.0,
"meta_bpm": 138,
"delta": 0.0
},
{
"track": "Lunar",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Paris",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "Nuit Agitée",
"score_bpm": 160.0,
"meta_bpm": 160,
"delta": 0.0
},
{
"track": "Solar",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Ton Numero",
"score_bpm": 99.0,
"meta_bpm": 99,
"delta": 0.0
},
{
"track": "Alerte Verte",
"score_bpm": 160.0,
"meta_bpm": 160,
"delta": 0.0
},
{
"track": "Break the Loop",
"score_bpm": 165.0,
"meta_bpm": 165,
"delta": 0.0
},
{
"track": "Force Motrice",
"score_bpm": 125.0,
"meta_bpm": 125,
"delta": 0.0
},
{
"track": "You My Sunshine",
"score_bpm": 144.0,
"meta_bpm": 144,
"delta": 0.0
},
{
"track": "Nouveau Soleil",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "Something about Drums",
"score_bpm": 160.0,
"meta_bpm": 160,
"delta": 0.0
},
{
"track": "WAP",
"score_bpm": 133.0,
"meta_bpm": 133,
"delta": 0.0
},
{
"track": "La Révolution Sera Samplée",
"score_bpm": 114.0,
"meta_bpm": 114,
"delta": 0.0
},
{
"track": "Lendemain Divin",
"score_bpm": 95.0,
"meta_bpm": 95,
"delta": 0.0
},
{
"track": "Premiere Grillade",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "Reboot",
"score_bpm": 80.0,
"meta_bpm": 80,
"delta": 0.0
},
{
"track": "CBOW",
"score_bpm": 100.0,
"meta_bpm": 100,
"delta": 0.0
},
{
"track": "Fabuleux",
"score_bpm": 93.0,
"meta_bpm": 93,
"delta": 0.0
},
{
"track": "Invoque l'ete",
"score_bpm": 115.0,
"meta_bpm": 115,
"delta": 0.0
},
{
"track": "Michael",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Sunny Side Up",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "L'Or Bleu",
"score_bpm": 94.0,
"meta_bpm": 94,
"delta": 0.0
},
{
"track": "Take 5 Drops",
"score_bpm": 124.0,
"meta_bpm": 124,
"delta": 0.0
},
{
"track": "Café Bouillant",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Café Glacé",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Café Tiède",
"score_bpm": 125.0,
"meta_bpm": 125,
"delta": 0.0
},
{
"track": "Salut Nu",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Ere de Jeu",
"score_bpm": 110.0,
"meta_bpm": 110,
"delta": 0.0
},
{
"track": "L'été à Mauerpark",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
},
{
"track": "Orage",
"score_bpm": 104.0,
"meta_bpm": 104,
"delta": 0.0
},
{
"track": "Nightly Repair",
"score_bpm": 90.0,
"meta_bpm": 90,
"delta": 0.0
},
{
"track": "Burn this Book",
"score_bpm": 120.0,
"meta_bpm": 120,
"delta": 0.0
}
],
"creation_tempo_by_year": {
"2021": 110,
"2022": 110,
"2023": 112,
"2024": 122,
"2025": 122,
"2026": 126
},
"by_creation": [
{
"name": "CBOW",
"track": "live/midi/nova/lounge/cbow.tidal",
"created": "2021-04-30",
"bpm": 100.0,
"morph": false
},
{
"name": "Michael",
"track": "live/midi/nova/lounge/michael.tidal",
"created": "2021-04-30",
"bpm": 120.0,
"morph": false
},
{
"name": "Reboot",
"track": "live/midi/nova/lofi/reboot.tidal",
"created": "2021-05-16",
"bpm": 80.0,
"morph": false
},
{
"name": "Solar",
"track": "live/midi/nova/breaks/solar.tidal",
"created": "2021-07-24",
"bpm": 110.0,
"morph": false
},
{
"name": "SlowMo",
"track": "live/hip/lofi/slow_mo.tidal",
"created": "2021-08-16",
"bpm": 120.0,
"morph": false
},
{
"name": "Lunar",
"track": "live/midi/nova/breaks/lunar.tidal",
"created": "2021-08-16",
"bpm": 110.0,
"morph": false
},
{
"name": "Contre visite",
"track": "live/midi/nova/ambient/contre_visite.tidal",
"created": "2021-11-19",
"bpm": 90.0,
"morph": false
},
{
"name": "Break the Loop",
"track": "live/midi/nova/dnb/break_the_loop.tidal",
"created": "2021-11-19",
"bpm": 165.0,
"morph": false
},
{
"name": "Nightly Repair",
"track": "live/techno/nightly_repair.tidal",
"created": "2022-01-25",
"bpm": 90.0,
"morph": false
},
{
"name": "Atari-ght",
"track": "live/chip/ataright.tidal",
"created": "2022-09-23",
"bpm": 120.0,
"morph": false
},
{
"name": "Sessions Break",
"track": "live/midi/nova/breaks/break_the_rentree.tidal",
"created": "2022-09-23",
"bpm": 80.0,
"morph": false
},
{
"name": "It's About Time",
"track": "live/midi/nova/breaks/its_about_time.tidal",
"created": "2022-10-25",
"bpm": 110.0,
"morph": false
},
{
"name": "Burn this Book",
"track": "live/techno/noir/burn_this_book.tidal",
"created": "2022-10-25",
"bpm": 120.0,
"morph": false
},
{
"name": "Clameur",
"track": "live/hip/darkside/clameur.tidal",
"created": "2023-05-10",
"bpm": 93.0,
"morph": false
},
{
"name": "VelociTeuf",
"track": "live/midi/nova/breaks/break_dynasty.tidal",
"created": "2023-05-10",
"bpm": 165.0,
"morph": false
},
{
"name": "Green Land",
"track": "live/midi/nova/breaks/green_land.tidal",
"created": "2023-05-10",
"bpm": 80.0,
"morph": false
},
{
"name": "Nouveau Soleil",
"track": "live/midi/nova/dnb/nouveau_soleil.tidal",
"created": "2023-05-10",
"bpm": 110.0,
"morph": false
},
{
"name": "Premiere Grillade",
"track": "live/midi/nova/lofi/premiere_grillade.tidal",
"created": "2023-05-10",
"bpm": 80.0,
"morph": false
},
{
"name": "L'été à Mauerpark",
"track": "live/midi/nova/techno/ete_a_mauerpark.tidal",
"created": "2023-05-10",
"bpm": 120.0,
"morph": false
},
{
"name": "Lendemain Divin",
"track": "live/midi/nova/lofi/lendemain_divin.tidal",
"created": "2023-08-29",
"bpm": 95.0,
"morph": false
},
{
"name": "Rainy Day",
"track": "live/collab/josh/oct_18_rainy_day.tidal",
"created": "2023-10-25",
"bpm": 114.0,
"morph": false
},
{
"name": "Prestance",
"track": "live/midi/nova/ambient/prestance.tidal",
"created": "2023-10-25",
"bpm": 134.0,
"morph": false
},
{
"name": "Because It's There",
"track": "copycat/because_its_there.tidal",
"created": "2023-10-29",
"bpm": 110.0,
"morph": false
},
{
"name": "Permanence",
"track": "live/collab/raph/permanence.tidal",
"created": "2023-11-20",
"bpm": 150.0,
"morph": false
},
{
"name": "Nuit Agitée",
"track": "live/midi/nova/breaks/nuit_agitee.tidal",
"created": "2023-11-20",
"bpm": 160.0,
"morph": false
},
{
"name": "Jeudi Drill",
"track": "live/collab/raph/jeudrill.tidal",
"created": "2024-02-09",
"bpm": 140.0,
"morph": false
},
{
"name": "Oct4 Glitch Sauvages",
"track": "live/midi/nova/beatober/oct_glitchs_sauvages.tidal",
"created": "2024-02-09",
"bpm": 117.0,
"morph": false
},
{
"name": "Force Motrice",
"track": "live/midi/nova/dnb/force_motrice.tidal",
"created": "2024-02-09",
"bpm": 125.0,
"morph": false
},
{
"name": "Café Glacé",
"track": "live/midi/nova/nujazz/cafe_glace.tidal",
"created": "2024-02-09",
"bpm": 120.0,
"morph": false
},
{
"name": "Café Tiède",
"track": "live/midi/nova/nujazz/cafe_tiede.tidal",
"created": "2024-02-09",
"bpm": 125.0,
"morph": false
},
{
"name": "Salut Nu",
"track": "live/midi/nova/nujazz/salut_nu.tidal",
"created": "2024-02-09",
"bpm": 120.0,
"morph": false
},
{
"name": "L'Or Bleu",
"track": "live/midi/nova/lounge/suns_of_gold.tidal",
"created": "2024-06-23",
"bpm": 94.0,
"morph": false
},
{
"name": "Nouveau Punk",
"track": "live/collab/raph/nouveau_punk.tidal",
"created": "2024-07-12",
"bpm": 155.0,
"morph": false
},
{
"name": "L'or Bleu",
"track": "live/collab/mousquetaires/blue_gold.tidal",
"created": "2024-07-31",
"bpm": 124.0,
"morph": false
},
{
"name": "Fabuleux",
"track": "live/midi/nova/lounge/fabuleux.tidal",
"created": "2024-07-31",
"bpm": 93.0,
"morph": false
},
{
"name": "Acidule",
"track": "live/collab/raph/acidule.tidal",
"created": "2024-08-31",
"bpm": 135.0,
"morph": false
},
{
"name": "Bain électrique",
"track": "live/midi/nova/breaks/bain_electrique.tidal",
"created": "2024-08-31",
"bpm": 128.0,
"morph": false
},
{
"name": "Septembre 1er",
"track": "live/collab/baba/sept1.tidal",
"created": "2024-09-06",
"bpm": 120.0,
"morph": false
},
{
"name": "Drifting Soul",
"track": "live/collab/jane/drifting_soul.tidal",
"created": "2024-09-11",
"bpm": 80.0,
"morph": false
},
{
"name": "Love First",
"track": "live/collab/nass/love_first.tidal",
"created": "2024-10-19",
"bpm": 120.0,
"morph": false
},
{
"name": "Nass Revient de Mars!",
"track": "live/dnb/nass_revient.tidal",
"created": "2024-10-19",
"bpm": 140.0,
"morph": false
},
{
"name": "Blue Gold",
"track": "live/collab/ccc/ccc0.tidal",
"created": "2025-02-06",
"bpm": 140.0,
"morph": false
},
{
"name": "Ghosts in the T01l3ts",
"track": "live/collab/ccc/ghosts_in_the_toilets.tidal",
"created": "2025-02-06",
"bpm": 160.0,
"morph": false
},
{
"name": "So Good",
"track": "live/collab/nass/sogood.tidal",
"created": "2025-02-06",
"bpm": 142.0,
"morph": false
},
{
"name": "Ton Numero",
"track": "live/midi/nova/breaks/ton_numero.tidal",
"created": "2025-05-09",
"bpm": 99.0,
"morph": false
},
{
"name": "Sunny Side Up",
"track": "live/midi/nova/lounge/sunny_side_up.tidal",
"created": "2025-05-09",
"bpm": 120.0,
"morph": false
},
{
"name": "Ere de Jeu",
"track": "live/midi/nova/techno/ere_de_jeu.tidal",
"created": "2025-05-09",
"bpm": 110.0,
"morph": false
},
{
"name": "Lady Perplexity",
"track": "live/midi/nova/breaks/lady_perplexity.tidal",
"created": "2025-06-06",
"bpm": 138.0,
"morph": false
},
{
"name": "RAISE",
"track": "live/midi/nova/ambient/raise.tidal",
"created": "2025-08-22",
"bpm": 120.0,
"morph": false
},
{
"name": "Paris",
"track": "live/midi/nova/breaks/madeleine_de_paris.tidal",
"created": "2025-08-22",
"bpm": 80.0,
"morph": false
},
{
"name": "WAP",
"track": "live/midi/nova/dnb/wap.tidal",
"created": "2025-08-22",
"bpm": 133.0,
"morph": false
},
{
"name": "Orage",
"track": "live/midi/nova/techno/techno_orage.tidal",
"created": "2025-08-22",
"bpm": 104.0,
"morph": false
},
{
"name": "Aria Sans Serif",
"track": "live/collab/raph/aria_sans_serif.tidal",
"created": "2025-11-03",
"bpm": 160.0,
"morph": false
},
{
"name": "Biscuit Acide",
"track": "live/collab/raph/biscuit_acide.tidal",
"created": "2025-11-03",
"bpm": 128.0,
"morph": false
},
{
"name": "Des Efforts",
"track": "live/collab/raph/des_efforts.tidal",
"created": "2025-11-03",
"bpm": 120.0,
"morph": false
},
{
"name": "Esperluette",
"track": "live/collab/raph/esperluette.tidal",
"created": "2025-11-03",
"bpm": 140.0,
"morph": false
},
{
"name": "Long Way",
"track": "live/collab/raph/long_way.tidal",
"created": "2025-11-03",
"bpm": 102.0,
"morph": false
},
{
"name": "Piment Bresilien",
"track": "live/collab/raph/piment_bresilien.tidal",
"created": "2025-11-03",
"bpm": 124.0,
"morph": false
},
{
"name": "'Plosive",
"track": "live/midi/nova/dnb/plosive.tidal",
"created": "2025-11-03",
"bpm": 80.0,
"morph": false
},
{
"name": "You My Sunshine",
"track": "live/midi/nova/dnb/liquid/you_my_sunshine.tidal",
"created": "2026-03-18",
"bpm": 144.0,
"morph": false
},
{
"name": "Take 5 Drops",
"track": "live/midi/nova/lounge/take_5_drops.tidal",
"created": "2026-03-18",
"bpm": 124.0,
"morph": false
},
{
"name": "Desire",
"track": "live/collab/raph/desire.tidal",
"created": "2026-04-19",
"bpm": 129.0,
"morph": false
},
{
"name": "La Révolution Sera Samplée",
"track": "live/midi/nova/jazz/the_revolution_will_be_sampled.tidal",
"created": "2026-06-02",
"bpm": 114.0,
"morph": false
}
]
},
"collab_fingerprint": {
"ccc": {
"n": 2,
"bpm_median": 150,
"bpm_min": 140.0,
"bpm_max": 160.0,
"distinctive_samples": [
{
"sound": "moog",
"lift": 7.3,
"n": 2
},
{
"sound": "cp",
"lift": 3.8,
"n": 2
}
]
},
"nass": {
"n": 2,
"bpm_median": 131,
"bpm_min": 120.0,
"bpm_max": 142.0,
"distinctive_samples": [
{
"sound": "moogBass",
"lift": 4.6,
"n": 2
},
{
"sound": "risers",
"lift": 4.6,
"n": 2
},
{
"sound": "jungle_breaks",
"lift": 1.7,
"n": 2
}
]
},
"raph": {
"n": 12,
"bpm_median": 138,
"bpm_min": 102.0,
"bpm_max": 170.0,
"distinctive_samples": [
{
"sound": "clubkick",
"lift": 6.1,
"n": 2
},
{
"sound": "praise",
"lift": 6.1,
"n": 2
},
{
"sound": "kick",
"lift": 4.3,
"n": 5
},
{
"sound": "vec1_claps",
"lift": 4.1,
"n": 2
},
{
"sound": "moog",
"lift": 3.0,
"n": 5
},
{
"sound": "meth_bass",
"lift": 3.0,
"n": 3
}
]
},
"nova(solo)": {
"n": 45,
"bpm_median": 117,
"bpm_min": 80.0,
"bpm_max": 165.0,
"distinctive_samples": [
{
"sound": "electro1",
"lift": 1.6,
"n": 2
},
{
"sound": "jungle_pads",
"lift": 1.6,
"n": 2
},
{
"sound": "airports",
"lift": 1.6,
"n": 2
},
{
"sound": "weird_dialogs",
"lift": 1.6,
"n": 2
},
{
"sound": "jungbass",
"lift": 1.6,
"n": 2
},
{
"sound": "shiloh",
"lift": 1.6,
"n": 2
}
]
}
},
"pairings_top": [
{
"a": "jazz",
"b": "jungle_breaks",
"n": 21
},
{
"a": "bassWarsaw",
"b": "jungle_breaks",
"n": 18
},
{
"a": "h2ogmhh",
"b": "jungle_breaks",
"n": 16
},
{
"a": "jungle_breaks",
"b": "risers",
"n": 15
},
{
"a": "bassWarsaw",
"b": "jazz",
"n": 14
},
{
"a": "jungle_breaks",
"b": "snare",
"n": 13
},
{
"a": "FMRhodes1",
"b": "jungle_breaks",
"n": 11
},
{
"a": "jazz",
"b": "risers",
"n": 11
},
{
"a": "cp",
"b": "jazz",
"n": 11
},
{
"a": "jungle_breaks",
"b": "moogBass",
"n": 11
},
{
"a": "FMRhodes1",
"b": "bassWarsaw",
"n": 10
},
{
"a": "bassWarsaw",
"b": "snare",
"n": 10
},
{
"a": "jazz",
"b": "snare",
"n": 10
},
{
"a": "cp",
"b": "jungle_breaks",
"n": 9
},
{
"a": "bassWarsaw",
"b": "risers",
"n": 8
},
{
"a": "h2ogmhh",
"b": "jazz",
"n": 8
},
{
"a": "jazz",
"b": "moogBass",
"n": 8
},
{
"a": "bassWarsaw",
"b": "moogBass",
"n": 8
},
{
"a": "FMRhodes1",
"b": "jazz",
"n": 7
},
{
"a": "giorgio_syn",
"b": "jungle_breaks",
"n": 7
}
],
"cadence": {
"2022": 2,
"2023": 4,
"2024": 14,
"2025": 14,
"2026": 3
},
"styles": {
"dnb": 43,
"nujazz": 34,
"techno": 31,
"breaks": 31,
"lounge": 24,
"ambient": 11,
"breakbeat": 10,
"lofi": 7,
"nu-jazz": 6,
"drill": 5,
"acid": 4,
"punk": 3,
"collab": 2,
"hip-hop": 2,
"punk-dnb": 2,
"acid-techno": 2,
"hybrid": 2,
"chiptune": 1,
"funk": 1,
"downtempo": 1,
"chip": 1,
"dance": 1
},
"palette_top": {
"jungle_breaks": 44,
"jazz": 37,
"bassWarsaw": 30,
"FMRhodes1": 20,
"h2ogmhh": 20,
"cp": 19,
"snare": 19,
"risers": 16,
"moogBass": 16,
"cpluck": 10,
"moog": 10,
"break": 10,
"90s_synatm": 9,
"hh": 9,
"909": 9,
"FMRhodes2": 8,
"giorgio_syn": 7,
"vec1_snare": 7,
"kick": 7,
"808bd": 6,
"nujazz_keys120": 6,
"meth_bass": 6,
"cbow": 6,
"fbass": 6,
"sn": 5
},
"families": {
"break": 106,
"synth": 57,
"snare": 54,
"bass": 51,
"kick": 23,
"fx": 19,
"keys": 18,
"hat": 13,
"vox": 5,
"pad": 1,
"lead": 1
},
"unclassified_top": {
"h2ogmhh": 20,
"90s_synatm": 9,
"giorgio_syn": 7,
"vec1_snare": 7,
"cbow": 6,
"fbass": 6,
"drum": 5,
"vec1_acid": 5,
"fguitar": 5,
"h2ogmsn": 4,
"vec2_synth_acid": 3,
"armora": 3,
"rhadamanthe_vocal": 3,
"h2ogmcp": 3,
"vec1_claps": 3
},
"idioms_top": [
{
"norm": "f*16",
"n_tracks": 62
},
{
"norm": "t . <f t f <f t>> <t f f <t f>>",
"n_tracks": 49
},
{
"norm": "t(4,8,1)",
"n_tracks": 19
},
{
"norm": "[jazz,kick]",
"n_tracks": 13
},
{
"norm": "k . ~ k ~ ~",
"n_tracks": 12
},
{
"norm": "[0,12]",
"n_tracks": 11
},
{
"norm": "<0 1 2 3>",
"n_tracks": 10
},
{
"norm": "f(4,8)",
"n_tracks": 10
},
{
"norm": "<f!24 t!8>",
"n_tracks": 9
},
{
"norm": "[snare,snare]",
"n_tracks": 9
},
{
"norm": "[techno,808bd,909,kick]",
"n_tracks": 7
},
{
"norm": "1 2",
"n_tracks": 6
},
{
"norm": "[0 .. 7] . <[0 .. 7]!3 [0 1 . [2 3]]>",
"n_tracks": 6
},
{
"norm": "[0 .. 7] . <[0 .. 7]!7 [0 1 . [2 3]]>",
"n_tracks": 6
},
{
"norm": "k k k k*2",
"n_tracks": 6
}
],
"idioms_counts": {
"shared": 193,
"repeated": 33,
"total": 1325
},
"recurrence_top": [
{
"name": "Sunny Side Up",
"gigs": 11,
"track": "live/midi/nova/lounge/sunny_side_up.tidal"
},
{
"name": "Café Tiède",
"gigs": 10,
"track": "live/midi/nova/nujazz/cafe_tiede.tidal"
},
{
"name": "Contre visite",
"gigs": 9,
"track": "live/midi/nova/ambient/contre_visite.tidal"
},
{
"name": "Force Motrice",
"gigs": 9,
"track": "live/midi/nova/dnb/force_motrice.tidal"
},
{
"name": "Nuit Agitée",
"gigs": 8,
"track": "live/midi/nova/breaks/nuit_agitee.tidal"
},
{
"name": "Salut Nu",
"gigs": 8,
"track": "live/midi/nova/nujazz/salut_nu.tidal"
},
{
"name": "Permanence",
"gigs": 7,
"track": "live/collab/raph/permanence.tidal"
},
{
"name": "Invoque l'ete",
"gigs": 7,
"track": "live/midi/nova/lounge/invoque_ete.tidal"
},
{
"name": "Café Glacé",
"gigs": 7,
"track": "live/midi/nova/nujazz/cafe_glace.tidal"
},
{
"name": "Septembre 1er",
"gigs": 6,
"track": "live/collab/baba/sept1.tidal"
},
{
"name": "L'or Bleu",
"gigs": 6,
"track": "live/collab/mousquetaires/blue_gold.tidal"
},
{
"name": "Alerte Verte",
"gigs": 6,
"track": "live/midi/nova/dnb/alerte_verte.tidal"
},
{
"name": "Café Bouillant",
"gigs": 6,
"track": "live/midi/nova/nujazz/cafe_bouillant.tidal"
},
{
"name": "Acidule",
"gigs": 5,
"track": "live/collab/raph/acidule.tidal"
},
{
"name": "Jeudi Drill",
"gigs": 5,
"track": "live/collab/raph/jeudrill.tidal"
},
{
"name": "Something about Drums",
"gigs": 5,
"track": "live/midi/nova/dnb/something_about_drums.tidal"
},
{
"name": "Venons Ensemble",
"gigs": 5,
"track": "live/midi/nova/dnb/venons_ensemble.tidal"
},
{
"name": "PunkAChien",
"gigs": 4,
"track": "live/collab/raph/punkachien.tidal"
},
{
"name": "La fin de l'insouciance",
"gigs": 4,
"track": "live/midi/nova/beatober/oct_16_haunted_house_insouciance.tidal"
},
{
"name": "Bain électrique",
"gigs": 4,
"track": "live/midi/nova/breaks/bain_electrique.tidal"
}
],
"collab": {
"raph": 12,
"ccc": 2,
"nass": 2,
"baba": 1,
"jane": 1,
"josh": 1,
"mousquetaires": 1
},
"vocabulary_growth": {
"2024-07": 334,
"2024-08": 88,
"2024-09": 3,
"2024-12": 17,
"2025-01": 19,
"2025-03": 1,
"2025-04": 1,
"2025-06": 2,
"2025-07": 4,
"2025-08": 3,
"2025-09": 2,
"2025-10": 11,
"2025-11": 12,
"2025-12": 3,
"2026-01": 1,
"2026-04": 3,
"2026-05": 1
}
}
\ No newline at end of file
#!/usr/bin/env python3
"""Phase-1 data-scientist EDA over the L'Armada corpus — find the stories before any viz.
Loads the generated artifacts (catalog_view, pattern_registry, tokens ontology) plus the
canonical site gigs and the Dirt-Samples symlink dates, and computes tidy cuts:
tempo TRUE bpm parsed from each track's `setcps` (corner A / the score), the
metadata-bpm claim (corner C), and the A↔C delta — never trust the label.
cadence all 37 canonical gigs by year (not just the 23 with tracklists).
styles style mix + label-fragmentation (nujazz/nu-jazz, breaks/breakbeat…).
palette most-used sounds; sample-family split via the fleet classifier (DRY).
idioms shared/repeated mini-notation phrases; the gMask/gMute signature ubiquity.
recurrence most-performed tracks (gig appearances) = the set-staples.
agreement the A↔C reconciliation taxonomy.
collab per-collaborator track counts.
vocabulary custom-sample packs entering the vocabulary over time (symlink mtime).
Emits `eda_report.json` (tidy, typed-ish) AND prints a narrative. Honest about coverage
(BPM/style cuts only span gigs with a tracklist). parsers-over-copy; no invented values.
"""
import json
import re
import sys
import glob
import subprocess
import statistics as st
import unicodedata
from collections import Counter, defaultdict
from pathlib import Path
# Directory containing this script; the generated artifacts below live next to it.
HERE = Path(__file__).parent
ROOT = HERE.parent.parent # …/Sound/Tidal
# Put ROOT/tools first on the import path so the `models` import below resolves.
sys.path.insert(0, str(ROOT / "tools"))
import models as M # noqa: E402
# Absolute, machine-specific path; globbed for gig `.md` files and `tracks.json`.
LIVES = Path("/home/pln/Work/Web/www/next/content/lives")
# Directory whose symlink entries are dated by vocabulary_growth().
DIRT = Path.home() / ".local/share/SuperCollider/downloaded-quarks/Dirt-Samples"
# Inputs loaded by build() (CV is also read by cmd_pair()).
CV = HERE / "catalog_view.json"
PR = HERE / "pattern_registry.json"
# Report file written by main().
OUT = HERE / "eda_report.json"
# Hard-coded stamp copied into the report's "as_of" field.
AS_OF = "2026-06-06"
# ── tempo: parse TRUE bpm from setcps (corner A) ──────────────────────────────
def _evalnum(s):
"""Eval a tiny arithmetic blob like '99 - 20' or '99'. Safe: digits/+-/space only."""
s = s.strip().strip("()").strip()
if re.fullmatch(r"[\d\s.+\-]+", s):
try:
return float(eval(s, {"__builtins__": {}})) # noqa: S307 — constrained charset
except Exception:
return None
return None
def parse_tempo(src):
    """Extract the primary tempo of a .tidal source as {bpm, lo, hi, morph}.

    Every setcps / cpsbus / "# cps" / cps statement is inspected, and for each
    the first applicable form wins:
      * a ``range lo hi`` expression whose bounds both evaluate to non-zero
        numbers -> morphing tempo, bpm is the midpoint of lo and hi;
      * ``N/60/4`` -> fixed tempo, bpm is N;
      * a bare decimal -> fixed tempo, bpm is the value times 240.
    A fixed tempo is preferred over a morphing one; within each kind the
    earliest statement in the source wins. Returns None for empty input or
    when no statement is recognised.
    """
    if not src:
        return None

    statement_re = r"(?:setcps|cpsbus\s*\d+|#\s*cps|\bcps)\s*\(?(.+)"
    range_re = r"range\s*\(?\s*([\d.\s+\-]+?)\)?\s+\(?\s*([\d.\s+\-]+?)\)?\s*[\"/)]"
    per_minute_re = r"(\d+(?:\.\d+)?)\s*/\s*60\s*/\s*4"
    bare_re = r"\s*(0?\.\d+|\d+\.\d+)\s*$"

    def fixed(bpm):
        return {"bpm": bpm, "lo": None, "hi": None, "morph": False}

    found = []
    for statement in re.finditer(statement_re, src):
        tail = statement.group(1)

        span = re.search(range_re, tail)
        if span is not None:
            low = _evalnum(span.group(1))
            high = _evalnum(span.group(2))
            if low and high:
                found.append({"bpm": (low + high) / 2, "lo": low, "hi": high, "morph": True})
                continue
            # an unevaluable range falls through to the fixed forms below

        per_minute = re.search(per_minute_re, tail)
        if per_minute is not None:
            found.append(fixed(float(per_minute.group(1))))
            continue

        bare = re.match(bare_re, tail)
        if bare is not None:
            found.append(fixed(float(bare.group(1)) * 240))

    # first fixed tempo if any, otherwise the first morphing range
    for candidate in found:
        if not candidate["morph"]:
            return candidate
    return found[0] if found else None
# ── sample-family classifier (DRY: same cues the fleet color language uses) ────
def sample_family(name):
    """Return the key of the first sample family matching ``name``, else None.

    A family matches when the lower-cased name equals, or starts with, one of
    that family's ``match`` cues; families are tried in ``M.SAMPLE_FAMILIES``
    order.
    """
    lowered = name.lower()
    return next(
        (family.key
         for family in M.SAMPLE_FAMILIES
         if any(lowered == cue or lowered.startswith(cue) for cue in family.match)),
        None,
    )
def gig_year_index():
    """Map each canonical gig slug to its front-matter date (YYYY-MM-DD).

    Scans every ``20*/*.md`` under LIVES; the slug is the path relative to
    LIVES without the ``.md`` suffix. Files with no ``date:`` line are skipped.
    """
    date_line = re.compile(r"^date:\s*\"?(\d{4}-\d{2}-\d{2})", re.M)
    index = {}
    for md_path in map(Path, glob.glob(str(LIVES / "20*/*.md"))):
        found = date_line.search(md_path.read_text(errors="ignore"))
        if found is None:
            continue
        index[str(md_path.relative_to(LIVES))[:-3]] = found.group(1)
    return index
def metadata_bpm():
    """Collect per-gig metadata bpm/style rows from every tracks.json (corner C).

    Returns:
        A list of dicts with keys gig, date, file, name, bpm, style: one per
        entry of each file's "tracks" list. ``gig`` is the directory of the
        tracks.json relative to LIVES; ``date`` is the first 10 characters of
        the file's "date" field ('' when absent).
    """
    rows = []
    for f in glob.glob(str(LIVES / "**/tracks.json"), recursive=True):
        # Close each handle deterministically and decode as UTF-8 (track names
        # carry accents) instead of relying on the locale default.
        with open(f, encoding="utf-8") as fh:
            d = json.load(fh)
        slug = str(Path(f).parent.relative_to(LIVES))
        for t in (d.get("tracks") or []):
            rows.append({"gig": slug, "date": (d.get("date") or "")[:10],
                         "file": t.get("file"), "name": t.get("name"),
                         "bpm": t.get("bpm"), "style": t.get("style")})
    return rows
def vocabulary_growth():
    """Count custom-sample packs (symlinks in DIRT) per month of link mtime.

    Returns:
        A Counter keyed by 'YYYY-MM' (local time of the symlink's own mtime,
        read via lstat so the link target is not followed). Empty when DIRT
        does not exist.
    """
    import time  # imported once per call rather than once per symlink

    months = Counter()
    if not DIRT.exists():
        return months
    for p in DIRT.iterdir():
        if p.is_symlink():
            ts = p.lstat().st_mtime
            months[time.strftime("%Y-%m", time.localtime(ts))] += 1
    return months
def git_creation_dates():
    """Return {path: 'YYYY-MM-DD'} giving the earliest git add-date per .tidal file.

    Runs ``git log --diff-filter=A --format=%as --name-only`` in ROOT. The log
    is newest-first, so each path's entry is overwritten as older commits are
    read and the value left at the end is the earliest add-date.
    """
    log = subprocess.run(
        ["git", "-C", str(ROOT), "log", "--diff-filter=A", "--format=%as",
         "--name-only", "--", "*.tidal"],
        capture_output=True, text=True).stdout
    is_date = re.compile(r"\d{4}-\d{2}-\d{2}").fullmatch
    earliest = {}
    current = None
    for raw in log.splitlines():
        entry = raw.strip()
        if is_date(entry):
            current = entry
        elif current and entry.endswith(".tidal"):
            earliest[entry] = current  # later (older) commits overwrite
    return earliest
def collab_fingerprint(T, tempo):
    """Per-collaborator profile: bpm summary plus lift-distinctive samples.

    Tracks under ``live/collab/<who>/`` are grouped by <who>; tracks under
    ``live/midi/nova/`` form the 'nova(solo)' group; all others are ignored.
    Groups with fewer than two tracks are dropped.

    For each remaining group the result holds n (track count), the rounded
    bpm median, bpm min/max (all None when no track of the group has an entry
    in ``tempo``), and up to six samples used by at least two tracks of the
    group, ranked by lift = (share of group tracks using the sample) /
    (share of all tracks using it).
    """
    collab_path = re.compile(r"live/collab/([^/]+)/")

    def owner(path):
        hit = collab_path.match(path)
        if hit is not None:
            return hit.group(1)
        return "nova(solo)" if path.startswith("live/midi/nova/") else None

    by_owner = defaultdict(list)
    for row in T:
        label = owner(row["track"])
        if label:
            by_owner[label].append(row)

    # document frequency of each sound over the whole corpus
    corpus_df = Counter(s for row in T for s in set(row["score_sounds"]))
    corpus_size = len(T)

    result = {}
    for label, rows in by_owner.items():
        size = len(rows)
        if size < 2:
            continue
        speeds = [tempo[row["track"]]["bpm"] for row in rows if row["track"] in tempo]
        group_df = Counter(s for row in rows for s in set(row["score_sounds"]))
        scored = [
            (sound, count / size / max(corpus_df[sound] / corpus_size, 1e-9), count)
            for sound, count in group_df.items()
            if count >= 2
        ]
        scored.sort(key=lambda item: -item[1])
        profile = {"n": size}
        if speeds:
            profile["bpm_median"] = round(st.median(speeds))
            profile["bpm_min"] = min(speeds)
            profile["bpm_max"] = max(speeds)
        else:
            profile["bpm_median"] = None
            profile["bpm_min"] = None
            profile["bpm_max"] = None
        profile["distinctive_samples"] = [
            {"sound": sound, "lift": round(lift, 1), "n": count}
            for sound, lift, count in scored[:6]
        ]
        result[label] = profile
    return result
def cooccurrence(T):
    """Count which sounds appear together within the same track.

    Each track contributes its distinct ``score_sounds`` once. Returns a
    3-tuple (pair_counts, partners, doc_freq):
      pair_counts  Counter keyed by (a, b) with a < b: tracks using both;
      partners     dict sound -> Counter of the sounds it shares a track with;
      doc_freq     Counter sound -> number of tracks using it.
    """
    from itertools import combinations

    pair_counts = Counter()
    partner_index = defaultdict(Counter)
    doc_freq = Counter()
    for row in T:
        distinct = sorted(set(row["score_sounds"]))
        doc_freq.update(distinct)
        for left, right in combinations(distinct, 2):
            pair_counts[left, right] += 1
            partner_index[left][right] += 1
            partner_index[right][left] += 1
    return pair_counts, partner_index, doc_freq
def pairings_for(prefix, T, top=12, min_count=1):
    """What pairs well with `prefix`* — lift-weighted partners (the 🍷 question).

    Args:
        prefix: case-insensitive prefix selecting the "hit" sounds.
        T: track dicts, each with a ``score_sounds`` list.
        top: maximum number of partners returned.
        min_count: minimum number of co-occurrences (summed over all hits) a
            partner needs to be ranked. Defaults to 1, i.e. every partner that
            ever shares a track with a hit; raise it to suppress one-off pairs.

    Returns:
        ``(hits, ranked)``. ``hits`` lists the sounds matching the prefix and
        ``ranked`` holds up to ``top`` tuples ``(partner, count, lift)`` sorted
        by lift then count, both descending. Sounds that themselves match the
        prefix are never listed as partners. Returns ``(None, [])`` when no
        sound matches.
    """
    _, partners, df = cooccurrence(T)
    n = len(T)
    needle = prefix.lower()
    hits = [s for s in df if s.lower().startswith(needle)]
    if not hits:
        return None, []
    agg = Counter()
    base = 0
    for s in hits:
        base += df[s]
        for p, c in partners[s].items():
            agg[p] += c
    # lift = P(partner|hit) / P(partner) ; require co-occur ≥ min_count
    ranked = sorted(
        ((p, c, (c / base) / max(df[p] / n, 1e-9)) for p, c in agg.items()
         if c >= min_count and not p.lower().startswith(needle)),
        key=lambda x: (-x[2], -x[1]))[:top]
    return hits, ranked
def build():
    """Assemble the phase-1 EDA report as a JSON-serialisable dict.

    Loads the catalog view (CV) and pattern registry (PR), the canonical gig
    dates, the per-gig metadata rows and the git creation dates, then derives
    the cuts stored under the report keys: coverage, tempo, collab_fingerprint,
    pairings_top, cadence, styles, palette_top, families, unclassified_top,
    idioms_top, idioms_counts, recurrence_top, collab and vocabulary_growth.

    Raises:
        ValueError: if no track yields a parseable tempo (min() of an empty
            list).
    """
    # Context managers close both artifact handles deterministically; JSON is
    # decoded as UTF-8 rather than with the locale default.
    with open(CV, encoding="utf-8") as fh:
        cv = json.load(fh)
    with open(PR, encoding="utf-8") as fh:
        pr = json.load(fh)
    T = cv["tracks"]
    gigs = gig_year_index()
    meta = metadata_bpm()
    # later rows win when the same file appears in several gigs
    meta_by_file = {m["file"]: m for m in meta if m.get("file")}

    # tempo per track (corner A) + A↔C delta
    tempo = {}
    ac_delta = []
    for t in T:
        tp = parse_tempo(t.get("source"))
        if tp:
            tempo[t["track"]] = tp
            mc = meta_by_file.get(t["track"])
            # only fixed tempos with a numeric metadata bpm are comparable
            if mc and isinstance(mc.get("bpm"), (int, float)) and not tp["morph"]:
                ac_delta.append({"track": t["name"], "score_bpm": tp["bpm"],
                                 "meta_bpm": mc["bpm"], "delta": tp["bpm"] - mc["bpm"]})
    bpms = [tp["bpm"] for tp in tempo.values()]
    morph = [k for k, v in tempo.items() if v["morph"]]

    # creation-BPM story: tempo vs git-creation date (studio tempo, not stage tempo)
    created = git_creation_dates()
    by_creation = []
    for t in T:
        cdate = created.get(t["track"])
        if cdate and t["track"] in tempo:
            by_creation.append({"name": t["name"], "track": t["track"],
                                "created": cdate, "bpm": tempo[t["track"]]["bpm"],
                                "morph": tempo[t["track"]]["morph"]})
    by_creation.sort(key=lambda x: x["created"])
    creation_year_bpm = defaultdict(list)
    for x in by_creation:
        creation_year_bpm[x["created"][:4]].append(x["bpm"])
    creation_tempo = {y: round(st.median(v)) for y, v in sorted(creation_year_bpm.items())}

    # palette + families (every use counts, not just once per track)
    snd = Counter()
    for t in T:
        for s in t["score_sounds"]:
            snd[s] += 1
    fam, unclassified = Counter(), Counter()
    for t in T:
        for s in t["score_sounds"]:
            k = sample_family(s)
            # classified sounds are tallied per family, the rest per sound name
            (fam if k else unclassified)[k or s] += 1

    # styles (corner C)
    sty = Counter(m["style"] for m in meta if m.get("style"))

    # idioms
    shared = sorted((p for p in pr["patterns"] if p["scope"] == "shared"),
                    key=lambda p: -p["n_tracks"])
    repeated = [p for p in pr["patterns"] if p["scope"] == "repeated"]

    # recurrence
    recurrence = sorted(({"name": t["name"], "gigs": len(t["gigs"]),
                          "track": t["track"]} for t in T),
                        key=lambda x: -x["gigs"])

    # collab dimension (live/collab/<who>/…)
    collab = Counter()
    for t in T:
        m = re.match(r"live/collab/([^/]+)/", t["track"])
        if m:
            collab[m.group(1)] += 1

    report = {
        "schema": "eda phase-1 (exploratory; tidy cuts for viz)", "as_of": AS_OF,
        "coverage": {"tracks": len(T), "canonical_gigs": len(gigs),
                     "gigs_with_tracklist": cv["stats"]["with_metadata"],
                     "tracks_with_tempo": len(tempo),
                     "tracks_with_creation_date": len(by_creation)},
        "tempo": {
            "n": len(bpms), "min": min(bpms), "max": max(bpms),
            "median": st.median(bpms), "mean": round(st.mean(bpms), 1),
            "morphing_tracks": len(morph),
            # 10-bpm-wide buckets keyed by their lower bound
            "histogram": dict(sorted(Counter(int(b // 10) * 10 for b in bpms).items())),
            "ac_delta": sorted(ac_delta, key=lambda x: -abs(x["delta"])),
            "creation_tempo_by_year": creation_tempo,
            "by_creation": by_creation,
        },
        "collab_fingerprint": collab_fingerprint(T, tempo),
        "pairings_top": [{"a": a, "b": b, "n": c}
                         for (a, b), c in cooccurrence(T)[0].most_common(20)],
        "cadence": dict(sorted(Counter(d[:4] for d in gigs.values()).items())),
        "styles": dict(sty.most_common()),
        "palette_top": dict(snd.most_common(25)),
        "families": dict(fam.most_common()),
        "unclassified_top": dict(unclassified.most_common(15)),
        "idioms_top": [{"norm": p["norm"], "n_tracks": p["n_tracks"]} for p in shared[:15]],
        "idioms_counts": {"shared": len(shared), "repeated": len(repeated),
                          "total": pr["n_patterns"]},
        "recurrence_top": recurrence[:20],
        "collab": dict(collab.most_common()),
        "vocabulary_growth": dict(sorted(vocabulary_growth().items())),
    }
    return report
def _bar(n, mx, w=32):
return "█" * round(w * n / mx) if mx else ""
def main():
    """Build the report, write it to OUT as UTF-8 JSON, and print the narrative.

    Each printed section reads only from the report dict returned by build().
    Sections whose data is empty (e.g. no Dirt-Samples directory, hence no
    vocabulary growth) print their header and no bars instead of crashing.
    """
    r = build()
    # ensure_ascii=False emits raw accents/symbols, so pin the file encoding
    # instead of depending on the locale default.
    OUT.write_text(json.dumps(r, ensure_ascii=False, indent=1), encoding="utf-8")
    c = r["coverage"]
    print(f"✓ {OUT}")
    print(f"\ncoverage: {c['tracks']} tracks · {c['canonical_gigs']} canonical gigs · "
          f"{c['tracks_with_tempo']} with parsed tempo\n")
    t = r["tempo"]
    print(f"── TEMPO (true, from setcps) ── {t['min']:.0f}–{t['max']:.0f} bpm, "
          f"median {t['median']:.0f}, {t['morphing_tracks']} morphing")
    mx = max(t["histogram"].values(), default=0)
    for b, n in t["histogram"].items():
        print(f" {b:>3}s {_bar(n, mx, 24)} {n}")
    print(" biggest score↔metadata bpm gaps:")
    for d in t["ac_delta"][:6]:
        print(f" {d['track']:<26} score {d['score_bpm']:.0f} vs meta {d['meta_bpm']:.0f} ({d['delta']:+.0f})")
    # report the computed gig count rather than a hard-coded number
    print(f"\n── GIG CADENCE (all {c['canonical_gigs']} canonical) ──")
    mx = max(r["cadence"].values(), default=0)
    for y, n in r["cadence"].items():
        print(f" {y} {_bar(n, mx, 24)} {n}")
    print("\n── SAMPLE FAMILIES ──")
    tot = sum(r["families"].values())
    for k, n in r["families"].items():
        # bars are scaled against the total, so they show each family's share
        print(f" {k:<7} {_bar(n, tot, 24)} {n:>3} ({100*n/tot:.0f}%)")
    print(f" unclassified: {sum(r['unclassified_top'].values())}+ uses "
          f"(top: {', '.join(list(r['unclassified_top'])[:6])})")
    print("\n── SIGNATURE IDIOMS (shared phrases) ──")
    for p in r["idioms_top"][:8]:
        print(f" {p['n_tracks']:>2} tracks «{p['norm'][:48]}»")
    print("\n── SET-STAPLES (most performed) ──")
    for x in r["recurrence_top"][:10]:
        print(f" {x['gigs']:>2} gigs {x['name']}")
    print("\n── CREATION TEMPO (studio, by git-add year) vs stage tempo ──")
    for y, b in r["tempo"]["creation_tempo_by_year"].items():
        print(f" {y} median {b} bpm")
    print("\n── COLLAB FINGERPRINT (who betrays themselves) ──")
    fp = r["collab_fingerprint"]
    # fastest collaborator first; groups without a tempo sort last
    for w in sorted(fp, key=lambda k: -(fp[k]["bpm_median"] or 0)):
        f = fp[w]
        tells = ", ".join(f"{d['sound']}(×{d['n']})" for d in f["distinctive_samples"][:4])
        rng = f"{f['bpm_min']:.0f}–{f['bpm_max']:.0f}" if f["bpm_min"] else "—"
        print(f" {w:<14} n={f['n']:<2} bpm~{f['bpm_median']} [{rng}] tells: {tells}")
    print("\n── COLLAB ──", " ".join(f"{k}:{v}" for k, v in r["collab"].items()))
    print("\n── VOCABULARY GROWTH (custom packs/month) ──")
    vg = r["vocabulary_growth"]
    # vocabulary_growth() is empty when the Dirt-Samples directory is missing
    mx = max(vg.values(), default=0)
    for mth, n in vg.items():
        print(f" {mth} {_bar(n, mx, 28)} {n}")
def cmd_pair(prefix):
    """Ad-hoc: `tide_eda.py pair bogdan` → what `bogdan*` pairs well with 🍷.

    Loads the tracks from the catalog view (CV), ranks the partners of every
    sound starting with ``prefix`` via pairings_for(), and prints one line per
    partner with its co-occurrence count, lift and sample family ('?' when
    unclassified). Prints a notice and returns when no sound matches.
    """
    # close the catalog handle deterministically; JSON is decoded as UTF-8
    with open(CV, encoding="utf-8") as fh:
        T = json.load(fh)["tracks"]
    hits, ranked = pairings_for(prefix, T)
    if not hits:
        print(f"no sound matches «{prefix}*»")
        return
    print(f"«{prefix}*» = {', '.join(hits)} → pairs best with (lift-ranked):")
    for p, c, lift in ranked:
        fam = sample_family(p) or "?"
        print(f" {c:>2}× lift {lift:>4.1f} {p:<18} [{fam}]")
if __name__ == "__main__":
    # `pair <prefix>` runs the ad-hoc pairing query; anything else builds the report.
    _cli_args = sys.argv[1:]
    if len(_cli_args) >= 2 and _cli_args[0] == "pair":
        cmd_pair(_cli_args[1])
    else:
        main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment