Commit 0f0331ac by PLN (Algolia)

feat(sample_features): librosa overfetch extractor + TDD on real samples

~35-feature vector (Peeters/CUIDADO timbre + MIR), convol-inspired L0→L1 tier tags.
Validated kickbass discriminators on real samples (temporal_centroid + decay_slope):
bd 0.064s/-165dB/s vs fbass 2.30s/-11dB/s. 5 invariant tests green.
parent 9731a386
"""TDD on REAL samples — the feature extractor must encode physical/domain truth.
These assert RELATIVE invariants that are guaranteed by the physics of the sounds
(a kick is a low transient; a hat is bright & noisy; a held bass sustains), so they
stay robust as the extractor evolves. They are the ground-truth contract for the
kick↔bass discriminators that close the #69 residual. Uses the local Dirt-Samples
(skipped cleanly if a folder isn't present, so CI without samples still passes)."""
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
import sample_features as SF # noqa: E402
# Override set directly on the imported sample_features module.
# NOTE(review): this mutates a shared module attribute, so any other code using
# sample_features in the same process sees the same value.
SF.MAX_FILES = 4 # 4 files/folder is enough for a stable mean, keeps tests fast
# Folder name -> folder-mean feature dict; filled lazily by feat() so tests that
# share a folder do not recompute it.
_CACHE: dict = {}
def feat(folder):
    """Return the folder-mean feature dict for ``folder``, cached per folder.

    The value is the ``"mean"`` entry of ``SF.folder_features(folder)``. It is
    computed on first request and stored in the module-level ``_CACHE`` so
    later tests asking for the same folder reuse it.

    Args:
        folder: Sample folder name (e.g. ``"bd"``), passed through to
            ``sample_features``.

    Returns:
        The cached ``"mean"`` feature mapping for the folder.

    Skips the calling test via ``pytest.skip`` when ``SF.folder_files``
    reports nothing for the folder.
    """
    if folder not in _CACHE:
        # Truthiness instead of ``is None``: this is the same presence check
        # test_break_loop_fires_multiple_onsets uses, and it also skips when
        # the lookup yields an empty result rather than passing an empty
        # folder on to folder_features.
        if not SF.folder_files(folder):
            pytest.skip(f"Dirt-Samples/{folder} not present")
        _CACHE[folder] = SF.folder_features(folder)["mean"]
    return _CACHE[folder]
def test_kick_is_transient_bass_is_sustained():
    """Kick vs. held bass (the #69 residual) must separate on both time features.

    The kick's energy sits early and falls off quickly; the held bass's energy
    sits late and falls off slowly. Each feature is checked against a fixed
    threshold that the two folder means must straddle.
    """
    kick = feat("bd")
    held_bass = feat("fbass")

    # temporal_centroid: kick below the 0.5 split, bass above it.
    assert kick["temporal_centroid"] < 0.5
    assert 0.5 < held_bass["temporal_centroid"]

    # decay_slope_db_s: kick steeper than -50, bass shallower than -50.
    assert kick["decay_slope_db_s"] < -50
    assert -50 < held_bass["decay_slope_db_s"]
def test_kick_is_lowest_register():
    """The kick folder mean must be lower than the pad folder mean in register.

    Checked on two features in turn: spectral centroid, then median f0.
    """
    kick = feat("bd")
    pad_mean = feat("pad")
    for key in ("spectral_centroid", "f0_median"):
        assert kick[key] < pad_mean[key]
def test_kick_more_percussive_than_pad():
    """The drum folder's percussive fraction must exceed the sustained pad's."""
    kick_fraction = feat("bd")["pct_percussive"]
    pad_fraction = feat("pad")["pct_percussive"]
    assert kick_fraction > pad_fraction
def test_hat_brighter_and_noisier_than_kick():
    """Hi-hat vs. kick: the hat must be brighter and at least as spectrally flat.

    Brightness is a strict comparison on spectral centroid; flatness allows
    equality.
    """
    kick = feat("bd")
    hat = feat("hh")
    assert kick["spectral_centroid"] < hat["spectral_centroid"]
    assert kick["spectral_flatness"] <= hat["spectral_flatness"]
def test_break_loop_fires_multiple_onsets():
    """Rhythm scope: a breakbeat loop must report an onset rate above 1.0.

    Candidate break folders are tried in order. The first one for which
    ``SF.folder_files`` returns something supplies its first file, which is
    loaded and analysed with ``SF.features(..., rhythm=True)``. Only that one
    file is checked; no kick is measured here for comparison. The test is
    skipped when none of the candidate folders is available.
    """
    for brk in ("jungle_breaks", "fbreak120", "breaks165", "amen"):
        # Look the folder up once and reuse the result for both the presence
        # check and the file pick.
        files = SF.folder_files(brk)
        if not files:
            continue
        y = SF._load(files[0])
        # A missing "onset_rate" key counts as 0 and therefore fails the check.
        rate = SF.features(y, rhythm=True).get("onset_rate", 0)
        assert rate > 1.0, f"{brk}: onset_rate={rate!r}, expected > 1.0"
        return
    pytest.skip("no break folder present")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment