Skip to article frontmatter | Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Imports and configuration

# --- Cell 1: Imports and configuration ---
from pathlib import Path
import json
import math
import warnings

import numpy as np
import pandas as pd
import librosa

# Print the installed librosa version so a saved run records which release produced it.
print("librosa version:", librosa.__version__)

# === USER SETTINGS ===
# Set this to the folder that contains your audio files.
# NOTE: the active value below is an absolute, machine-specific path; replace it
# (the commented line is a template) before running on another machine.
#AUDIO_FOLDER = Path("/path/to/your/audio/folder")   # <-- CHANGE THIS
AUDIO_FOLDER = Path("/Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips")

# Output metadata files (JSON + CSV); both paths live inside AUDIO_FOLDER.
OUTPUT_JSON = AUDIO_FOLDER / "track_metadata.json"
OUTPUT_CSV  = AUDIO_FOLDER / "track_metadata.csv"

# Which analyses to run (each flag switches one analysis stage on or off)
DO_TEMPO_METER = True
DO_CHORDS      = True
DO_MELODY      = True   # CREPE-based melody extraction; heavier

# CREPE-related settings (only used if DO_MELODY is True)
CREPE_RESAMPLE_HZ = 16000   # resample to 16 kHz for CREPE
CREPE_STEP_MS     = 20      # frame step in ms
CREPE_MODEL       = "small" # "tiny" | "small" | "medium" | "large" | "full"
CREPE_CONF_MIN    = 0.6     # min confidence to keep a frame

# Beat / meter settings
# HOP_LENGTH is passed as `hop_length` to every librosa call in this notebook,
# so frame indices from different analyses share one time grid.
HOP_LENGTH = 256
# Candidate bar lengths (in beats) that the meter estimate scores against each other.
CANDIDATE_BEATS_PER_BAR = [2, 3, 4, 5, 6, 7]

print("Audio folder:", AUDIO_FOLDER)
librosa version: 0.11.0
Audio folder: /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips

File listing and audio loading

# --- Cell 2: Helper functions: file listing and audio loading ---

def list_audio_files(folder: Path):
    """Return the audio files under *folder* (recursively), sorted and de-duplicated.

    A path is kept when it is a regular file whose extension, compared
    case-insensitively, is one of .wav, .mp3, .flac, .ogg or .m4a.

    Parameters
    ----------
    folder : Path
        Root directory to search; sub-folders are included.

    Returns
    -------
    list[Path]
        Matching files in sorted order. Empty when nothing matches.
    """
    exts = {".wav", ".mp3", ".flac", ".ogg", ".m4a"}
    # One recursive walk covers the top level as well as every sub-folder, so each
    # file is visited once. Lower-casing the suffix also picks up "TRACK.WAV",
    # and is_file() drops directories that merely end in an audio extension.
    return sorted(
        p for p in folder.rglob("*")
        if p.suffix.lower() in exts and p.is_file()
    )

def load_audio_mono(path: Path):
    """Read *path* with librosa, down-mixed to mono and without resampling.

    Returns the pair ``(samples, sample_rate)``; ``sr=None`` keeps the file's
    own sample rate.
    """
    samples, sample_rate = librosa.load(path, sr=None, mono=True)
    return samples, sample_rate

# Quick sanity check: report how many audio files are visible, or warn when the
# configured folder is missing.
if not AUDIO_FOLDER.exists():
    print("⚠️ AUDIO_FOLDER does not exist yet. Please set a valid path.")
else:
    test_files = list_audio_files(AUDIO_FOLDER)
    print(f"Found {len(test_files)} audio files in {AUDIO_FOLDER}")
Found 13 audio files in /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips

Tempo & meter analysis

# --- Cell 3: Tempo & meter analysis ---

def meter_score(acc_seq: np.ndarray, m: int) -> float:
    """Score how strongly *acc_seq* repeats every *m* steps.

    The sequence is mean-centred and correlated with a copy of itself shifted
    by *m*; the result is normalized by the energy of the two overlapping
    parts and negative correlations are clipped to 0, giving a value in 0..1.
    Sequences that are not longer than *m* score 0.0.
    """
    if m >= len(acc_seq):
        return 0.0

    centered = acc_seq - np.mean(acc_seq)
    head = centered[:-m]
    tail = centered[m:]

    cross = np.sum(head * tail)
    energy = float(np.sum(head**2) * np.sum(tail**2))
    # The small epsilon keeps a constant sequence (zero energy) from dividing by zero.
    ratio = cross / (math.sqrt(energy) + 1e-12)
    return float(ratio) if ratio > 0.0 else 0.0

def label_from_m(m: int) -> str:
    """Turn a beats-per-bar count into a display label.

    Six beats are reported as compound 6/8; every other count as m/4.
    """
    return "≈6/8 (compound)" if m == 6 else f"≈{m}/4"

def analyze_tempo_and_meter(y: np.ndarray, sr: int) -> dict:
    """Estimate tempo, count beats, and guess a single meter for the whole signal.

    Returns a dict with keys ``global_tempo_bpm``, ``num_beats``,
    ``meter_label`` and ``meter_confidence``. The two meter entries stay
    ``None`` when fewer than four usable beats are found.
    """
    tempo, frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)
    # beat_track may hand back the tempo as a scalar or an array; take the first value.
    bpm = float(np.atleast_1d(tempo)[0])
    times = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

    result = {
        "global_tempo_bpm": bpm,
        "num_beats": int(len(times)),
        "meter_label": None,
        "meter_confidence": None,
    }

    if len(frames) < 4:
        return result

    # Keep only the beats that index a valid frame of the onset envelope.
    envelope = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)
    in_range = (frames >= 0) & (frames < len(envelope))
    frames = frames[in_range]
    if len(frames) < 4:
        return result

    # Onset strength at each beat, scaled so the strongest beat is ~1.
    accents = envelope[frames].astype(float)
    peak = np.max(accents)
    if peak > 0:
        accents = accents / (peak + 1e-12)

    # Score every candidate bar length; the first candidate wins on a tie.
    per_candidate = {
        beats_per_bar: meter_score(accents, beats_per_bar)
        for beats_per_bar in CANDIDATE_BEATS_PER_BAR
    }
    winner, winner_score = max(per_candidate.items(), key=lambda kv: kv[1])

    result["meter_label"] = label_from_m(winner)
    result["meter_confidence"] = float(winner_score)
    return result

Chord analysis (triads summary)

# --- Cell 4: Chord analysis (triads; summary only) ---

def analyze_chords(y: np.ndarray, sr: int) -> dict:
    """Estimate one triad per beat interval and summarize chords by total duration.

    Chroma features are averaged between consecutive beats, each averaged
    vector is matched against the 24 major/minor triad templates, and equal
    neighbouring labels are merged into segments.

    Parameters
    ----------
    y : np.ndarray
        Audio samples passed straight to librosa.
    sr : int
        Sample rate of *y*.

    Returns
    -------
    dict
        ``top_chords``: list of ``{"chord", "total_duration_sec"}`` entries
        sorted by decreasing total duration; ``num_chord_segments``: number of
        merged segments. Both keep their empty defaults when fewer than two
        beats are detected.
    """
    out = {
        "top_chords": [],
        "num_chord_segments": 0,
    }

    _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)
    if len(beat_frames) < 2:
        return out

    chroma = librosa.feature.chroma_cqt(
        y=y, sr=sr, hop_length=HOP_LENGTH,
        n_chroma=12, bins_per_octave=36
    )

    # Use the tracker's frame indices directly: converting to seconds and back
    # can truncate to a neighbouring frame.
    bf = np.unique(np.asarray(beat_frames).astype(int))
    bf = bf[(bf >= 0) & (bf <= chroma.shape[1])]
    if len(bf) < 2:
        return out

    # pad=False keeps exactly one column per beat-to-beat interval. With the
    # default padding, sync() prepends frame 0, so column 0 would describe the
    # audio *before* the first beat and every label would sit one beat late.
    beat_chroma = librosa.util.sync(chroma, bf, aggregate=np.mean, pad=False)
    norm = np.linalg.norm(beat_chroma, axis=0, keepdims=True)
    norm[norm == 0] = 1.0  # leave silent intervals as all-zero vectors
    beat_chroma_norm = beat_chroma / norm

    # Binary triad templates: row r of each stack has pitch_names[r] as root.
    pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    maj = np.zeros(12)
    maj[[0, 4, 7]] = 1.0
    minr = np.zeros(12)
    minr[[0, 3, 7]] = 1.0
    templates = np.vstack(
        [np.roll(maj, r) for r in range(12)]
        + [np.roll(minr, r) for r in range(12)]
    )
    chord_labels = list(pitch_names) + [f"{p}m" for p in pitch_names]

    sims = templates @ beat_chroma_norm
    chord_seq = [chord_labels[i] for i in np.argmax(sims, axis=0)]

    # Interval boundaries come from the same filtered indices that sync() used,
    # so chord_seq[i] describes [seg_starts[i], seg_ends[i]).
    boundary_times = librosa.frames_to_time(bf, sr=sr, hop_length=HOP_LENGTH)
    seg_starts = boundary_times[:-1]
    seg_ends = boundary_times[1:]
    n = min(len(chord_seq), len(seg_starts))
    if n == 0:
        return out

    # Merge runs of identical labels into (start, end, chord) segments.
    segments = []
    cur_label = chord_seq[0]
    cur_start = seg_starts[0]
    for i in range(1, n):
        if chord_seq[i] != cur_label:
            segments.append((cur_start, seg_ends[i - 1], cur_label))
            cur_label = chord_seq[i]
            cur_start = seg_starts[i]
    segments.append((cur_start, seg_ends[n - 1], cur_label))

    chords_df = pd.DataFrame(segments, columns=["start", "end", "chord"])
    chords_df["duration"] = chords_df["end"] - chords_df["start"]
    out["num_chord_segments"] = int(len(chords_df))

    totals = (chords_df.groupby("chord")["duration"]
              .sum()
              .sort_values(ascending=False))
    out["top_chords"] = [
        {"chord": chord, "total_duration_sec": float(total)}
        for chord, total in totals.items()
    ]

    return out

Melody analysis with CREPE (optional)

# --- Cell 5: Melody analysis with CREPE (optional) ---

def analyze_melody_crepe(y: np.ndarray, sr: int) -> dict:
    """Summarize the pitch track that CREPE predicts for *y*.

    Returns a dict with ``melody_available``, ``frames_kept``,
    ``pitch_range_midi``, ``pitch_range_notes``, ``median_note`` and
    ``top_notes`` (up to ten note names with their frame counts). When the
    ``crepe`` package cannot be imported, or no frame passes the confidence
    filter, ``melody_available`` stays False and the summary fields keep
    their empty defaults.
    """
    summary = {
        "melody_available": False,
        "frames_kept": 0,
        "pitch_range_midi": None,
        "pitch_range_notes": None,
        "median_note": None,
        "top_notes": []
    }

    # CREPE is optional: any failure while importing it just disables this stage.
    try:
        import crepe
    except Exception:
        return summary

    mono = librosa.to_mono(y) if y.ndim > 1 else y

    # Bring the signal to the configured CREPE rate unless it is already there.
    wanted_sr = int(CREPE_RESAMPLE_HZ)
    if sr == wanted_sr:
        signal = mono
        signal_sr = sr
    else:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            signal = librosa.resample(mono, orig_sr=sr, target_sr=wanted_sr, res_type="kaiser_fast")
        signal_sr = wanted_sr

    time_s, freq_hz, conf, _ = crepe.predict(
        signal,
        signal_sr,
        step_size=CREPE_STEP_MS,
        model_capacity=CREPE_MODEL,
        viterbi=True
    )

    # Keep frames with finite values, enough confidence, and a positive frequency.
    finite = np.isfinite(freq_hz) & np.isfinite(conf)
    keep = finite & (conf >= CREPE_CONF_MIN) & (freq_hz > 0)
    kept_times = time_s[keep]
    kept_freqs = freq_hz[keep]

    summary["frames_kept"] = int(len(kept_times))
    if len(kept_times) == 0:
        return summary

    midi = librosa.hz_to_midi(kept_freqs)
    summary["melody_available"] = True

    lowest = float(np.nanmin(midi))
    highest = float(np.nanmax(midi))
    summary["pitch_range_midi"] = [lowest, highest]
    summary["pitch_range_notes"] = [
        librosa.midi_to_note(pitch, octave=True) for pitch in (lowest, highest)
    ]

    median_pitch = float(np.nanmedian(midi))
    summary["median_note"] = librosa.midi_to_note(median_pitch, octave=True)

    # Count frames per note name and keep the ten most frequent.
    names = librosa.midi_to_note(midi, octave=True)
    distinct, tally = np.unique(names, return_counts=True)
    ranking = np.argsort(-tally)
    summary["top_notes"] = [
        {"note": str(distinct[k]), "frames": int(tally[k])}
        for k in ranking[:10]
    ]

    return summary

Full-track wrapper

# --- Cell 6: Full-track analysis wrapper ---

def analyze_track(path: Path) -> dict:
    """Run every enabled analysis on one audio file and collect the results.

    The returned dict always has the keys ``file_path``, ``file_name``,
    ``duration_sec``, ``sample_rate``, ``error``, ``tempo_meter``, ``chords``
    and ``melody``. Any exception raised while loading or analyzing is
    caught, printed, and stored as its ``repr`` under ``error``; fields that
    were not reached stay ``None``.
    """
    print(f"\n▶ Analyzing: {path.name}")
    record = {
        "file_path": str(path),
        "file_name": path.name,
        "duration_sec": None,
        "sample_rate": None,
        "error": None,
        "tempo_meter": None,
        "chords": None,
        "melody": None,
    }

    # One failing file must not abort the whole batch, hence the broad catch.
    try:
        samples, rate = load_audio_mono(path)
        record["duration_sec"] = float(len(samples) / rate)
        record["sample_rate"] = int(rate)

        if DO_TEMPO_METER:
            record["tempo_meter"] = analyze_tempo_and_meter(samples, rate)

        if DO_CHORDS:
            record["chords"] = analyze_chords(samples, rate)

        if DO_MELODY:
            record["melody"] = analyze_melody_crepe(samples, rate)

    except Exception as exc:
        record["error"] = repr(exc)
        print(f"  ⚠️ Error analyzing {path.name}: {exc}")

    return record

Batch loop + output files

# --- Cell 7: Run batch analysis and save metadata ---

# Stop early with a clear error when the folder is missing or holds no audio.
if not AUDIO_FOLDER.exists():
    raise FileNotFoundError(f"AUDIO_FOLDER does not exist: {AUDIO_FOLDER}")

audio_files = list_audio_files(AUDIO_FOLDER)
if not audio_files:
    raise RuntimeError(f"No audio files found in {AUDIO_FOLDER} (or subfolders).")

# One metadata dict per track, in the order the files were listed.
all_meta = []

for path in audio_files:
    meta = analyze_track(path)
    all_meta.append(meta)

# Save JSON (full nested metadata for every track)
with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(all_meta, f, indent=2)

# Save CSV (flatten some nested fields)
rows = []
for m in all_meta:
    row = {
        "file_name": m.get("file_name"),
        "file_path": m.get("file_path"),
        "duration_sec": m.get("duration_sec"),
        "sample_rate": m.get("sample_rate"),
        "error": m.get("error"),
    }

    # `or {}` covers a section that is None (analysis disabled or failed),
    # so the .get() calls below yield None instead of raising.
    tm = m.get("tempo_meter") or {}
    row["global_tempo_bpm"]  = tm.get("global_tempo_bpm")
    row["num_beats"]         = tm.get("num_beats")
    row["meter_label"]       = tm.get("meter_label")
    row["meter_confidence"]  = tm.get("meter_confidence")

    ch = m.get("chords") or {}
    row["num_chord_segments"] = ch.get("num_chord_segments")
    if ch.get("top_chords"):
        # Only the first three chords of the list go into the CSV, as one text cell.
        top3 = ch["top_chords"][:3]
        row["top_chords_summary"] = "; ".join(
            f"{c['chord']} ({c['total_duration_sec']:.1f}s)" for c in top3
        )
    else:
        row["top_chords_summary"] = None

    mel = m.get("melody") or {}
    row["melody_available"] = mel.get("melody_available")
    # Fall back to a [None, None] pair so both range columns can always be filled.
    pr = mel.get("pitch_range_notes") or [None, None]
    row["melody_range_low"]  = pr[0]
    row["melody_range_high"] = pr[1]
    row["melody_median_note"] = mel.get("median_note")

    rows.append(row)

# One CSV row per track; the DataFrame index is not written.
df = pd.DataFrame(rows)
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n✅ Done. Analyzed {len(all_meta)} tracks.")
print(f"JSON metadata saved to: {OUTPUT_JSON}")
print(f"CSV metadata saved to : {OUTPUT_CSV}")

▶ Analyzing: Mediu Zhiga.wav
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: Ra Bacheeza.wav
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: [SP] Alfonso Ortiz Tirado - TE QUIERO DIJISTE.mp3
311/311 ━━━━━━━━━━━━━━━━━━━━ 7s 21ms/step

▶ Analyzing: [SP] Alvaro Carrillo - Pinotepa Nacional.mp3
292/292 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: [SP] Lagrimas Negras.mp3
126/126 ━━━━━━━━━━━━━━━━━━━━ 3s 21ms/step

▶ Analyzing: [SP] Los Panchos - Contigo.mp3
258/258 ━━━━━━━━━━━━━━━━━━━━ 6s 25ms/step

▶ Analyzing: [SP] Los Panchos - Jamas Jamas Jamas.mp3
283/283 ━━━━━━━━━━━━━━━━━━━━ 6s 22ms/step

▶ Analyzing: [SP] Los Panchos - Te Quiero Dijiste.mp3
229/229 ━━━━━━━━━━━━━━━━━━━━ 5s 21ms/step

▶ Analyzing: [SP] Soledad y el Mar - Natalia Lafourcade.mp3
338/338 ━━━━━━━━━━━━━━━━━━━━ 7s 21ms/step

▶ Analyzing: [ZAP] Binni Gula_za - Ni_bixi Dxi Zina.mp3
277/277 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: [ZAP] Mediu Zhiga.mp3
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: [ZAP] Ra Bacheeza.mp3
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step

▶ Analyzing: [ZAP] Sabor a Mi - Trio Galenos Y Mario Carrillo.mp3
88/88 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step

✅ Done. Analyzed 13 tracks.
JSON metadata saved to: /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips/track_metadata.json
CSV metadata saved to : /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips/track_metadata.csv