Imports and configuration
# --- Cell 1: Imports and configuration ---
from pathlib import Path
import json
import math
import warnings
import numpy as np
import pandas as pd
import librosa
# Print the librosa version that this notebook is running against.
print("librosa version:", librosa.__version__)

# === USER SETTINGS ===
# Folder that contains the audio files to analyze. Template for a new setup:
# AUDIO_FOLDER = Path("/path/to/your/audio/folder")  # <-- CHANGE THIS
AUDIO_FOLDER = Path("/Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips")

# Output metadata files (JSON + CSV); both are written inside AUDIO_FOLDER.
OUTPUT_JSON = AUDIO_FOLDER / "track_metadata.json"
OUTPUT_CSV = AUDIO_FOLDER / "track_metadata.csv"

# Which analyses to run for each track.
DO_TEMPO_METER = True
DO_CHORDS = True
DO_MELODY = True  # CREPE-based melody extraction; heavier

# CREPE-related settings (only used if DO_MELODY is True).
CREPE_RESAMPLE_HZ = 16000  # audio is resampled to this rate (16 kHz) before CREPE
CREPE_STEP_MS = 20  # frame step in ms, passed to CREPE as step_size
CREPE_MODEL = "small"  # "tiny" | "small" | "medium" | "large" | "full"
CREPE_CONF_MIN = 0.6  # min confidence to keep a frame; lower-confidence frames are dropped

# Beat / meter settings.
HOP_LENGTH = 256  # passed as hop_length to the librosa beat, onset-strength and chroma calls
CANDIDATE_BEATS_PER_BAR = [2, 3, 4, 5, 6, 7]  # beats-per-bar values scored by the meter estimate

print("Audio folder:", AUDIO_FOLDER)
librosa version: 0.11.0
Audio folder: /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips
File listing and audio loading
# --- Cell 2: Helper functions: file listing and audio loading ---
def list_audio_files(folder: Path):
    """Return the audio files found under *folder*, searched recursively.

    A path is included when it is a regular file whose name ends with one of
    ``.wav``, ``.mp3``, ``.flac``, ``.ogg`` or ``.m4a``.  The extension is
    compared case-insensitively, so ``clip.WAV`` and ``song.Mp3`` are found
    too.  Directories are never returned, even if their name ends with an
    audio extension.

    Args:
        folder: Root directory to search; every subfolder is searched as well.

    Returns:
        A sorted list of ``Path`` objects with no duplicates.
    """
    exts = (".wav", ".mp3", ".flac", ".ogg", ".m4a")
    # One recursive walk covers the folder itself and all subfolders, so no
    # separate top-level pass (and no de-duplication) is needed.
    return sorted(
        p for p in folder.rglob("*")
        if p.is_file() and p.name.lower().endswith(exts)
    )
def load_audio_mono(path: Path):
    """Read an audio file as a mono signal, keeping its native sample rate.

    Returns the ``(samples, sample_rate)`` pair produced by ``librosa.load``
    when called with ``sr=None`` and ``mono=True``.
    """
    samples, native_sr = librosa.load(path, mono=True, sr=None)
    return samples, native_sr
# Quick sanity check: report how many audio files the configured folder holds,
# or warn when the folder has not been set to an existing path.
if not AUDIO_FOLDER.exists():
    print("⚠️ AUDIO_FOLDER does not exist yet. Please set a valid path.")
else:
    test_files = list_audio_files(AUDIO_FOLDER)
    print(f"Found {len(test_files)} audio files in {AUDIO_FOLDER}")
Found 13 audio files in /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips
Tempo & meter analysis
# --- Cell 3: Tempo & meter analysis ---
def meter_score(acc_seq: np.ndarray, m: int) -> float:
    """Score how strongly *acc_seq* repeats with a period of *m* entries.

    The sequence is mean-centred and correlated with itself shifted by *m*
    positions.  The result is the normalized correlation with negative
    values clipped to zero, so it lies in 0..1.  A sequence that is not
    longer than *m* scores 0.0.
    """
    if m >= len(acc_seq):
        return 0.0

    centered = acc_seq - np.mean(acc_seq)
    head = centered[:-m]
    tail = centered[m:]

    overlap = np.sum(head * tail)
    energy = float(np.sum(head**2) * np.sum(tail**2))
    # The small constant keeps the division defined for a flat sequence.
    scale = math.sqrt(energy) + 1e-12
    return float(max(0.0, overlap / scale))
def label_from_m(m: int) -> str:
    """Turn a beats-per-bar count into an approximate time-signature label.

    Six beats per bar is labelled as compound 6/8; any other count is written
    over a denominator of 4.
    """
    return "≈6/8 (compound)" if m == 6 else f"≈{m}/4"
def analyze_tempo_and_meter(y: np.ndarray, sr: int) -> dict:
    """Estimate the global tempo, the beat count, and a rough global meter.

    Returns a dict with the keys ``global_tempo_bpm``, ``num_beats``,
    ``meter_label`` and ``meter_confidence``.  The two meter fields stay
    ``None`` when fewer than four usable beats are detected.  Otherwise the
    meter is the entry of ``CANDIDATE_BEATS_PER_BAR`` whose lag gives the
    highest ``meter_score`` on the per-beat onset strengths, and the
    confidence is that score.
    """
    tempo_bpm, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=HOP_LENGTH)

    result = {
        # beat_track may return the tempo as a scalar or an array; take the first value.
        "global_tempo_bpm": float(np.atleast_1d(tempo_bpm)[0]),
        "num_beats": int(len(beat_times)),
        "meter_label": None,
        "meter_confidence": None,
    }
    if len(beat_frames) < 4:
        return result

    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)

    # Keep only beats that index a valid position of the onset envelope.
    in_range = (beat_frames >= 0) & (beat_frames < len(onset_env))
    usable_beats = beat_frames[in_range]
    if len(usable_beats) < 4:
        return result

    accents = onset_env[usable_beats].astype(float)
    peak = np.max(accents)
    if peak > 0:
        accents = accents / (peak + 1e-12)

    scores = {m: meter_score(accents, m) for m in CANDIDATE_BEATS_PER_BAR}
    # On ties the earliest candidate wins, as max() returns the first maximum.
    best_m, best_score = max(scores.items(), key=lambda item: item[1])

    result["meter_label"] = label_from_m(best_m)
    result["meter_confidence"] = float(best_score)
    return result
Chord analysis (triads summary)
# --- Cell 4: Chord analysis (triads; summary only) ---
def analyze_chords(y: np.ndarray, sr: int) -> dict:
    """Estimate a major/minor triad per beat interval and summarize chord usage.

    Beats are tracked with librosa, CQT chroma is averaged over every
    beat-to-beat interval, and each interval is labelled with the best
    matching of 24 binary triad templates (12 major, 12 minor).  Consecutive
    intervals that share a label are merged into one segment.

    Args:
        y: Audio samples, passed straight to librosa.
        sr: Sample rate of ``y``.

    Returns:
        A dict with two keys:

        * ``"num_chord_segments"``: number of merged chord segments.
        * ``"top_chords"``: list of ``{"chord", "total_duration_sec"}`` dicts,
          one per distinct chord, ordered by decreasing total duration.

        Both keep their empty defaults (``0`` and ``[]``) when fewer than two
        usable beats are found.
    """
    out = {
        "top_chords": [],
        "num_chord_segments": 0,
    }

    _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)
    if len(beat_frames) < 2:
        return out

    chroma = librosa.feature.chroma_cqt(
        y=y, sr=sr, hop_length=HOP_LENGTH,
        n_chroma=12, bins_per_octave=36,
    )

    # Interval boundaries: unique beat frames that fall inside the chroma matrix.
    bounds = np.unique(np.asarray(beat_frames, dtype=int))
    bounds = bounds[(bounds >= 0) & (bounds <= chroma.shape[1])]
    if len(bounds) < 2:
        return out

    # pad=False is essential.  With the default pad=True, sync() adds frame 0
    # and the last frame as extra boundaries, so column 0 would describe the
    # audio *before* the first beat and every chord label would be paired
    # with the wrong (one-beat-late) time interval.  Without padding, column
    # i covers exactly bounds[i] .. bounds[i + 1].
    beat_chroma = librosa.util.sync(chroma, bounds, aggregate=np.mean, pad=False)

    # Segment times come from the same boundaries used for the aggregation,
    # so labels and intervals cannot drift apart.
    bound_times = librosa.frames_to_time(bounds, sr=sr, hop_length=HOP_LENGTH)
    seg_starts = bound_times[:-1]
    seg_ends = bound_times[1:]

    # Unit-normalize each column; silent (all-zero) columns are left as zeros.
    norm = np.linalg.norm(beat_chroma, axis=0, keepdims=True)
    norm[norm == 0] = 1.0
    beat_chroma_norm = beat_chroma / norm

    # Binary triad templates: root + major/minor third + fifth, rotated to
    # each of the 12 roots.  Rows 0-11 are major, rows 12-23 are minor.
    pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    maj = np.zeros(12)
    maj[[0, 4, 7]] = 1.0
    minr = np.zeros(12)
    minr[[0, 3, 7]] = 1.0
    templates = np.vstack(
        [np.roll(maj, r) for r in range(12)]
        + [np.roll(minr, r) for r in range(12)]
    )
    chord_labels = [f"{p}" for p in pitch_names] + [f"{p}m" for p in pitch_names]

    sims = templates @ beat_chroma_norm
    chord_seq = [chord_labels[i] for i in np.argmax(sims, axis=0)]

    # Defensive: never index past the shorter of labels / intervals.
    n = min(len(chord_seq), len(seg_starts))

    # Merge runs of identical labels into (start, end, chord) segments.
    segments = []
    run_start = 0
    for i in range(1, n + 1):
        if i == n or chord_seq[i] != chord_seq[run_start]:
            segments.append((seg_starts[run_start], seg_ends[i - 1], chord_seq[run_start]))
            run_start = i
    if not segments:
        return out

    chords_df = pd.DataFrame(segments, columns=["start", "end", "chord"])
    chords_df["duration"] = chords_df["end"] - chords_df["start"]
    out["num_chord_segments"] = int(len(chords_df))

    used_summary = (
        chords_df.groupby("chord")["duration"]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )
    out["top_chords"] = [
        {"chord": chord, "total_duration_sec": float(duration)}
        for chord, duration in zip(used_summary["chord"], used_summary["duration"])
    ]
    return out
Melody analysis with CREPE (optional)
# --- Cell 5: Melody analysis with CREPE (optional) ---
def analyze_melody_crepe(y: np.ndarray, sr: int) -> dict:
    """Track pitch with CREPE and summarize the confident frames.

    The audio is mixed down to mono if needed, resampled to
    ``CREPE_RESAMPLE_HZ`` when its rate differs, and passed to
    ``crepe.predict``.  Frames with a non-finite or non-positive frequency,
    or with confidence below ``CREPE_CONF_MIN``, are discarded.

    Returns a dict with the keys ``melody_available``, ``frames_kept``,
    ``pitch_range_midi``, ``pitch_range_notes``, ``median_note`` and
    ``top_notes`` (up to ten note names with their frame counts, most
    frequent first).  When CREPE cannot be imported or no frame survives the
    filter, ``melody_available`` is ``False`` and the summary fields keep
    their empty defaults.
    """
    result = {
        "melody_available": False,
        "frames_kept": 0,
        "pitch_range_midi": None,
        "pitch_range_notes": None,
        "median_note": None,
        "top_notes": []
    }

    try:
        import crepe
    except Exception:
        # CREPE is optional: any import failure just means "no melody".
        return result

    mono = librosa.to_mono(y) if y.ndim > 1 else y

    target_sr = int(CREPE_RESAMPLE_HZ)
    if sr == target_sr:
        audio, audio_sr = mono, sr
    else:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            audio = librosa.resample(mono, orig_sr=sr, target_sr=target_sr, res_type="kaiser_fast")
        audio_sr = target_sr

    times, freqs, confidence, _ = crepe.predict(
        audio,
        audio_sr,
        step_size=CREPE_STEP_MS,
        model_capacity=CREPE_MODEL,
        viterbi=True
    )

    keep = np.isfinite(freqs) & np.isfinite(confidence) & (confidence >= CREPE_CONF_MIN) & (freqs > 0)
    kept_times = times[keep]
    kept_freqs = freqs[keep]

    result["frames_kept"] = int(len(kept_times))
    if len(kept_times) == 0:
        return result

    midi = librosa.hz_to_midi(kept_freqs)
    result["melody_available"] = True

    lowest = float(np.nanmin(midi))
    highest = float(np.nanmax(midi))
    result["pitch_range_midi"] = [lowest, highest]
    result["pitch_range_notes"] = [
        librosa.midi_to_note(lowest, octave=True),
        librosa.midi_to_note(highest, octave=True),
    ]

    median_midi = float(np.nanmedian(midi))
    result["median_note"] = librosa.midi_to_note(median_midi, octave=True)

    # Histogram of note names over the kept frames, most frequent first.
    names, counts = np.unique(librosa.midi_to_note(midi, octave=True), return_counts=True)
    ranking = np.argsort(-counts)[:10]
    result["top_notes"] = [
        {"note": str(names[i]), "frames": int(counts[i])}
        for i in ranking
    ]
    return result
Full-track wrapper
# --- Cell 6: Full-track analysis wrapper ---
def analyze_track(path: Path) -> dict:
    """Run every enabled analysis on one audio file and collect the results.

    The returned dict always holds the file's path and name.  Duration and
    sample rate are filled in once the audio has loaded, and
    ``tempo_meter`` / ``chords`` / ``melody`` are filled in for the analyses
    switched on by ``DO_TEMPO_METER`` / ``DO_CHORDS`` / ``DO_MELODY``.  Any
    exception is caught, printed, and stored as ``repr`` text under
    ``"error"``, so one bad file does not stop a batch run.
    """
    print(f"\n▶ Analyzing: {path.name}")

    meta = {
        "file_path": str(path),
        "file_name": path.name,
        "duration_sec": None,
        "sample_rate": None,
        "error": None,
        "tempo_meter": None,
        "chords": None,
        "melody": None,
    }

    try:
        samples, rate = load_audio_mono(path)
        meta["duration_sec"] = float(len(samples) / rate)
        meta["sample_rate"] = int(rate)

        if DO_TEMPO_METER:
            meta["tempo_meter"] = analyze_tempo_and_meter(samples, rate)
        if DO_CHORDS:
            meta["chords"] = analyze_chords(samples, rate)
        if DO_MELODY:
            meta["melody"] = analyze_melody_crepe(samples, rate)
    except Exception as exc:
        # Results stored before the failure are kept alongside the error text.
        meta["error"] = repr(exc)
        print(f" ⚠️ Error analyzing {path.name}: {exc}")

    return meta
Batch loop + output files
# --- Cell 7: Run batch analysis and save metadata ---
# Stop early when there is nothing to analyze.
if not AUDIO_FOLDER.exists():
    raise FileNotFoundError(f"AUDIO_FOLDER does not exist: {AUDIO_FOLDER}")

audio_files = list_audio_files(AUDIO_FOLDER)
if not audio_files:
    raise RuntimeError(f"No audio files found in {AUDIO_FOLDER} (or subfolders).")

# One metadata dict per track, in file-listing order.
all_meta = [analyze_track(track_path) for track_path in audio_files]

# Save JSON (full nested metadata)
with open(OUTPUT_JSON, "w", encoding="utf-8") as json_file:
    json.dump(all_meta, json_file, indent=2)

# Save CSV (one flat row per track; nested fields are reduced to summaries)
rows = []
for entry in all_meta:
    # A section is None when its analysis was disabled or never reached.
    tempo = entry.get("tempo_meter") or {}
    chords = entry.get("chords") or {}
    melody = entry.get("melody") or {}

    top_chords = chords.get("top_chords")
    if top_chords:
        # Only the three longest-sounding chords go into the CSV.
        chord_summary = "; ".join(
            f"{c['chord']} ({c['total_duration_sec']:.1f}s)" for c in top_chords[:3]
        )
    else:
        chord_summary = None

    note_range = melody.get("pitch_range_notes") or [None, None]

    rows.append({
        "file_name": entry.get("file_name"),
        "file_path": entry.get("file_path"),
        "duration_sec": entry.get("duration_sec"),
        "sample_rate": entry.get("sample_rate"),
        "error": entry.get("error"),
        "global_tempo_bpm": tempo.get("global_tempo_bpm"),
        "num_beats": tempo.get("num_beats"),
        "meter_label": tempo.get("meter_label"),
        "meter_confidence": tempo.get("meter_confidence"),
        "num_chord_segments": chords.get("num_chord_segments"),
        "top_chords_summary": chord_summary,
        "melody_available": melody.get("melody_available"),
        "melody_range_low": note_range[0],
        "melody_range_high": note_range[1],
        "melody_median_note": melody.get("median_note"),
    })

df = pd.DataFrame(rows)
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n✅ Done. Analyzed {len(all_meta)} tracks.")
print(f"JSON metadata saved to: {OUTPUT_JSON}")
print(f"CSV metadata saved to : {OUTPUT_CSV}")
▶ Analyzing: Mediu Zhiga.wav
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: Ra Bacheeza.wav
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: [SP] Alfonso Ortiz Tirado - TE QUIERO DIJISTE.mp3
311/311 ━━━━━━━━━━━━━━━━━━━━ 7s 21ms/step
▶ Analyzing: [SP] Alvaro Carrillo - Pinotepa Nacional.mp3
292/292 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: [SP] Lagrimas Negras.mp3
126/126 ━━━━━━━━━━━━━━━━━━━━ 3s 21ms/step
▶ Analyzing: [SP] Los Panchos - Contigo.mp3
258/258 ━━━━━━━━━━━━━━━━━━━━ 6s 25ms/step
▶ Analyzing: [SP] Los Panchos - Jamas Jamas Jamas.mp3
283/283 ━━━━━━━━━━━━━━━━━━━━ 6s 22ms/step
▶ Analyzing: [SP] Los Panchos - Te Quiero Dijiste.mp3
229/229 ━━━━━━━━━━━━━━━━━━━━ 5s 21ms/step
▶ Analyzing: [SP] Soledad y el Mar - Natalia Lafourcade.mp3
338/338 ━━━━━━━━━━━━━━━━━━━━ 7s 21ms/step
▶ Analyzing: [ZAP] Binni Gula_za - Ni_bixi Dxi Zina.mp3
277/277 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: [ZAP] Mediu Zhiga.mp3
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: [ZAP] Ra Bacheeza.mp3
294/294 ━━━━━━━━━━━━━━━━━━━━ 6s 21ms/step
▶ Analyzing: [ZAP] Sabor a Mi - Trio Galenos Y Mario Carrillo.mp3
88/88 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step
✅ Done. Analyzed 13 tracks.
JSON metadata saved to: /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips/track_metadata.json
CSV metadata saved to : /Users/souvikmandal/Documents/06_Teaching_Mentoring/LS100_comp_etho/2025/media/audio/music_tracks/Audio_Clips/track_metadata.csv