"""Extract per-track audio features for music genre classification.

Origin: extract_features.py, main branch of the GitHub repository
devbhatnagar-mp4/Music-Genre-Classification-Using-SVM.
"""

import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def extract_features(file_path):
    """Build one flat feature vector describing an audio file.

    At most the first 30 seconds of ``file_path`` are loaded with librosa.
    Five descriptors are then computed: 40 MFCCs, STFT chroma, mel
    spectrogram, spectral contrast, and tonnetz of the harmonic component.
    Each descriptor is averaged down to a 1-D vector and the five vectors
    are concatenated in that order.

    Returns:
        The concatenated 1-D NumPy array, or ``None`` when loading or any
        feature computation raises. The error is printed, not re-raised,
        so a batch run can skip unreadable files.
    """
    def _frame_mean(matrix):
        # Transpose and average over axis 0, i.e. collapse the matrix's
        # original last axis so one value per feature row remains.
        return np.mean(matrix.T, axis=0)

    try:
        signal, sample_rate = librosa.load(file_path, duration=30)

        mfcc_vec = _frame_mean(
            librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
        )
        chroma_vec = _frame_mean(
            librosa.feature.chroma_stft(y=signal, sr=sample_rate)
        )
        mel_vec = _frame_mean(
            librosa.feature.melspectrogram(y=signal, sr=sample_rate)
        )
        contrast_vec = _frame_mean(
            librosa.feature.spectral_contrast(y=signal, sr=sample_rate)
        )

        # Tonnetz is computed on the harmonic part of the signal only.
        harmonic_signal = librosa.effects.harmonic(signal)
        tonnetz_vec = _frame_mean(
            librosa.feature.tonnetz(y=harmonic_signal, sr=sample_rate)
        )

        return np.hstack(
            [mfcc_vec, chroma_vec, mel_vec, contrast_vec, tonnetz_vec]
        )
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return None

def extract_features_from_directory(dataset_path):
    """Extract features for every file in a genre-per-folder dataset.

    Each immediate sub-directory of ``dataset_path`` is treated as one genre
    and its name becomes the label; non-directory entries are skipped. Every
    file inside a genre directory is passed to ``extract_features``; files
    for which it returns ``None`` are left out of the result.

    Args:
        dataset_path: Directory whose sub-directories are named after genres.

    Returns:
        A ``pandas.DataFrame`` with a ``label`` column followed by
        ``feature_0`` … ``feature_{n-1}`` columns, one row per successfully
        processed file. If no file yields features (empty dataset, no genre
        directories, or every file failed), an empty DataFrame containing
        only the ``label`` column is returned instead of raising.

    Raises:
        OSError: If ``dataset_path`` (or a genre directory) cannot be listed.
    """
    rows = []
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the resulting table is reproducible across runs and machines.
    for genre in sorted(os.listdir(dataset_path)):
        genre_path = os.path.join(dataset_path, genre)
        if not os.path.isdir(genre_path):
            continue
        print(f"\nExtracting features from genre: {genre}")
        for file_name in tqdm(sorted(os.listdir(genre_path))):
            features = extract_features(os.path.join(genre_path, file_name))
            if features is not None:
                rows.append([genre, *features])

    # Guard: the column count is derived from the first row, which does not
    # exist when nothing was extracted (previously an IndexError).
    if not rows:
        return pd.DataFrame(columns=['label'])

    n_features = len(rows[0]) - 1
    columns = ['label'] + [f'feature_{i}' for i in range(n_features)]
    return pd.DataFrame(rows, columns=columns)

if __name__ == "__main__":
    # Script entry point: walk the genre folders under Data/genres_original,
    # build the feature table, and write it to features.csv without the
    # DataFrame index column.
    print("Starting feature extraction...")
    feature_table = extract_features_from_directory("Data/genres_original")
    feature_table.to_csv("features.csv", index=False)
    print("Feature extraction complete! Saved as features.csv")