从 mp3 到特征:聚类音乐
From mp3 to features: clustering music
有没有人有兴趣提供一组 MP3 音乐?
我使用了 librosa、ffmpeg 和我编写的 Python 脚本,以便将单个 MP3 文件映射到 114 个数字特征(如速度、mfcc 等)的特征序列。
如果对 100,000 个 MP3 重复该过程,就可以得到一个音乐特征数据集。
问题是:如何让人们把他们的 MP3 转换成这些特征?
另一个有趣的问题是:这些特征能否还原回 MP3?
这是脚本:
#!/bin/bash
# Convert the MP3 given as the first argument to a 22050 Hz unsigned-8-bit
# WAV, then run the feature-extraction script on the result.
# Usage: ./extract.sh song.mp3
# NOTE(review): the original post showed empty "" arguments — the "$1"
# positional parameters were evidently lost in extraction; restored here.
ffmpeg -y -i "$1" -acodec pcm_u8 -ar 22050 "$1".wav
python prova.py "$1".wav
和 Python Wav 功能代码,prova.py:
# Beat tracking example
from __future__ import print_function
import librosa
import numpy as np
import sys
# ffmpeg -i song.mp3 -acodec pcm_u8 -ar 22050 song.wav
# 1. Get the file path to the included audio example
filename = sys.argv[1]
print(filename)
name, extension1, extension2 = filename.split(".")
print("Song name = " + name)
# 2. Load the audio as a waveform `y`
# Store the sampling rate as `sr`
y, sr = librosa.load(filename)
onset_env = librosa.onset.onset_strength(y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
# Set the hop length; at 22050 Hz, 512 samples ~= 23ms
hop_length = 512
# Separate harmonics and percussives into two waveforms
y_harmonic, y_percussive = librosa.effects.hpss(y)
#print("y_armonic= " + str(len(y_harmonic)))
#print("y_percussive= " + str(len(y_percussive)))
# Beat track on the percussive signal
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
sr=sr)
# Compute MFCC features from the raw signal
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
# And the first-order differences (delta features)
mfcc_delta = librosa.feature.delta(mfcc)
# Stack and synchronize between beat events
# This time, we'll use the mean value (default) instead of median
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]),
beat_frames)
# Compute chroma features from the harmonic signal
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
sr=sr)
# Aggregate chroma features between beat events
# We'll use the median value of each feature between beat frames
beat_chroma = librosa.util.sync(chromagram,
beat_frames,
aggregate=np.median)
# Finally, stack all beat-synchronous features together
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
print(str(mfcc.shape) + " " + str(mfcc_delta.shape) + " " + str(beat_mfcc_delta.shape) + " " + str(chromagram.shape) + " " + str(beat_chroma.shape) + str(beat_features.shape))
f = open(name + ".txt","w")
f.write(str(tempo)+ ',')
for i in range(0, len(mfcc)):
f.write(str(np.mean(mfcc[i]))+ ',')
for i in range(0, len(mfcc_delta)):
f.write(str(np.mean(mfcc_delta[i]))+ ',')
for i in range(0, len(beat_mfcc_delta)):
f.write(str(np.mean(beat_mfcc_delta[i]))+ ',')
for i in range(0, len(chromagram)):
f.write(str(np.mean(chromagram[i]))+ ',')
for i in range(0, len(beat_chroma)):
f.write(str(np.mean(beat_chroma[i]))+ ',')
for i in range(0, len(beat_features)):
f.write(str(np.mean(beat_features[i]))+ ',')
f.close()
这样,如果将特征集与其流派相结合,给定一首新歌曲,就可以自动判断它属于哪种流派。
要么您提供人们可以在他们的 MP3 上运行的代码(例如命令行工具或桌面应用程序),要么人们把他们的 MP3 提供给您,由您来运行代码(例如 Web 应用程序或 HTTP API)。
有没有人有兴趣提供一组 MP3 音乐? 我使用了 librosa、ffmpeg 和我编写的 Python 脚本,把单个 MP3 文件映射为由 114 个数值特征(如速度、MFCC 等)组成的特征序列。 如果对 100,000 个 MP3 重复该过程,就可以得到一个音乐特征数据集。 问题是:如何让人们把他们的 MP3 转换成这些特征? 另一个有趣的问题是:这些特征能否还原回 MP3? 这是脚本:
#!/bin/bash
# Convert the MP3 given as the first argument to a 22050 Hz unsigned-8-bit
# WAV, then run the feature-extraction script on the result.
# Usage: ./extract.sh song.mp3
# NOTE(review): the original post showed empty "" arguments — the "$1"
# positional parameters were evidently lost in extraction; restored here.
ffmpeg -y -i "$1" -acodec pcm_u8 -ar 22050 "$1".wav
python prova.py "$1".wav
和 Python Wav 功能代码,prova.py:
# Beat tracking example
from __future__ import print_function
import librosa
import numpy as np
import sys
# ffmpeg -i song.mp3 -acodec pcm_u8 -ar 22050 song.wav
# 1. Get the file path to the included audio example
filename = sys.argv[1]
print(filename)
name, extension1, extension2 = filename.split(".")
print("Song name = " + name)
# 2. Load the audio as a waveform `y`
# Store the sampling rate as `sr`
y, sr = librosa.load(filename)
onset_env = librosa.onset.onset_strength(y, sr=sr)
tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
# Set the hop length; at 22050 Hz, 512 samples ~= 23ms
hop_length = 512
# Separate harmonics and percussives into two waveforms
y_harmonic, y_percussive = librosa.effects.hpss(y)
#print("y_armonic= " + str(len(y_harmonic)))
#print("y_percussive= " + str(len(y_percussive)))
# Beat track on the percussive signal
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
sr=sr)
# Compute MFCC features from the raw signal
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
# And the first-order differences (delta features)
mfcc_delta = librosa.feature.delta(mfcc)
# Stack and synchronize between beat events
# This time, we'll use the mean value (default) instead of median
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]),
beat_frames)
# Compute chroma features from the harmonic signal
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
sr=sr)
# Aggregate chroma features between beat events
# We'll use the median value of each feature between beat frames
beat_chroma = librosa.util.sync(chromagram,
beat_frames,
aggregate=np.median)
# Finally, stack all beat-synchronous features together
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
print(str(mfcc.shape) + " " + str(mfcc_delta.shape) + " " + str(beat_mfcc_delta.shape) + " " + str(chromagram.shape) + " " + str(beat_chroma.shape) + str(beat_features.shape))
f = open(name + ".txt","w")
f.write(str(tempo)+ ',')
for i in range(0, len(mfcc)):
f.write(str(np.mean(mfcc[i]))+ ',')
for i in range(0, len(mfcc_delta)):
f.write(str(np.mean(mfcc_delta[i]))+ ',')
for i in range(0, len(beat_mfcc_delta)):
f.write(str(np.mean(beat_mfcc_delta[i]))+ ',')
for i in range(0, len(chromagram)):
f.write(str(np.mean(chromagram[i]))+ ',')
for i in range(0, len(beat_chroma)):
f.write(str(np.mean(beat_chroma[i]))+ ',')
for i in range(0, len(beat_features)):
f.write(str(np.mean(beat_features[i]))+ ',')
f.close()
这样,如果将特征集与其流派相结合,给定一首新歌曲,就可以自动判断它属于哪种流派。
要么您提供人们可以在他们的 MP3 上运行的代码(例如命令行工具或桌面应用程序),要么人们把他们的 MP3 提供给您,由您来运行代码(例如 Web 应用程序或 HTTP API)。