使用 Python 每秒对 wav 文件执行 FFT
Perform FFT for every second on wav file with Python
我有对 5 秒的 wav 文件执行 FFT 的代码。我不擅长 Python 所以我写了非常基本的代码来分割 wav 文件并每秒计算 FFT。有什么更方便的方法吗?
另外,由于代码中 range 部分的写法,我不确定这些图是否显示了每个频率及其对应的幅度。我把信号分成了 5 段,但频率是否也需要相应地分成 5 段?
以数字结尾的变量名是我后来添加的;通常每个变量我只有一份,用来计算整个 wav 文件的 FFT。欢迎任何建议。(由于信号存在偏置,我去掉了第一秒,看一下图就明白了。)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
import numpy as np
from matplotlib import pyplot as plt
# Per-second FFT of a 5-second recording.
#
# The file is read, down-mixed to mono, and split into one-second segments
# using the file's own sample rate.  The first second is skipped; seconds
# 2-5 are each transformed and plotted below the time-domain signal.

# wavfile.read returns the sample rate (Hz) and the sample array.
fs_rate, signal = wavfile.read("db1.wav")

# A 2-D array holds one column per channel: average the two channels.
if signal.ndim == 2:
    signal = signal.sum(axis=1) / 2

# Work in float64 so that adding the large offset below cannot overflow a
# narrow integer dtype such as the int16 of a mono file.
signal = np.asarray(signal, dtype=np.float64)

# Constant offset kept from the original script.  A constant only feeds the
# lowest bin of each segment's transform, and those bins are zeroed below.
signal2 = signal + 480000000

N = signal2.shape[0]             # total number of samples
secs = N / float(fs_rate)        # duration in seconds
Ts = 1.0 / fs_rate               # sampling interval in seconds

# One time stamp per sample.  Built from the integer sample index so its
# length always equals N (arange with a float step can be off by one).
t = np.arange(N) * Ts

# Number of low-frequency bins kept for display.  For a 5-second file this
# is a quarter of a one-second segment, i.e. the range 0 .. fs_rate/4 Hz.
n_side = N // 20

# Number of lowest bins forced to zero to hide the offset/bias peak.
n_zeroed = 60

# (frequencies, magnitudes) for seconds 2..5; second 1 is skipped.
spectra = []
for second in range(1, 5):
    segment = signal2[second * fs_rate:(second + 1) * fs_rate]
    magnitude = np.abs(scipy.fftpack.fft(segment))
    freqs = scipy.fftpack.fftfreq(segment.size, Ts)
    fft_side = magnitude[:n_side]
    fft_side[:n_zeroed] = 0
    spectra.append((freqs[:n_side], fft_side))

# Row 1 of a 6-row figure: the time-domain signal.
plt.subplot(611)
plt.plot(t, signal2, "r")
plt.xlabel('Time')
plt.ylabel('Amplitude')

# Row 2 is left empty (it belonged to the skipped first second);
# rows 3-6 show the spectra of seconds 2-5.
for row, (freqs_side, fft_side) in enumerate(spectra, start=3):
    plt.subplot(6, 1, row)
    plt.plot(freqs_side, fft_side, "g")
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Amplitude')

plt.show()
之前Python没玩过FFT,趁机玩了一下。我录制了一些音频 — 在 Audacity 中,因为它也有很好的内置 FFT 功能,所以我有一个很好的参考来检查我是否得到 "right" 答案
import numpy as np
from numpy import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
# Recording of me whistling at ~1.2 kHz and ~1 kHz.
fs_rate, signal = wavfile.read("whistle-1170_970hz.wav")

# Convert stereo to mono.  A mono file is already 1-D and has no channel
# axis to average over, so only 2-D input is reduced.
if signal.ndim == 2:
    signal = signal.mean(axis=1)

# Time of every sample, in seconds.
t = np.arange(signal.shape[0]) / fs_rate

# Plot the whole recording.
plt.plot(t, signal)

# One subplot per second of audio; zip stops after the two axes created here.
fig, axs = plt.subplots(
    2, sharex=True, sharey=True,
    figsize=(8, 6))

for ax, i in zip(axs, range(0, signal.shape[0], fs_rate)):
    # Pull out the samples for this second.
    ss = signal[i:i + fs_rate]
    # FFT and the frequency (Hz) of each output bin.
    sp = fft.fft(ss)
    freq = fft.fftfreq(len(ss), 1 / fs_rate)
    # Plot the first few components.  The magnitude of the complex value is
    # used: the real part alone depends on the phase of each component.
    ax.plot(freq[:2000], np.abs(sp[:2000]))
这段代码在我这里给出的图中,峰值出现在了正确的位置(见下方的图)。我没能看懂您那些重复的代码和重复的索引操作,但我认为您的做法可能是对的!
您想要实现的功能,可以使用 STFT(短时傅里叶变换)的标准函数一次完成。
这是一个 Python 示例,它可以读取任意 WAV 文件,并按时间窗口逐段计算 FFT。窗口长度可通过 time_period 配置,取值范围为 0.05 秒到 10 秒。
输出包括:原始声音波形(最后一个窗口)、分桶后的 FFT 输出,以及该输出的一维和二维图像表示。
Python 3 代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
from scipy.signal import argrelextrema
import numpy as np
from matplotlib import pyplot as plt
# Windowed FFT of a WAV file.
#
# The recording is cut into consecutive windows of `time_period` seconds.
# For each window the lowest bucket_size * buckets FFT bins are summed into
# `buckets` buckets and normalised to 0-1; the result is appended to
# `output_array`.  The last processed window is then plotted.

# Candidate input files.  Each assignment overwrites the previous one, so
# only the last path is actually read.
filename = "audio/pysynth_anthem.wav"
filename = "audio/pysynth_chopin.wav"
filename = "audio/menuet.wav"
filename = "audio/bach_violin.wav"

# ==============================================
time_period = 0.1 # FFT time period (in seconds). Can comfortably process time frames from 0.05 seconds - 10 seconds
# ==============================================

fs_rate, signal_original = wavfile.read(filename)

# Down-mix once, before windowing: a 2-D array has one column per channel.
if signal_original.ndim == 2:
    signal_mono = signal_original.sum(axis=1) / 2
else:
    signal_mono = signal_original

total_time = int(np.floor(len(signal_original)/fs_rate))
sample_range = np.arange(0,total_time,time_period)
total_samples = len(sample_range)

# Fixed window length in samples, so every window has the same number of
# FFT bins regardless of float rounding in the window start times.
samples_per_window = int(round(time_period * fs_rate))
Ts = 1.0/fs_rate # sampling interval in time

# Only the first bucket_size * buckets bins are kept.  The bin spacing is
# 1 / time_period Hz, so the covered range is
# bucket_size * buckets / time_period Hz (800 Hz for a 0.1 s window).
bucket_size = 5
buckets = 16

print ("Frequency sampling", fs_rate)
print ("total time: ", total_time)
print ("sample time period: ", time_period)
print ("total samples: ", total_samples)

output_array = []
for index, i in enumerate(sample_range, start=1):
    print ("Processing: %d / %d (%d%%)" % (index, total_samples, index*100/total_samples))

    # Round instead of truncating: i * fs_rate can land just below an
    # integer (e.g. 30869.999...), which would shift the window by a sample.
    sample_start = int(round(i * fs_rate))
    window = signal_mono[sample_start:sample_start + samples_per_window]
    if window.shape[0] < samples_per_window:
        # Window runs past the end of the file; nothing complete to analyse.
        continue
    signal = window
    N = signal.shape[0]

    FFT = np.abs(scipy.fftpack.fft(signal))
    freqs = scipy.fftpack.fftfreq(N, Ts)

    # Positive-frequency half, truncated to the bins that get bucketed.
    FFT_side = FFT[:N // 2][:bucket_size*buckets]
    fft_freqs_side = freqs[:N // 2][:bucket_size*buckets]

    # Combine frequencies into buckets: magnitudes are summed, while each
    # bucket is labelled with the mean frequency of its bins so that the
    # label is a real frequency in Hz.
    bucket_starts = range(0, len(FFT_side), bucket_size)
    FFT_side = np.array([int(FFT_side[s:s + bucket_size].sum()) for s in bucket_starts])
    fft_freqs_side = np.array([int(fft_freqs_side[s:s + bucket_size].mean()) for s in bucket_starts])

    # FFT_side: Normalize (0-1).  An all-zero window stays all zeros rather
    # than reusing the previous window's values.
    max_value = FFT_side.max()
    if max_value != 0:
        FFT_side_norm = FFT_side / float(max_value)
    else:
        FFT_side_norm = np.zeros(len(FFT_side))

    # Append to output array
    output_array.append(FFT_side_norm)

if not output_array:
    raise SystemExit("No complete %s-second window found in %s" % (time_period, filename))

# ============================================
# Plotting (shows the last processed window)

# One time stamp per sample of the window; built from the sample index so
# its length always matches the signal.
t = np.arange(N) * Ts

plt.figure(figsize=(8,10))
plt.subplot(411)
plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')

plt.subplot(412)
diff = np.diff(fft_freqs_side)
widths = np.hstack([diff, diff[-1]])
plt.bar(fft_freqs_side, FFT_side_norm, width=widths) # plotting the positive fft spectrum
plt.xticks(fft_freqs_side, fft_freqs_side, rotation='vertical')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')

# The same bucket values drawn as a 1 x buckets image.
FFT_side_norm_line = FFT_side_norm.copy()
FFT_side_norm_line.resize( (1,buckets) )
plt.subplot(413)
plt.imshow(FFT_side_norm_line)
plt.axis('off')
plt.title('Image Representation (1D)')

# ... and as a roughly square image.
width_img = int(np.sqrt(buckets))
height_img = int(np.ceil(buckets / int(np.sqrt(buckets))))
FFT_side_norm_rect = FFT_side_norm.copy()
FFT_side_norm_rect.resize( (width_img,height_img) )
plt.subplot(414)
plt.imshow(FFT_side_norm_rect)
plt.axis('off')
plt.title('Image Representation (2D): %d x %d' % (width_img,height_img))

plt.show()
# =======================================================
我有对 5 秒的 wav 文件执行 FFT 的代码。我不擅长 Python 所以我写了非常基本的代码来分割 wav 文件并每秒计算 FFT。有什么更方便的方法吗?
另外,由于代码中 range 部分的写法,我不确定这些图是否显示了每个频率及其对应的幅度。我把信号分成了 5 段,但频率是否也需要相应地分成 5 段?
以数字结尾的变量名是我后来添加的;通常每个变量我只有一份,用来计算整个 wav 文件的 FFT。欢迎任何建议。(由于信号存在偏置,我去掉了第一秒,看一下图就明白了。)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
import numpy as np
from matplotlib import pyplot as plt
# Per-second FFT of a 5-second recording.
#
# The file is read, down-mixed to mono, and split into one-second segments
# using the file's own sample rate.  The first second is skipped; seconds
# 2-5 are each transformed and plotted below the time-domain signal.

# wavfile.read returns the sample rate (Hz) and the sample array.
fs_rate, signal = wavfile.read("db1.wav")

# A 2-D array holds one column per channel: average the two channels.
if signal.ndim == 2:
    signal = signal.sum(axis=1) / 2

# Work in float64 so that adding the large offset below cannot overflow a
# narrow integer dtype such as the int16 of a mono file.
signal = np.asarray(signal, dtype=np.float64)

# Constant offset kept from the original script.  A constant only feeds the
# lowest bin of each segment's transform, and those bins are zeroed below.
signal2 = signal + 480000000

N = signal2.shape[0]             # total number of samples
secs = N / float(fs_rate)        # duration in seconds
Ts = 1.0 / fs_rate               # sampling interval in seconds

# One time stamp per sample.  Built from the integer sample index so its
# length always equals N (arange with a float step can be off by one).
t = np.arange(N) * Ts

# Number of low-frequency bins kept for display.  For a 5-second file this
# is a quarter of a one-second segment, i.e. the range 0 .. fs_rate/4 Hz.
n_side = N // 20

# Number of lowest bins forced to zero to hide the offset/bias peak.
n_zeroed = 60

# (frequencies, magnitudes) for seconds 2..5; second 1 is skipped.
spectra = []
for second in range(1, 5):
    segment = signal2[second * fs_rate:(second + 1) * fs_rate]
    magnitude = np.abs(scipy.fftpack.fft(segment))
    freqs = scipy.fftpack.fftfreq(segment.size, Ts)
    fft_side = magnitude[:n_side]
    fft_side[:n_zeroed] = 0
    spectra.append((freqs[:n_side], fft_side))

# Row 1 of a 6-row figure: the time-domain signal.
plt.subplot(611)
plt.plot(t, signal2, "r")
plt.xlabel('Time')
plt.ylabel('Amplitude')

# Row 2 is left empty (it belonged to the skipped first second);
# rows 3-6 show the spectra of seconds 2-5.
for row, (freqs_side, fft_side) in enumerate(spectra, start=3):
    plt.subplot(6, 1, row)
    plt.plot(freqs_side, fft_side, "g")
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Amplitude')

plt.show()
之前Python没玩过FFT,趁机玩了一下。我录制了一些音频 — 在 Audacity 中,因为它也有很好的内置 FFT 功能,所以我有一个很好的参考来检查我是否得到 "right" 答案
import numpy as np
from numpy import fft
from scipy.io import wavfile
import matplotlib.pyplot as plt
# Recording of me whistling at ~1.2 kHz and ~1 kHz.
fs_rate, signal = wavfile.read("whistle-1170_970hz.wav")

# Convert stereo to mono.  A mono file is already 1-D and has no channel
# axis to average over, so only 2-D input is reduced.
if signal.ndim == 2:
    signal = signal.mean(axis=1)

# Time of every sample, in seconds.
t = np.arange(signal.shape[0]) / fs_rate

# Plot the whole recording.
plt.plot(t, signal)

# One subplot per second of audio; zip stops after the two axes created here.
fig, axs = plt.subplots(
    2, sharex=True, sharey=True,
    figsize=(8, 6))

for ax, i in zip(axs, range(0, signal.shape[0], fs_rate)):
    # Pull out the samples for this second.
    ss = signal[i:i + fs_rate]
    # FFT and the frequency (Hz) of each output bin.
    sp = fft.fft(ss)
    freq = fft.fftfreq(len(ss), 1 / fs_rate)
    # Plot the first few components.  The magnitude of the complex value is
    # used: the real part alone depends on the phase of each component.
    ax.plot(freq[:2000], np.abs(sp[:2000]))
这段代码在我这里给出的图中,峰值出现在了正确的位置(见下方的图)。我没能看懂您那些重复的代码和重复的索引操作,但我认为您的做法可能是对的!
您想要实现的功能,可以使用 STFT(短时傅里叶变换)的标准函数一次完成。
这是一个 Python 示例,它可以读取任意 WAV 文件,并按时间窗口逐段计算 FFT。窗口长度可通过 time_period 配置,取值范围为 0.05 秒到 10 秒。
输出包括:原始声音波形(最后一个窗口)、分桶后的 FFT 输出,以及该输出的一维和二维图像表示。
Python 3 代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
from scipy.signal import argrelextrema
import numpy as np
from matplotlib import pyplot as plt
# Windowed FFT of a WAV file.
#
# The recording is cut into consecutive windows of `time_period` seconds.
# For each window the lowest bucket_size * buckets FFT bins are summed into
# `buckets` buckets and normalised to 0-1; the result is appended to
# `output_array`.  The last processed window is then plotted.

# Candidate input files.  Each assignment overwrites the previous one, so
# only the last path is actually read.
filename = "audio/pysynth_anthem.wav"
filename = "audio/pysynth_chopin.wav"
filename = "audio/menuet.wav"
filename = "audio/bach_violin.wav"

# ==============================================
time_period = 0.1 # FFT time period (in seconds). Can comfortably process time frames from 0.05 seconds - 10 seconds
# ==============================================

fs_rate, signal_original = wavfile.read(filename)

# Down-mix once, before windowing: a 2-D array has one column per channel.
if signal_original.ndim == 2:
    signal_mono = signal_original.sum(axis=1) / 2
else:
    signal_mono = signal_original

total_time = int(np.floor(len(signal_original)/fs_rate))
sample_range = np.arange(0,total_time,time_period)
total_samples = len(sample_range)

# Fixed window length in samples, so every window has the same number of
# FFT bins regardless of float rounding in the window start times.
samples_per_window = int(round(time_period * fs_rate))
Ts = 1.0/fs_rate # sampling interval in time

# Only the first bucket_size * buckets bins are kept.  The bin spacing is
# 1 / time_period Hz, so the covered range is
# bucket_size * buckets / time_period Hz (800 Hz for a 0.1 s window).
bucket_size = 5
buckets = 16

print ("Frequency sampling", fs_rate)
print ("total time: ", total_time)
print ("sample time period: ", time_period)
print ("total samples: ", total_samples)

output_array = []
for index, i in enumerate(sample_range, start=1):
    print ("Processing: %d / %d (%d%%)" % (index, total_samples, index*100/total_samples))

    # Round instead of truncating: i * fs_rate can land just below an
    # integer (e.g. 30869.999...), which would shift the window by a sample.
    sample_start = int(round(i * fs_rate))
    window = signal_mono[sample_start:sample_start + samples_per_window]
    if window.shape[0] < samples_per_window:
        # Window runs past the end of the file; nothing complete to analyse.
        continue
    signal = window
    N = signal.shape[0]

    FFT = np.abs(scipy.fftpack.fft(signal))
    freqs = scipy.fftpack.fftfreq(N, Ts)

    # Positive-frequency half, truncated to the bins that get bucketed.
    FFT_side = FFT[:N // 2][:bucket_size*buckets]
    fft_freqs_side = freqs[:N // 2][:bucket_size*buckets]

    # Combine frequencies into buckets: magnitudes are summed, while each
    # bucket is labelled with the mean frequency of its bins so that the
    # label is a real frequency in Hz.
    bucket_starts = range(0, len(FFT_side), bucket_size)
    FFT_side = np.array([int(FFT_side[s:s + bucket_size].sum()) for s in bucket_starts])
    fft_freqs_side = np.array([int(fft_freqs_side[s:s + bucket_size].mean()) for s in bucket_starts])

    # FFT_side: Normalize (0-1).  An all-zero window stays all zeros rather
    # than reusing the previous window's values.
    max_value = FFT_side.max()
    if max_value != 0:
        FFT_side_norm = FFT_side / float(max_value)
    else:
        FFT_side_norm = np.zeros(len(FFT_side))

    # Append to output array
    output_array.append(FFT_side_norm)

if not output_array:
    raise SystemExit("No complete %s-second window found in %s" % (time_period, filename))

# ============================================
# Plotting (shows the last processed window)

# One time stamp per sample of the window; built from the sample index so
# its length always matches the signal.
t = np.arange(N) * Ts

plt.figure(figsize=(8,10))
plt.subplot(411)
plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')

plt.subplot(412)
diff = np.diff(fft_freqs_side)
widths = np.hstack([diff, diff[-1]])
plt.bar(fft_freqs_side, FFT_side_norm, width=widths) # plotting the positive fft spectrum
plt.xticks(fft_freqs_side, fft_freqs_side, rotation='vertical')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')

# The same bucket values drawn as a 1 x buckets image.
FFT_side_norm_line = FFT_side_norm.copy()
FFT_side_norm_line.resize( (1,buckets) )
plt.subplot(413)
plt.imshow(FFT_side_norm_line)
plt.axis('off')
plt.title('Image Representation (1D)')

# ... and as a roughly square image.
width_img = int(np.sqrt(buckets))
height_img = int(np.ceil(buckets / int(np.sqrt(buckets))))
FFT_side_norm_rect = FFT_side_norm.copy()
FFT_side_norm_rect.resize( (width_img,height_img) )
plt.subplot(414)
plt.imshow(FFT_side_norm_rect)
plt.axis('off')
plt.title('Image Representation (2D): %d x %d' % (width_img,height_img))

plt.show()
# =======================================================