设置一个带有序列的数组元素(MFCC+fastDTW, Python)
Setting an array element with a sequence (MFCC+fastDTW, Python)
我想使用 RPi 和 python 构建语音命令项目。我使用 MFCC 和 fastDTW 来匹配那个声音,但我收到了这个错误,我不知道如何修复它。这里的代码...
def fastdtw(x, y, radius=1, dist=lambda a, b: abs(a - b)):
min_time_size = radius + 2
if len(x) < min_time_size or len(y) < min_time_size:
return dtw(x, y, window = None, dist=dist)
x_shrinked = __reduce_by_half(x)
y_shrinked = __reduce_by_half(y)
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
window = __expand_window(path, len(x), len(y), radius)
return dtw(x, y, window, dist=dist)
def dtw(x, y, window=None, dist=lambda a, b: abs(a - b)):
len_x, len_y = len(x), len(y)
if window is None:
window = [(i, j) for i in xrange(len_x) for j in xrange(len_y)]
window = [(i + 1, j + 1) for i, j in window]
D = np.full((len_x+1, len_y+1), np.inf, dtype=('f4, i4, i4'))
D[0, 0] = (0, 0, 0)
for i, j in window:
D[i, j] = min((D[i-1, j][0], i-1, j), (D[i, j-1][0], i, j-1), (D[i-1, j-1][0], i-1, j-1), key=lambda a: a[0])
D[i, j][0] += dist(x[i-1], y[j-1])
path = []
i, j = len_x, len_y
while not (i == j == 0):
path.append((i-1, j-1))
i, j = D[i, j][1], D[i, j][2]
path.reverse()
return (D[len_x, len_y][0], path)
运行 文件:
from __future__ import absolute_import, division, print_function, unicode_literals
from features import mfcc
from features import logfbank
import scipy.io.wavfile as wav
import time
from numpy.linalg import norm
import unittest
import numpy as np
from fastdtw import fastdtw, dtw
import bisect
from six.moves import xrange
from collections import defaultdict
start = time.time()
(rate1,sig1) = wav.read("/home/pi/OpenCalculator.wav")
(rate2,sig2) = wav.read("/home/pi/voiceCommand.wav")
mfcc1 = mfcc(sig1,rate1)
mfcc2 = mfcc(sig2,rate2)
dist, path = fastdtw(mfcc1, mfcc2)
elapsed = time.time()-start
这是错误信息:
Traceback (most recent call last):
File "/home/pi/test.py", line 23, in <module>
dist, path = fastdtw(mfcc1, mfcc2)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 16, in fastdtw
return dtw(x, y, window = None, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 34, in dtw
D[i, j][0] += dist(x[i-1], y[j-1])
ValueError: setting an array element with a sequence.
*** mccc 的输出是 numpy 数组形式。
请帮助....
mfcc1
和 mfcc2
应该是列表或 numpy 数组。他们的类型正确吗?
您需要重新定义距离以使用特征向量而不是数字(默认距离适用于数字,而不是向量):
def mfcc_dist(a,b):
dist = 0
for x, y in zip(a,b):
dist = dist + (x - y) * (x - y)
return sqrt(dist)
dist, path = fastdtw(mfcc1, mfcc2, dist=mfcc_dist)
您也可以使用numpy.linalg.norm(a-b)
。
我想使用 RPi 和 python 构建语音命令项目。我使用 MFCC 和 fastDTW 来匹配那个声音,但我收到了这个错误,我不知道如何修复它。这里的代码...
def fastdtw(x, y, radius=1, dist=lambda a, b: abs(a - b)):
min_time_size = radius + 2
if len(x) < min_time_size or len(y) < min_time_size:
return dtw(x, y, window = None, dist=dist)
x_shrinked = __reduce_by_half(x)
y_shrinked = __reduce_by_half(y)
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
window = __expand_window(path, len(x), len(y), radius)
return dtw(x, y, window, dist=dist)
def dtw(x, y, window=None, dist=lambda a, b: abs(a - b)):
len_x, len_y = len(x), len(y)
if window is None:
window = [(i, j) for i in xrange(len_x) for j in xrange(len_y)]
window = [(i + 1, j + 1) for i, j in window]
D = np.full((len_x+1, len_y+1), np.inf, dtype=('f4, i4, i4'))
D[0, 0] = (0, 0, 0)
for i, j in window:
D[i, j] = min((D[i-1, j][0], i-1, j), (D[i, j-1][0], i, j-1), (D[i-1, j-1][0], i-1, j-1), key=lambda a: a[0])
D[i, j][0] += dist(x[i-1], y[j-1])
path = []
i, j = len_x, len_y
while not (i == j == 0):
path.append((i-1, j-1))
i, j = D[i, j][1], D[i, j][2]
path.reverse()
return (D[len_x, len_y][0], path)
运行 文件:
from __future__ import absolute_import, division, print_function, unicode_literals
from features import mfcc
from features import logfbank
import scipy.io.wavfile as wav
import time
from numpy.linalg import norm
import unittest
import numpy as np
from fastdtw import fastdtw, dtw
import bisect
from six.moves import xrange
from collections import defaultdict
start = time.time()
(rate1,sig1) = wav.read("/home/pi/OpenCalculator.wav")
(rate2,sig2) = wav.read("/home/pi/voiceCommand.wav")
mfcc1 = mfcc(sig1,rate1)
mfcc2 = mfcc(sig2,rate2)
dist, path = fastdtw(mfcc1, mfcc2)
elapsed = time.time()-start
这是错误信息:
Traceback (most recent call last):
File "/home/pi/test.py", line 23, in <module>
dist, path = fastdtw(mfcc1, mfcc2)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 20, in fastdtw
distance, path = fastdtw(x_shrinked, y_shrinked, radius=radius, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 16, in fastdtw
return dtw(x, y, window = None, dist=dist)
File "build/bdist.linux-armv7l/egg/fastdtw.py", line 34, in dtw
D[i, j][0] += dist(x[i-1], y[j-1])
ValueError: setting an array element with a sequence.
*** mccc 的输出是 numpy 数组形式。 请帮助....
mfcc1
和 mfcc2
应该是列表或 numpy 数组。他们的类型正确吗?
您需要重新定义距离以使用特征向量而不是数字(默认距离适用于数字,而不是向量):
def mfcc_dist(a,b):
dist = 0
for x, y in zip(a,b):
dist = dist + (x - y) * (x - y)
return sqrt(dist)
dist, path = fastdtw(mfcc1, mfcc2, dist=mfcc_dist)
您也可以使用numpy.linalg.norm(a-b)
。