PCA: TypeError: can only concatenate list (not "int") to list
PCA: TypeError: can only concatenate list (not "int") to list
我试图按照 this post 中建议的解决方案,在我的数据集上拟合(fit)PCA。该代码适用于形状为 (150, 8)
的 iris
数据,如下所示:
array([[ 1.7837721 , -1.23464679, 4.27808537, ..., 0.63061657,
-1.79849625, -1.41574397],
[-0.35396307, -0.13400175, 3.91751182, ..., -0.58928302,
-0.15735542, -0.99157312],
[-0.20380491, -1.06074392, 4.65814864, ..., 2.19686369,
0.14920164, 2.33371106],
...,
[-1.05079672, 1.46836264, 5.41970214, ..., 0.32847349,
0.27133141, 1.01266607],
[ 0.19569856, 0.57739573, 3.84749973, ..., 0.02400556,
-0.08193678, 0.51223263],
[ 0.04905765, 0.66314259, 6.22608157, ..., 0.60076934,
-0.56890579, -0.23642103]])
但是,使用我的形状 (3475, 29)
的数据时发现错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-292-5661ffbde57b> in <module>
38 # data = array([randn(8) for k in range(150)])
39 data[:50, 2:4] += 5
---> 40 data[50:, 2:5] += 5
41
42 """ visualize """
TypeError: can only concatenate list (not "int") to list
我的数据(形状 (3475, 29)
)如下所示:
array([[58.5, 27.0, 88.5, ..., nan, 0.0, -3.0],
[58.5, 27.0, 88.5, ..., nan, 0.0, -3.0],
[47.0, 45.0, 92.0, ..., 1.6, -0.649519052838329,
-1.1249999999999998],
...,
[46.0, 44.5, 98.0, ..., 2.5, 0.0, -1.3],
[46.0, 40.0, 98.0, ..., 2.5, 0.0, -1.3],
[46.5, 44.5, 76.5, ..., 17.767857142857142, -0.4788774197473401,
-1.4219984343829701]], dtype=object)
使用的代码:
# SO - doug - my data
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
def cov(X):
    """
    Covariance matrix of mean-centered data.

    Parameters
    ----------
    X : 2-D array
        Observations in rows, variables in columns. The columns are
        expected to be mean-centered already; no centering is done here.

    Returns
    -------
    2-D array
        ``dot(X.T, X)`` divided by the number of rows of ``X``.

    Notes
    -----
    The normalization is N (the number of rows); numpy's `cov` uses N-1.
    """
    return dot(X.T, X) / X.shape[0]
# N = data.shape[1]
# C = empty((N, N))
# for j in range(N):
# C[j, j] = mean(data[:, j] * data[:, j])
# for k in range(j + 1, N):
# C[j, k] = C[k, j] = mean(data[:, j] * data[:, k])
# return C
def pca(data, pc_count=None):
    """
    Principal component analysis using eigenvalues.

    note: this mean-centers and auto-scales the data (in-place), so the
    array passed by the caller is modified.

    Parameters
    ----------
    data : 2-D array
        Observations in rows, variables in columns. Because of the
        in-place ``-=`` / ``/=`` below, it must be an array that supports
        those operations element-wise (e.g. a float array).
    pc_count : int or None
        Number of leading components to keep; ``None`` keeps all of them.

    Returns
    -------
    U : array
        The standardized data projected onto the selected eigenvectors.
    E : array
        The selected eigenvalues, largest first.
    V : array
        The matching eigenvectors, one per column.
    """
    # Standardize every column in place: subtract the column mean, then
    # divide by the column standard deviation.
    data -= mean(data, 0)
    data /= std(data, 0)
    C = cov(data)
    E, V = eigh(C)
    # Sort eigenvalue indices in descending order and keep the first
    # pc_count of them (slicing with None keeps everything).
    key = argsort(E)[::-1][:pc_count]
    E, V = E[key], V[:, key]
    U = dot(data, V)  # used to be dot(V.T, data.T).T
    return U, E, V
""" test data """
# data = array([randn(8) for k in range(150)])
# NOTE: my_data1 is not defined in this snippet; it must already exist in the session.
data = my_data1 # Using my own data
# Add 5, in place, to columns 2-3 of the first 50 rows ...
data[:50, 2:4] += 5
# ... and to columns 2-4 of all remaining rows.
data[50:, 2:5] += 5
""" visualize """
# pca() returns (U, E, V); only U, the projected data, is kept.
# pca() also standardizes `data` in place, so the plots below use the modified array.
trans = pca(data, 3)[0]
fig, (ax1, ax2) = subplots(1, 2)
# Left panel: columns 0 and 1 of `data`; first 50 rows in red, the rest in blue.
ax1.scatter(data[:50, 0], data[:50, 1], c = 'r')
ax1.scatter(data[50:, 0], data[50:, 1], c = 'b')
# Right panel: first two columns of the PCA projection, same row coloring.
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r')
ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b')
show()
以下两行代码
data[:50, 2:4] += 5
data[50:, 2:5] += 5
是做什么的?
我试着把这两行替换为
data = [data[:50, 2:4] += 5]
data = [data[50:, 2:5] += 5]
但返回了以下错误:
File "<ipython-input-296-5d80e1852b4e>", line 42
data = [data[:50, 2:4] += 5]
^
SyntaxError: invalid syntax
感谢任何建议!
如果data
是二维数值数组,
data[:50, 2:4]
选择数组的一个切片(技术上是 view
),并且
data[:50, 2:4] += 5
将 5 添加到该切片的所有元素 - 并修改 data
.
但是
TypeError: can only concatenate list (not "int") to list
表示 data
包含一个或多个列表,而不仅仅是数字。对于列表,+
不是(数学)加法,而是连接(join/concatenate),例如 [1,2,3]+[4] 的结果是 [1,2,3,4]。
关于你的第二次尝试:
[data[:50, 2:4] += 5]
与
[i + 1]
非常不同。方括号
[]
用于创建一个列表,而您不能在列表内部执行赋值语句,例如 =
或 +=
,因此出现语法错误。
您显示的 (3475, 29) 数组将 dtype
列为 object
。这是一个强有力的迹象,表明它包含数字以外的东西(或者除数字之外还包含其他对象)。根据错误信息,那些对象一定是列表。
所以你需要清理mydata
。
我试图按照 this post 中建议的解决方案,在我的数据集上拟合(fit)PCA。该代码适用于形状为 (150, 8)
的 iris
数据,如下所示:
array([[ 1.7837721 , -1.23464679, 4.27808537, ..., 0.63061657,
-1.79849625, -1.41574397],
[-0.35396307, -0.13400175, 3.91751182, ..., -0.58928302,
-0.15735542, -0.99157312],
[-0.20380491, -1.06074392, 4.65814864, ..., 2.19686369,
0.14920164, 2.33371106],
...,
[-1.05079672, 1.46836264, 5.41970214, ..., 0.32847349,
0.27133141, 1.01266607],
[ 0.19569856, 0.57739573, 3.84749973, ..., 0.02400556,
-0.08193678, 0.51223263],
[ 0.04905765, 0.66314259, 6.22608157, ..., 0.60076934,
-0.56890579, -0.23642103]])
但是,使用我的形状 (3475, 29)
的数据时发现错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-292-5661ffbde57b> in <module>
38 # data = array([randn(8) for k in range(150)])
39 data[:50, 2:4] += 5
---> 40 data[50:, 2:5] += 5
41
42 """ visualize """
TypeError: can only concatenate list (not "int") to list
我的数据(形状 (3475, 29)
)如下所示:
array([[58.5, 27.0, 88.5, ..., nan, 0.0, -3.0],
[58.5, 27.0, 88.5, ..., nan, 0.0, -3.0],
[47.0, 45.0, 92.0, ..., 1.6, -0.649519052838329,
-1.1249999999999998],
...,
[46.0, 44.5, 98.0, ..., 2.5, 0.0, -1.3],
[46.0, 40.0, 98.0, ..., 2.5, 0.0, -1.3],
[46.5, 44.5, 76.5, ..., 17.767857142857142, -0.4788774197473401,
-1.4219984343829701]], dtype=object)
使用的代码:
# SO - doug - my data
from numpy import array, dot, mean, std, empty, argsort
from numpy.linalg import eigh, solve
from numpy.random import randn
from matplotlib.pyplot import subplots, show
def cov(X):
    """
    Covariance matrix of mean-centered data.

    Parameters
    ----------
    X : 2-D array
        Observations in rows, variables in columns. The columns are
        expected to be mean-centered already; no centering is done here.

    Returns
    -------
    2-D array
        ``dot(X.T, X)`` divided by the number of rows of ``X``.

    Notes
    -----
    The normalization is N (the number of rows); numpy's `cov` uses N-1.
    """
    return dot(X.T, X) / X.shape[0]
# N = data.shape[1]
# C = empty((N, N))
# for j in range(N):
# C[j, j] = mean(data[:, j] * data[:, j])
# for k in range(j + 1, N):
# C[j, k] = C[k, j] = mean(data[:, j] * data[:, k])
# return C
def pca(data, pc_count=None):
    """
    Principal component analysis using eigenvalues.

    note: this mean-centers and auto-scales the data (in-place), so the
    array passed by the caller is modified.

    Parameters
    ----------
    data : 2-D array
        Observations in rows, variables in columns. Because of the
        in-place ``-=`` / ``/=`` below, it must be an array that supports
        those operations element-wise (e.g. a float array).
    pc_count : int or None
        Number of leading components to keep; ``None`` keeps all of them.

    Returns
    -------
    U : array
        The standardized data projected onto the selected eigenvectors.
    E : array
        The selected eigenvalues, largest first.
    V : array
        The matching eigenvectors, one per column.
    """
    # Standardize every column in place: subtract the column mean, then
    # divide by the column standard deviation.
    data -= mean(data, 0)
    data /= std(data, 0)
    C = cov(data)
    E, V = eigh(C)
    # Sort eigenvalue indices in descending order and keep the first
    # pc_count of them (slicing with None keeps everything).
    key = argsort(E)[::-1][:pc_count]
    E, V = E[key], V[:, key]
    U = dot(data, V)  # used to be dot(V.T, data.T).T
    return U, E, V
""" test data """
# data = array([randn(8) for k in range(150)])
# NOTE: my_data1 is not defined in this snippet; it must already exist in the session.
data = my_data1 # Using my own data
# Add 5, in place, to columns 2-3 of the first 50 rows ...
data[:50, 2:4] += 5
# ... and to columns 2-4 of all remaining rows.
data[50:, 2:5] += 5
""" visualize """
# pca() returns (U, E, V); only U, the projected data, is kept.
# pca() also standardizes `data` in place, so the plots below use the modified array.
trans = pca(data, 3)[0]
fig, (ax1, ax2) = subplots(1, 2)
# Left panel: columns 0 and 1 of `data`; first 50 rows in red, the rest in blue.
ax1.scatter(data[:50, 0], data[:50, 1], c = 'r')
ax1.scatter(data[50:, 0], data[50:, 1], c = 'b')
# Right panel: first two columns of the PCA projection, same row coloring.
ax2.scatter(trans[:50, 0], trans[:50, 1], c = 'r')
ax2.scatter(trans[50:, 0], trans[50:, 1], c = 'b')
show()
以下两行代码
data[:50, 2:4] += 5
data[50:, 2:5] += 5
是做什么的?
我试着把这两行替换为
data = [data[:50, 2:4] += 5]
data = [data[50:, 2:5] += 5]
但返回了以下错误:
File "<ipython-input-296-5d80e1852b4e>", line 42
data = [data[:50, 2:4] += 5]
^
SyntaxError: invalid syntax
感谢任何建议!
如果data
是二维数值数组,
data[:50, 2:4]
选择数组的一个切片(技术上是 view
),并且
data[:50, 2:4] += 5
将 5 添加到该切片的所有元素 - 并修改 data
.
但是
TypeError: can only concatenate list (not "int") to list
表示 data
包含一个或多个列表,而不仅仅是数字。对于列表,+
不是(数学)加法,而是连接(join/concatenate),例如 [1,2,3]+[4] 的结果是 [1,2,3,4]。
关于你的第二次尝试:
[data[:50, 2:4] += 5]
与
[i + 1]
非常不同。方括号
[]
用于创建一个列表,而您不能在列表内部执行赋值语句,例如 =
或 +=
,因此出现语法错误。
您显示的 (3475, 29) 数组将 dtype
列为 object
。这是一个强有力的迹象,表明它包含数字以外的东西(或者除数字之外还包含其他对象)。根据错误信息,那些对象一定是列表。
所以你需要清理mydata
。