在 Python 中为 PCA 在 y 轴上绘制方差标度
Plotting variance scale on y-axis for PCA in Python
我正在尝试进行 PCA 分析,但无法在 y 轴上正确绘制方差。
我的数据如下(已导出为字典形式):
{1: {0: 242.0, 1: 290.0, 2: 340.0, 3: 363.0, 4: 430.0, 5: 450.0, 6: 500.0, 7: 390.0, 8: 450.0, 9: 500.0, 10: 475.0, 11: 500.0, 12: 500.0, 13: 600.0, 14: 600.0, 15: 700.0, 16: 700.0, 17: 610.0, 18: 650.0, 19: 575.0, 20: 685.0, 21: 620.0, 22: 680.0, 23: 700.0, 24: 725.0, 25: 720.0, 26: 714.0, 27: 850.0, 28: 1000.0, 29: 920.0, 30: 955.0, 31: 925.0, 32: 975.0, 33: 950.0, 34: 40.0, 35: 69.0, 36: 78.0, 37: 87.0, 38: 120.0, 39: 0.0, 40: 110.0, 41: 120.0, 42: 150.0, 43: 145.0, 44: 160.0, 45: 140.0, 46: 160.0, 47: 169.0, 48: 161.0, 49: 200.0, 50: 180.0, 51: 290.0, 52: 272.0, 53: 390.0, 54: 6.7, 55: 7.5, 56: 7.0, 57: 9.7, 58: 9.8, 59: 8.7, 60: 10.0, 61: 9.9, 62: 9.8, 63: 12.2, 64: 13.4, 65: 12.2, 66: 19.7, 67: 19.9, 68: 200.0, 69: 300.0, 70: 300.0, 71: 300.0, 72: 430.0, 73: 345.0, 74: 456.0, 75: 510.0, 76: 540.0, 77: 500.0, 78: 567.0, 79: 770.0, 80: 950.0, 81: 1250.0, 82: 1600.0, 83: 1550.0, 84: 1650.0}, 2: {0: 23.2, 1: 24.0, 2: 23.9, 3: 26.3, 4: 26.5, 5: 26.8, 6: 26.8, 7: 27.6, 8: 27.6, 9: 28.5, 10: 28.4, 11: 28.7, 12: 29.1, 13: 29.4, 14: 29.4, 15: 30.4, 16: 30.4, 17: 30.9, 18: 31.0, 19: 31.3, 20: 31.4, 21: 31.5, 22: 31.8, 23: 31.9, 24: 31.8, 25: 32.0, 26: 32.7, 27: 32.8, 28: 33.5, 29: 35.0, 30: 35.0, 31: 36.2, 32: 37.4, 33: 38.0, 34: 12.9, 35: 16.5, 36: 17.5, 37: 18.2, 38: 18.6, 39: 19.0, 40: 19.1, 41: 19.4, 42: 20.4, 43: 20.5, 44: 20.5, 45: 21.0, 46: 21.1, 47: 22.0, 48: 22.0, 49: 22.1, 50: 23.6, 51: 24.0, 52: 25.0, 53: 29.5, 54: 9.3, 55: 10.0, 56: 10.1, 57: 10.4, 58: 10.7, 59: 10.8, 60: 11.3, 61: 11.3, 62: 11.4, 63: 11.5, 64: 11.7, 65: 12.1, 66: 13.2, 67: 13.8, 68: 30.0, 69: 31.7, 70: 32.7, 71: 34.8, 72: 35.5, 73: 36.0, 74: 40.0, 75: 40.0, 76: 40.1, 77: 42.0, 78: 43.2, 79: 44.8, 80: 48.3, 81: 52.0, 82: 56.0, 83: 56.0, 84: 59.0}, 3: {0: 25.4, 1: 26.3, 2: 26.5, 3: 29.0, 4: 29.0, 5: 29.7, 6: 29.7, 7: 30.0, 8: 30.0, 9: 30.7, 10: 31.0, 11: 31.0, 12: 31.5, 13: 32.0, 14: 32.0, 15: 33.0, 16: 33.0, 17: 33.5, 18: 33.5, 19: 34.0, 20: 34.0, 21: 34.5, 22: 35.0, 23: 35.0, 24: 35.0, 25: 
35.0, 26: 36.0, 27: 36.0, 28: 37.0, 29: 38.5, 30: 38.5, 31: 39.5, 32: 41.0, 33: 41.0, 34: 14.1, 35: 18.2, 36: 18.8, 37: 19.8, 38: 20.0, 39: 20.5, 40: 20.8, 41: 21.0, 42: 22.0, 43: 22.0, 44: 22.5, 45: 22.5, 46: 22.5, 47: 24.0, 48: 23.4, 49: 23.5, 50: 25.2, 51: 26.0, 52: 27.0, 53: 31.7, 54: 9.8, 55: 10.5, 56: 10.6, 57: 11.0, 58: 11.2, 59: 11.3, 60: 11.8, 61: 11.8, 62: 12.0, 63: 12.2, 64: 12.4, 65: 13.0, 66: 14.3, 67: 15.0, 68: 32.3, 69: 34.0, 70: 35.0, 71: 37.3, 72: 38.0, 73: 38.5, 74: 42.5, 75: 42.5, 76: 43.0, 77: 45.0, 78: 46.0, 79: 48.0, 80: 51.7, 81: 56.0, 82: 60.0, 83: 60.0, 84: 63.4}, 4: {0: 30.0, 1: 31.2, 2: 31.1, 3: 33.5, 4: 34.0, 5: 34.7, 6: 34.5, 7: 35.0, 8: 35.1, 9: 36.2, 10: 36.2, 11: 36.2, 12: 36.4, 13: 37.2, 14: 37.2, 15: 38.3, 16: 38.5, 17: 38.6, 18: 38.7, 19: 39.5, 20: 39.2, 21: 39.7, 22: 40.6, 23: 40.5, 24: 40.9, 25: 40.6, 26: 41.5, 27: 41.6, 28: 42.6, 29: 44.1, 30: 44.0, 31: 45.3, 32: 45.9, 33: 46.5, 34: 16.2, 35: 20.3, 36: 21.2, 37: 22.2, 38: 22.2, 39: 22.8, 40: 23.1, 41: 23.7, 42: 24.7, 43: 24.3, 44: 25.3, 45: 25.0, 46: 25.0, 47: 27.2, 48: 26.7, 49: 26.8, 50: 27.9, 51: 29.2, 52: 30.6, 53: 35.0, 54: 10.8, 55: 11.6, 56: 11.6, 57: 12.0, 58: 12.4, 59: 12.6, 60: 13.1, 61: 13.1, 62: 13.2, 63: 13.4, 64: 13.5, 65: 13.8, 66: 15.2, 67: 16.2, 68: 34.8, 69: 37.8, 70: 38.8, 71: 39.8, 72: 40.5, 73: 41.0, 74: 45.5, 75: 45.5, 76: 45.8, 77: 48.0, 78: 48.7, 79: 51.2, 80: 55.1, 81: 59.7, 82: 64.0, 83: 64.0, 84: 68.0}, 5: {0: 38.4, 1: 40.0, 2: 39.8, 3: 38.0, 4: 36.6, 5: 39.2, 6: 41.1, 7: 36.2, 8: 39.9, 9: 39.3, 10: 39.4, 11: 39.7, 12: 37.8, 13: 40.2, 14: 41.5, 15: 38.8, 16: 38.8, 17: 40.5, 18: 37.4, 19: 38.3, 20: 40.8, 21: 39.1, 22: 38.1, 23: 40.1, 24: 40.0, 25: 40.3, 26: 39.8, 27: 40.6, 28: 44.5, 29: 40.9, 30: 41.1, 31: 41.4, 32: 40.6, 33: 37.9, 34: 25.6, 35: 26.1, 36: 26.3, 37: 25.3, 38: 28.0, 39: 28.4, 40: 26.7, 41: 25.8, 42: 23.5, 43: 27.3, 44: 27.8, 45: 26.2, 46: 25.6, 47: 27.7, 48: 25.9, 49: 27.6, 50: 25.4, 51: 30.4, 52: 28.0, 53: 27.1, 54: 16.1, 55: 17.0, 56: 
14.9, 57: 18.3, 58: 16.8, 59: 15.7, 60: 16.9, 61: 16.9, 62: 16.7, 63: 15.6, 64: 18.0, 65: 16.5, 66: 18.9, 67: 18.1, 68: 16.0, 69: 15.1, 70: 15.3, 71: 15.8, 72: 18.0, 73: 15.6, 74: 16.0, 75: 15.0, 76: 17.0, 77: 14.5, 78: 16.0, 79: 15.0, 80: 16.2, 81: 17.9, 82: 15.0, 83: 15.0, 84: 15.9}, 6: {0: 13.4, 1: 13.8, 2: 15.1, 3: 13.3, 4: 15.1, 5: 14.2, 6: 15.3, 7: 13.4, 8: 13.8, 9: 13.7, 10: 14.1, 11: 13.3, 12: 12.0, 13: 13.9, 14: 15.0, 15: 13.8, 16: 13.5, 17: 13.3, 18: 14.8, 19: 14.1, 20: 13.7, 21: 13.3, 22: 15.1, 23: 13.8, 24: 14.8, 25: 15.0, 26: 14.1, 27: 14.9, 28: 15.5, 29: 14.3, 30: 14.3, 31: 14.9, 32: 14.7, 33: 13.7, 34: 14.0, 35: 13.9, 36: 13.7, 37: 14.3, 38: 16.1, 39: 14.7, 40: 14.7, 41: 13.9, 42: 15.2, 43: 14.6, 44: 15.1, 45: 13.3, 46: 15.2, 47: 14.1, 48: 13.6, 49: 15.4, 50: 14.0, 51: 15.4, 52: 15.6, 53: 15.3, 54: 9.7, 55: 10.0, 56: 9.9, 57: 11.5, 58: 10.3, 59: 10.2, 60: 9.8, 61: 8.9, 62: 8.7, 63: 10.4, 64: 9.4, 65: 9.1, 66: 13.6, 67: 11.6, 68: 9.7, 69: 11.0, 70: 11.3, 71: 10.1, 72: 11.3, 73: 9.7, 74: 9.5, 75: 9.8, 76: 11.2, 77: 10.2, 78: 10.0, 79: 10.5, 80: 11.2, 81: 11.7, 82: 9.6, 83: 9.6, 84: 11.0}}
导入库
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
数据已在上面给出,下面是我的代码:
fishes = pd.read_csv("fish.csv", header=None, index_col=False, skiprows=1, usecols=range(1,7))
fishes.head()
创建标准化缩放器(scaler)
scaler = StandardScaler()
创建 PCA 实例
pca = PCA()
创建管道
pipeline = make_pipeline(scaler, pca)
将管道拟合(fit)到样本数据 'samples'
pipeline.fit(fishes)
绘制解释方差
features = range(pca.n_components_)
plt.bar(features, pca.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(features)
plt.show()
我现在的输出是这样的,没有意义。
如果我没理解错的话,y 轴上各主成分的方差加起来应该是 100%。但按照图中这样的刻度,我的前三个主成分解释不了多少方差。就算这里的 1 代表 10%,加起来也仍然不是 100%。
是我做错了什么(不太可能)还是我需要手动调整 y 轴的比例?我的错误在哪里?谢谢。
不要绘制 pca.explained_variance_,而是尝试绘制:
pca.explained_variance_ratio_
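pca.explained_variance_ratio_ 的各项之和为 1(即 100%)。所有主成分解释的方差合计占总方差的 100%,但 pca.explained_variance_ 给出的是方差的绝对数值,其大小取决于您的数据(例如,对 6 个特征做标准化之后,这些值之和约为 6),只有将它们表示为比率时总和才为 1。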
我正在尝试进行 PCA 分析,但无法在 y 轴上正确绘制方差。
我的数据如下(已导出为字典形式):
{1: {0: 242.0, 1: 290.0, 2: 340.0, 3: 363.0, 4: 430.0, 5: 450.0, 6: 500.0, 7: 390.0, 8: 450.0, 9: 500.0, 10: 475.0, 11: 500.0, 12: 500.0, 13: 600.0, 14: 600.0, 15: 700.0, 16: 700.0, 17: 610.0, 18: 650.0, 19: 575.0, 20: 685.0, 21: 620.0, 22: 680.0, 23: 700.0, 24: 725.0, 25: 720.0, 26: 714.0, 27: 850.0, 28: 1000.0, 29: 920.0, 30: 955.0, 31: 925.0, 32: 975.0, 33: 950.0, 34: 40.0, 35: 69.0, 36: 78.0, 37: 87.0, 38: 120.0, 39: 0.0, 40: 110.0, 41: 120.0, 42: 150.0, 43: 145.0, 44: 160.0, 45: 140.0, 46: 160.0, 47: 169.0, 48: 161.0, 49: 200.0, 50: 180.0, 51: 290.0, 52: 272.0, 53: 390.0, 54: 6.7, 55: 7.5, 56: 7.0, 57: 9.7, 58: 9.8, 59: 8.7, 60: 10.0, 61: 9.9, 62: 9.8, 63: 12.2, 64: 13.4, 65: 12.2, 66: 19.7, 67: 19.9, 68: 200.0, 69: 300.0, 70: 300.0, 71: 300.0, 72: 430.0, 73: 345.0, 74: 456.0, 75: 510.0, 76: 540.0, 77: 500.0, 78: 567.0, 79: 770.0, 80: 950.0, 81: 1250.0, 82: 1600.0, 83: 1550.0, 84: 1650.0}, 2: {0: 23.2, 1: 24.0, 2: 23.9, 3: 26.3, 4: 26.5, 5: 26.8, 6: 26.8, 7: 27.6, 8: 27.6, 9: 28.5, 10: 28.4, 11: 28.7, 12: 29.1, 13: 29.4, 14: 29.4, 15: 30.4, 16: 30.4, 17: 30.9, 18: 31.0, 19: 31.3, 20: 31.4, 21: 31.5, 22: 31.8, 23: 31.9, 24: 31.8, 25: 32.0, 26: 32.7, 27: 32.8, 28: 33.5, 29: 35.0, 30: 35.0, 31: 36.2, 32: 37.4, 33: 38.0, 34: 12.9, 35: 16.5, 36: 17.5, 37: 18.2, 38: 18.6, 39: 19.0, 40: 19.1, 41: 19.4, 42: 20.4, 43: 20.5, 44: 20.5, 45: 21.0, 46: 21.1, 47: 22.0, 48: 22.0, 49: 22.1, 50: 23.6, 51: 24.0, 52: 25.0, 53: 29.5, 54: 9.3, 55: 10.0, 56: 10.1, 57: 10.4, 58: 10.7, 59: 10.8, 60: 11.3, 61: 11.3, 62: 11.4, 63: 11.5, 64: 11.7, 65: 12.1, 66: 13.2, 67: 13.8, 68: 30.0, 69: 31.7, 70: 32.7, 71: 34.8, 72: 35.5, 73: 36.0, 74: 40.0, 75: 40.0, 76: 40.1, 77: 42.0, 78: 43.2, 79: 44.8, 80: 48.3, 81: 52.0, 82: 56.0, 83: 56.0, 84: 59.0}, 3: {0: 25.4, 1: 26.3, 2: 26.5, 3: 29.0, 4: 29.0, 5: 29.7, 6: 29.7, 7: 30.0, 8: 30.0, 9: 30.7, 10: 31.0, 11: 31.0, 12: 31.5, 13: 32.0, 14: 32.0, 15: 33.0, 16: 33.0, 17: 33.5, 18: 33.5, 19: 34.0, 20: 34.0, 21: 34.5, 22: 35.0, 23: 35.0, 24: 35.0, 25: 
35.0, 26: 36.0, 27: 36.0, 28: 37.0, 29: 38.5, 30: 38.5, 31: 39.5, 32: 41.0, 33: 41.0, 34: 14.1, 35: 18.2, 36: 18.8, 37: 19.8, 38: 20.0, 39: 20.5, 40: 20.8, 41: 21.0, 42: 22.0, 43: 22.0, 44: 22.5, 45: 22.5, 46: 22.5, 47: 24.0, 48: 23.4, 49: 23.5, 50: 25.2, 51: 26.0, 52: 27.0, 53: 31.7, 54: 9.8, 55: 10.5, 56: 10.6, 57: 11.0, 58: 11.2, 59: 11.3, 60: 11.8, 61: 11.8, 62: 12.0, 63: 12.2, 64: 12.4, 65: 13.0, 66: 14.3, 67: 15.0, 68: 32.3, 69: 34.0, 70: 35.0, 71: 37.3, 72: 38.0, 73: 38.5, 74: 42.5, 75: 42.5, 76: 43.0, 77: 45.0, 78: 46.0, 79: 48.0, 80: 51.7, 81: 56.0, 82: 60.0, 83: 60.0, 84: 63.4}, 4: {0: 30.0, 1: 31.2, 2: 31.1, 3: 33.5, 4: 34.0, 5: 34.7, 6: 34.5, 7: 35.0, 8: 35.1, 9: 36.2, 10: 36.2, 11: 36.2, 12: 36.4, 13: 37.2, 14: 37.2, 15: 38.3, 16: 38.5, 17: 38.6, 18: 38.7, 19: 39.5, 20: 39.2, 21: 39.7, 22: 40.6, 23: 40.5, 24: 40.9, 25: 40.6, 26: 41.5, 27: 41.6, 28: 42.6, 29: 44.1, 30: 44.0, 31: 45.3, 32: 45.9, 33: 46.5, 34: 16.2, 35: 20.3, 36: 21.2, 37: 22.2, 38: 22.2, 39: 22.8, 40: 23.1, 41: 23.7, 42: 24.7, 43: 24.3, 44: 25.3, 45: 25.0, 46: 25.0, 47: 27.2, 48: 26.7, 49: 26.8, 50: 27.9, 51: 29.2, 52: 30.6, 53: 35.0, 54: 10.8, 55: 11.6, 56: 11.6, 57: 12.0, 58: 12.4, 59: 12.6, 60: 13.1, 61: 13.1, 62: 13.2, 63: 13.4, 64: 13.5, 65: 13.8, 66: 15.2, 67: 16.2, 68: 34.8, 69: 37.8, 70: 38.8, 71: 39.8, 72: 40.5, 73: 41.0, 74: 45.5, 75: 45.5, 76: 45.8, 77: 48.0, 78: 48.7, 79: 51.2, 80: 55.1, 81: 59.7, 82: 64.0, 83: 64.0, 84: 68.0}, 5: {0: 38.4, 1: 40.0, 2: 39.8, 3: 38.0, 4: 36.6, 5: 39.2, 6: 41.1, 7: 36.2, 8: 39.9, 9: 39.3, 10: 39.4, 11: 39.7, 12: 37.8, 13: 40.2, 14: 41.5, 15: 38.8, 16: 38.8, 17: 40.5, 18: 37.4, 19: 38.3, 20: 40.8, 21: 39.1, 22: 38.1, 23: 40.1, 24: 40.0, 25: 40.3, 26: 39.8, 27: 40.6, 28: 44.5, 29: 40.9, 30: 41.1, 31: 41.4, 32: 40.6, 33: 37.9, 34: 25.6, 35: 26.1, 36: 26.3, 37: 25.3, 38: 28.0, 39: 28.4, 40: 26.7, 41: 25.8, 42: 23.5, 43: 27.3, 44: 27.8, 45: 26.2, 46: 25.6, 47: 27.7, 48: 25.9, 49: 27.6, 50: 25.4, 51: 30.4, 52: 28.0, 53: 27.1, 54: 16.1, 55: 17.0, 56: 
14.9, 57: 18.3, 58: 16.8, 59: 15.7, 60: 16.9, 61: 16.9, 62: 16.7, 63: 15.6, 64: 18.0, 65: 16.5, 66: 18.9, 67: 18.1, 68: 16.0, 69: 15.1, 70: 15.3, 71: 15.8, 72: 18.0, 73: 15.6, 74: 16.0, 75: 15.0, 76: 17.0, 77: 14.5, 78: 16.0, 79: 15.0, 80: 16.2, 81: 17.9, 82: 15.0, 83: 15.0, 84: 15.9}, 6: {0: 13.4, 1: 13.8, 2: 15.1, 3: 13.3, 4: 15.1, 5: 14.2, 6: 15.3, 7: 13.4, 8: 13.8, 9: 13.7, 10: 14.1, 11: 13.3, 12: 12.0, 13: 13.9, 14: 15.0, 15: 13.8, 16: 13.5, 17: 13.3, 18: 14.8, 19: 14.1, 20: 13.7, 21: 13.3, 22: 15.1, 23: 13.8, 24: 14.8, 25: 15.0, 26: 14.1, 27: 14.9, 28: 15.5, 29: 14.3, 30: 14.3, 31: 14.9, 32: 14.7, 33: 13.7, 34: 14.0, 35: 13.9, 36: 13.7, 37: 14.3, 38: 16.1, 39: 14.7, 40: 14.7, 41: 13.9, 42: 15.2, 43: 14.6, 44: 15.1, 45: 13.3, 46: 15.2, 47: 14.1, 48: 13.6, 49: 15.4, 50: 14.0, 51: 15.4, 52: 15.6, 53: 15.3, 54: 9.7, 55: 10.0, 56: 9.9, 57: 11.5, 58: 10.3, 59: 10.2, 60: 9.8, 61: 8.9, 62: 8.7, 63: 10.4, 64: 9.4, 65: 9.1, 66: 13.6, 67: 11.6, 68: 9.7, 69: 11.0, 70: 11.3, 71: 10.1, 72: 11.3, 73: 9.7, 74: 9.5, 75: 9.8, 76: 11.2, 77: 10.2, 78: 10.0, 79: 10.5, 80: 11.2, 81: 11.7, 82: 9.6, 83: 9.6, 84: 11.0}}
导入库
import pandas as pd
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
数据已在上面给出,下面是我的代码:
fishes = pd.read_csv("fish.csv", header=None, index_col=False, skiprows=1, usecols=range(1,7))
fishes.head()
创建标准化缩放器(scaler)
scaler = StandardScaler()
创建 PCA 实例
pca = PCA()
创建管道
pipeline = make_pipeline(scaler, pca)
将管道拟合(fit)到样本数据 'samples'
pipeline.fit(fishes)
绘制解释方差
features = range(pca.n_components_)
plt.bar(features, pca.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('variance')
plt.xticks(features)
plt.show()
我现在的输出是这样的,没有意义。
如果我没理解错的话,y 轴上各主成分的方差加起来应该是 100%。但按照图中这样的刻度,我的前三个主成分解释不了多少方差。就算这里的 1 代表 10%,加起来也仍然不是 100%。
是我做错了什么(不太可能)还是我需要手动调整 y 轴的比例?我的错误在哪里?谢谢。
不要绘制 pca.explained_variance_,而是尝试绘制:
pca.explained_variance_ratio_
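pca.explained_variance_ratio_ 的各项之和为 1(即 100%)。所有主成分解释的方差合计占总方差的 100%,但 pca.explained_variance_ 给出的是方差的绝对数值,其大小取决于您的数据(例如,对 6 个特征做标准化之后,这些值之和约为 6),只有将它们表示为比率时总和才为 1。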