如何使用 pandas 将两个 CSV 文件合并在一起并绘制条形图?
How to merge two CSV documents together using pandas and plot a bar graph?
数据 1:shanghaiData.csv
数据 2:timesData.csv
我刚开始学习如何处理 CSV 数据并进行可视化。我正在尝试合并两个 CSV 表,然后在条形图上绘制某一所大学在两个榜单中的世界排名。
我的代码是:
import pandas as pd
import matplotlib
# Load the two ranking tables from the working directory.
times_df = pd.read_csv("timesData.csv")
shanghai_df = pd.read_csv("shanghaiData.csv")
# Left-join on the university name only. Columns present in both files get
# pandas' default suffixes: _x for timesData, _y for shanghaiData.
combined = times_df.merge(shanghai_df, on='university_name', how='left')
# Keep only the rows for Harvard University.
combined = combined.loc[combined['university_name']=='Harvard University']
# This is the call that raises "TypeError: no numeric data to plot":
# pandas found no numeric column among the requested y columns.
combined.plot(
kind = 'bar',
x = 'year_y',
y = ['world_rank_x', 'world_rank_y']
)
TypeError Traceback (most recent call last)
<ipython-input-1-e360afadaed4> in <module>
3 combined.loc[combined['university_name']=='Harvard University']
4 combined = combined.loc[combined['university_name']=='Harvard University']
----> 5 combined.plot(
6 kind = 'bar',
7 x = 'year_y',
/usr/lib/python3.9/site-packages/pandas/plotting/_core.py in __call__(self, *args, **kwargs)
970 data.columns = label_name
971
--> 972 return plot_backend.plot(data, kind=kind, **kwargs)
973
974 __call__.__doc__ = __doc__
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/core.py in generate(self)
284 def generate(self):
285 self._args_adjust()
--> 286 self._compute_plot_data()
287 self._setup_subplots()
288 self._make_plot()
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/core.py in _compute_plot_data(self)
451 # no non-numeric frames or series allowed
452 if is_empty:
--> 453 raise TypeError("no numeric data to plot")
454
455 self.data = numeric_data.apply(self._convert_to_ndarray)
TypeError: no numeric data to plot
如果我尝试将年份转换为整数,我仍然会遇到错误。
combined['year_y'] = combined['year_y'].astype(int)
我不确定你期望的输出是什么,也不清楚你为什么要用条形图,但我修改并更正了你的代码。
把绘图部分的代码改成下面这样:
import matplotlib.pyplot as plt
plt.bar(combined['year_y'], combined['world_rank_x'])
最后,完整代码如下:
import pandas as pd
import matplotlib.pyplot as plt

# Load both ranking tables from the working directory.
times_df = pd.read_csv("timesData.csv")
shanghai_df = pd.read_csv("shanghaiData.csv")

# Left-join on the university name. Columns that exist in both files get the
# default suffixes: _x for timesData, _y for shanghaiData.
combined = times_df.merge(shanghai_df, on='university_name', how='left')

# Joining on the name alone pairs every Times year with every Shanghai year,
# so keep only the Harvard rows where both rankings refer to the same year.
is_harvard = combined['university_name'] == 'Harvard University'
same_year = combined['year_x'] == combined['year_y']
combined = combined.loc[is_harvard & same_year].copy()

# "no numeric data to plot" means the rank columns were not read as numbers
# (presumably some ranks are stored as text - verify against the CSVs).
# Coerce them; values that cannot be parsed become NaN.
for rank_column in ('world_rank_x', 'world_rank_y'):
    combined[rank_column] = pd.to_numeric(combined[rank_column], errors='coerce')

plt.bar(combined['year_y'], combined['world_rank_x'])
# Needed to display the figure when run as a plain script.
plt.show()
如果你想在一个图中有两个条形图,你可以使用这个代码:
import numpy as np
import matplotlib.pyplot as plt

# Width of a single bar; the two bars for one year are drawn side by side.
bar_width = 0.25
# plt.subplots returns a (figure, axes) pair, so unpack both.
fig, ax = plt.subplots(figsize=(12, 8))

# Example values: one rank per series for each year label.
years = ['2015', '2016', '2017', '2018', '2019']
world_rank_x = [2, 2, 2, 2, 2]
world_rank_y = [6, 6, 6, 6, 6]

# Bar centres of the first series, and the second series shifted by one bar.
positions_x = np.arange(len(world_rank_x))
positions_y = positions_x + bar_width

ax.bar(positions_x, world_rank_x, color='r', width=bar_width,
       edgecolor='grey', label='world_rank_x')
ax.bar(positions_y, world_rank_y, color='g', width=bar_width,
       edgecolor='grey', label='world_rank_y')

ax.set_xlabel('Year', fontweight='bold', fontsize=15)
ax.set_ylabel('world rank', fontweight='bold', fontsize=15)
# Put each tick midway between the centres of the two bars of a year.
ax.set_xticks(positions_x + bar_width / 2)
ax.set_xticklabels(years)

ax.legend()
plt.show()
输出:
数据 1:shanghaiData.csv
数据 2:timesData.csv
我刚开始学习如何处理 CSV 数据并进行可视化。我正在尝试合并两个 CSV 表,然后在条形图上绘制某一所大学在两个榜单中的世界排名。
我的代码是:
import pandas as pd
import matplotlib
# Load the two ranking tables from the working directory.
times_df = pd.read_csv("timesData.csv")
shanghai_df = pd.read_csv("shanghaiData.csv")
# Left-join on the university name only. Columns present in both files get
# pandas' default suffixes: _x for timesData, _y for shanghaiData.
combined = times_df.merge(shanghai_df, on='university_name', how='left')
# Keep only the rows for Harvard University.
combined = combined.loc[combined['university_name']=='Harvard University']
# This is the call that raises "TypeError: no numeric data to plot":
# pandas found no numeric column among the requested y columns.
combined.plot(
kind = 'bar',
x = 'year_y',
y = ['world_rank_x', 'world_rank_y']
)
TypeError Traceback (most recent call last)
<ipython-input-1-e360afadaed4> in <module>
3 combined.loc[combined['university_name']=='Harvard University']
4 combined = combined.loc[combined['university_name']=='Harvard University']
----> 5 combined.plot(
6 kind = 'bar',
7 x = 'year_y',
/usr/lib/python3.9/site-packages/pandas/plotting/_core.py in __call__(self, *args, **kwargs)
970 data.columns = label_name
971
--> 972 return plot_backend.plot(data, kind=kind, **kwargs)
973
974 __call__.__doc__ = __doc__
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/core.py in generate(self)
284 def generate(self):
285 self._args_adjust()
--> 286 self._compute_plot_data()
287 self._setup_subplots()
288 self._make_plot()
/usr/lib/python3.9/site-packages/pandas/plotting/_matplotlib/core.py in _compute_plot_data(self)
451 # no non-numeric frames or series allowed
452 if is_empty:
--> 453 raise TypeError("no numeric data to plot")
454
455 self.data = numeric_data.apply(self._convert_to_ndarray)
TypeError: no numeric data to plot
如果我尝试将年份转换为整数,我仍然会遇到错误。
combined['year_y'] = combined['year_y'].astype(int)
我不确定你期望的输出是什么,也不清楚你为什么要用条形图,但我修改并更正了你的代码。把绘图部分的代码改成下面这样:
import matplotlib.pyplot as plt
plt.bar(combined['year_y'], combined['world_rank_x'])
最后,完整代码如下:
import pandas as pd
import matplotlib.pyplot as plt

# Load both ranking tables from the working directory.
times_df = pd.read_csv("timesData.csv")
shanghai_df = pd.read_csv("shanghaiData.csv")

# Left-join on the university name. Columns that exist in both files get the
# default suffixes: _x for timesData, _y for shanghaiData.
combined = times_df.merge(shanghai_df, on='university_name', how='left')

# Joining on the name alone pairs every Times year with every Shanghai year,
# so keep only the Harvard rows where both rankings refer to the same year.
is_harvard = combined['university_name'] == 'Harvard University'
same_year = combined['year_x'] == combined['year_y']
combined = combined.loc[is_harvard & same_year].copy()

# "no numeric data to plot" means the rank columns were not read as numbers
# (presumably some ranks are stored as text - verify against the CSVs).
# Coerce them; values that cannot be parsed become NaN.
for rank_column in ('world_rank_x', 'world_rank_y'):
    combined[rank_column] = pd.to_numeric(combined[rank_column], errors='coerce')

plt.bar(combined['year_y'], combined['world_rank_x'])
# Needed to display the figure when run as a plain script.
plt.show()
如果你想在一个图中有两个条形图,你可以使用这个代码:
import numpy as np
import matplotlib.pyplot as plt

# Width of a single bar; the two bars for one year are drawn side by side.
bar_width = 0.25
# plt.subplots returns a (figure, axes) pair, so unpack both.
fig, ax = plt.subplots(figsize=(12, 8))

# Example values: one rank per series for each year label.
years = ['2015', '2016', '2017', '2018', '2019']
world_rank_x = [2, 2, 2, 2, 2]
world_rank_y = [6, 6, 6, 6, 6]

# Bar centres of the first series, and the second series shifted by one bar.
positions_x = np.arange(len(world_rank_x))
positions_y = positions_x + bar_width

ax.bar(positions_x, world_rank_x, color='r', width=bar_width,
       edgecolor='grey', label='world_rank_x')
ax.bar(positions_y, world_rank_y, color='g', width=bar_width,
       edgecolor='grey', label='world_rank_y')

ax.set_xlabel('Year', fontweight='bold', fontsize=15)
ax.set_ylabel('world rank', fontweight='bold', fontsize=15)
# Put each tick midway between the centres of the two bars of a year.
ax.set_xticks(positions_x + bar_width / 2)
ax.set_xticklabels(years)

ax.legend()
plt.show()
输出: