运行 MatplotLib Python 代码导入错误

Running MatplotLib Python Code ImportErrors

我正在尝试运行 Randy Olson 的代码(各专业授予女性的学士学位所占百分比):http://www.randalolson.com/2014/06/28/how-to-make-beautiful-data-visualizations-in-python-with-matplotlib/

完整代码(由 Randy Olson 编写,显然不是我):

from pandas import read_csv  

# NOTE: Apart from read_csv (imported above), this script never imports the
# plotting names it calls (figure, subplot, ylim, xlim, yticks, xticks, plot,
# text, savefig, and the plt alias). They must already be in scope -- e.g.
# through a pylab-style star import -- for the script to run.

# Read the data into a pandas DataFrame.  
gender_degree_data = read_csv("http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv")  

# These are the "Tableau 20" colors as RGB.  
tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),  
             (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),  
             (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),  
             (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),  
             (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]  

# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.  
for i in range(len(tableau20)):  
    r, g, b = tableau20[i]  
    tableau20[i] = (r / 255., g / 255., b / 255.)  

# You typically want your plot to be ~1.33x wider than tall. This plot is a rare  
# exception because of the number of lines being plotted on it.  
# Common sizes: (10, 7.5) and (12, 9)  
figure(figsize=(12, 14))  

# Remove the plot frame lines. They are unnecessary chartjunk.  
ax = subplot(111)  
ax.spines["top"].set_visible(False)  
ax.spines["bottom"].set_visible(False)  
ax.spines["right"].set_visible(False)  
ax.spines["left"].set_visible(False)  

# Ensure that the axis ticks only show up on the bottom and left of the plot.  
# Ticks on the right and top of the plot are generally unnecessary chartjunk.  
ax.get_xaxis().tick_bottom()  
ax.get_yaxis().tick_left()  

# Limit the range of the plot to only where the data is.  
# Avoid unnecessary whitespace.  
ylim(0, 90)  
xlim(1968, 2014)  

# Make sure your axis ticks are large enough to be easily read.  
# You don't want your viewers squinting to read your plot.  
yticks(range(0, 91, 10), [str(x) + "%" for x in range(0, 91, 10)], fontsize=14)  
xticks(fontsize=14)  

# Provide tick lines across the plot to help your viewers trace along  
# the axis ticks. Make sure that the lines are light and small so they  
# don't obscure the primary data lines.  
for y in range(10, 91, 10):  
    plot(range(1968, 2012), [y] * len(range(1968, 2012)), "--", lw=0.5, color="black", alpha=0.3)  

# Remove the tick marks; they are unnecessary with the tick lines we just plotted.  
# NOTE(review): the "off"/"on" strings are presumably only understood by older
# matplotlib releases; newer ones expect booleans -- confirm against the
# installed version.
plt.tick_params(axis="both", which="both", bottom="off", top="off",  
                labelbottom="on", left="off", right="off", labelleft="on")  

# Now that the plot is prepared, it's time to actually plot the data!  
# Note that I plotted the majors in order of the highest % in the final year.  
majors = ['Health Professions', 'Public Administration', 'Education', 'Psychology',  
          'Foreign Languages', 'English', 'Communications\nand Journalism',  
          'Art and Performance', 'Biology', 'Agriculture',  
          'Social Sciences and History', 'Business', 'Math and Statistics',  
          'Architecture', 'Physical Sciences', 'Computer Science',  
          'Engineering']  

for rank, column in enumerate(majors):  
    # Plot each line separately with its own color, using the Tableau 20  
    # color set in order.  
    plot(gender_degree_data.Year.values,  
            gender_degree_data[column.replace("\n", " ")].values,  
            lw=2.5, color=tableau20[rank])  

    # Add a text label to the right end of every line. Most of the code below  
    # is adding specific offsets y position because some labels overlapped.  
    y_pos = gender_degree_data[column.replace("\n", " ")].values[-1] - 0.5  
    if column == "Foreign Languages":  
        y_pos += 0.5  
    elif column == "English":  
        y_pos -= 0.5  
    elif column == "Communications\nand Journalism":  
        y_pos += 0.75  
    elif column == "Art and Performance":  
        y_pos -= 0.25  
    elif column == "Agriculture":  
        y_pos += 1.25  
    elif column == "Social Sciences and History":  
        y_pos += 0.25  
    elif column == "Business":  
        y_pos -= 0.75  
    elif column == "Math and Statistics":  
        y_pos += 0.75  
    elif column == "Architecture":  
        y_pos -= 0.75  
    elif column == "Computer Science":  
        y_pos += 0.75  
    elif column == "Engineering":  
        y_pos -= 0.25  

    # Again, make sure that all labels are large enough to be easily read  
    # by the viewer.  
    text(2011.5, y_pos, column, fontsize=14, color=tableau20[rank])  

# matplotlib's title() call centers the title on the plot, but not the graph,  
# so I used the text() call to customize where the title goes.  

# Make the title big enough so it spans the entire plot, but don't make it  
# so big that it requires two lines to show.  

# Note that if the title is descriptive enough, it is unnecessary to include  
# axis labels; they are self-evident, in this plot's case.  
text(1995, 93, "Percentage of Bachelor's degrees conferred to women in the U.S.A."  
       ", by major (1970-2012)", fontsize=17, ha="center")  

# Always include your data source(s) and copyright notice! And for your  
# data sources, tell your viewers exactly where the data came from,  
# preferably with a direct link to the data. Just telling your viewers  
# that you used data from the "U.S. Census Bureau" is completely useless:  
# the U.S. Census Bureau provides all kinds of data, so how are your  
# viewers supposed to know which data set you used?  
text(1966, -8, "Data source: nces.ed.gov/programs/digest/2013menu_tables.asp"  
       "\nAuthor: Randy Olson (randalolson.com / @randal_olson)"  
       "\nNote: Some majors are missing because the historical data "  
       "is not available for them", fontsize=10)  

# Finally, save the figure as a PNG.  
# You can also save it as a PDF, JPEG, etc.  
# Just change the file extension in this call.  
# bbox_inches="tight" removes all the extra whitespace on the edges of your plot.  
savefig("percent-bachelors-degrees-women-usa.png", bbox_inches="tight");  

我通过 Anaconda 安装了 Python,因此所有依赖项都已具备。不过,我不确定如何在 IPython Notebook 中运行它,希望能把这个问题解决掉。我在导入时遇到了问题,目前的导入语句是:

from pandas import read_csv  
from matplotlib import *
from matplotlib.figure import figure

但我一直收到 TypeError: 'module' object is not callable 或 ImportError: cannot import name figure

我知道这是一个非常基本的 Python 问题,但我不确定该怎么做。我想要一个包含多条线并具有交互式悬停工具的线图,这似乎是我能找到的最好的例子。如果有人知道如何解决这个问题,或者甚至知道其他易于使用新数据操作的已编写交互式线图的示例,请告诉我!

编辑:

使用

from pandas import read_csv  
from matplotlib import *
from matplotlib.figure import Figure
import pandas

和相同的代码:

完整追溯

runfile('C:/Users/jbyrusb/Documents/Python Scripts/Disputes/WomenDegreesExample.py', wdir='C:/Users/jbyrusb/Documents/Python Scripts/Disputes')
Traceback (most recent call last):

  File "<ipython-input-30-1b99e15a9df1>", line 1, in <module>
    runfile('C:/Users/jbyrusb/Documents/Python Scripts/Disputes/WomenDegreesExample.py', wdir='C:/Users/jbyrusb/Documents/Python Scripts/Disputes')

  File "C:\Users\jbyrusb\AppData\Local\Continuum\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
    execfile(filename, namespace)

  File "C:\Users\jbyrusb\AppData\Local\Continuum\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)

  File "C:/Users/jbyrusb/Documents/Python Scripts/Disputes/WomenDegreesExample.py", line 33, in <module>
    figure(figsize=(12, 14))

TypeError: 'module' object is not callable

您给出的链接中的示例调用了 %pylab inline,这是一个 IPython 命令,它会执行 from pylab import *

这实际上是演示 matplotlib 的最糟糕的方式,如果我可以挥动魔杖并将它从互联网和世界上删除,我会的。

总之,在原代码最前面加上from pylab import *应该可以解决问题

这是现代面向对象的 matplotlib 中的代码:

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas
import seaborn

# Object-oriented matplotlib version of the "percentage of bachelor's degrees
# conferred to women" chart: every drawing call goes through the explicit
# `fig` / `ax` objects instead of pyplot's implicit current figure.

# Apply seaborn's 'white' style to the figure created below.
seaborn.set(style='white')

# Read the data into a pandas DataFrame.
url = "http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv"
gender_degree_data = pandas.read_csv(url)

# These are the "Tableau 20" colors as RGB, scaled to the [0, 1] range that
# matplotlib accepts by dividing the whole array by 255.
tableau20 = np.array([
    ( 31, 119, 180), (174, 199, 232), (255, 127,  14), (255, 187, 120),
    ( 44, 160,  44), (152, 223, 138), (214,  39,  40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140,  86,  75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189,  34), (219, 219, 141), ( 23, 190, 207), (158, 218, 229)
]) / 255.

fig, ax = plt.subplots(figsize=(12, 14))

# Remove the plot frame lines, including the left and bottom ones.
seaborn.despine(ax=ax, left=True, bottom=True)

# Keep ticks on the bottom and left sides only.
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()

# Limit the range of the plot to only where the data is.
ax.set_ylim(bottom=0, top=90)
ax.set_xlim(left=1968, right=2014)

# Label the y axis every 10 percentage points, with a "%" suffix.
percent_ticks = range(0, 91, 10)
ax.set_yticks(percent_ticks)
ax.set_yticklabels([str(x) + "%" for x in percent_ticks])

# Light dashed guide lines across the plot at every 10% step.
guide_years = range(1968, 2012)
for y in range(10, 91, 10):
    ax.plot(guide_years, [y] * len(guide_years), "--",
            lw=0.5, color="black", alpha=0.3)

# Remove the tick marks but keep the tick labels. Real booleans are passed
# here: the "on"/"off" strings were deprecated by matplotlib, and in newer
# releases a non-empty string such as "off" is simply truthy, which would
# leave the tick marks visible.
ax.tick_params(axis="both", which="both", labelsize=14,
               bottom=False, top=False, left=False, right=False,
               labelbottom=True, labelleft=True)

# Majors in the order they are drawn (and colored from tableau20). A "\n" in
# a name is only for the on-plot label; the data column uses a space instead.
majors = [
    'Health Professions', 'Public Administration', 'Education',
    'Psychology', 'Foreign Languages', 'English',
    'Communications\nand Journalism', 'Art and Performance',
    'Biology', 'Agriculture', 'Social Sciences and History',
    'Business', 'Math and Statistics', 'Architecture',
    'Physical Sciences', 'Computer Science', 'Engineering'
]

# Per-label vertical nudges so that neighbouring labels do not overlap.
offsets = {
    "Foreign Languages": +0.5,
    "English": -0.5,
    "Communications\nand Journalism": +0.75,
    "Art and Performance": -0.25,
    "Agriculture": +1.25,
    "Social Sciences and History": +0.25,
    "Business": -0.75,
    "Math and Statistics": +0.75,
    "Architecture": -0.75,
    "Computer Science": +0.75,
    "Engineering": -0.25,
}

for rank, column in enumerate(majors):
    # Look the series up once; it is used for both the line and its label.
    series = gender_degree_data[column.replace("\n", " ")].values

    ax.plot(gender_degree_data.Year.values, series,
            lw=2.5, color=tableau20[rank])

    # Place the label at the right end of the line, at the last data value
    # (shifted down 0.5) plus the major-specific nudge, if any.
    y_pos = series[-1] - 0.5
    y_pos += offsets.get(column, 0)

    ax.text(2011.5, y_pos, column, fontsize=14, color=tableau20[rank])

# Title, positioned manually with text() in data coordinates.
ax.text(1995, 93, "Percentage of Bachelor's degrees conferred to women in the U.S.A."
        ", by major (1970-2012)", fontsize=17, ha="center")

# Data source and author note below the plot.
ax.text(1966, -8, "Data source: nces.ed.gov/programs/digest/2013menu_tables.asp"
        "\nAuthor: Randy Olson (randalolson.com / @randal_olson)"
        "\nNote: Some majors are missing because the historical data "
        "is not available for them", fontsize=10)

# Save as PNG; bbox_inches="tight" trims the extra whitespace around the plot.
fig.savefig("percent-bachelors-degrees-women-usa.png", bbox_inches="tight")

正如 Paul 指出的那样,使用 %pylab inline 是一种过时的做法,不应再使用。下面是更新后的代码,它可以在 IPython Notebook 之外运行,并且不会引入额外的 Seaborn 依赖项。

我还编写了一个仅使用 matplotlib 的示例,您可以在 matplotlib 的示例库(gallery)中找到它。

import matplotlib.pyplot as plt
import pandas as pd

# Read the data into a pandas DataFrame.
gender_degree_data = pd.read_csv("http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv")

# These are the "Tableau 20" colors as RGB.
tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
             (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
             (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
             (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
             (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]

# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.
tableau20 = [(r / 255., g / 255., b / 255.) for r, g, b in tableau20]

# You typically want your plot to be ~1.33x wider than tall. This plot is a rare
# exception because of the number of lines being plotted on it.
# Common sizes: (10, 7.5) and (12, 9)
plt.figure(figsize=(12, 14))

# Remove the plot frame lines. They are unnecessary chartjunk.
ax = plt.subplot(111)
for side in ("top", "bottom", "right", "left"):
    ax.spines[side].set_visible(False)

# Ensure that the axis ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# Limit the range of the plot to only where the data is.
# Avoid unnecessary whitespace.
plt.ylim(0, 90)
plt.xlim(1968, 2014)

# Make sure your axis ticks are large enough to be easily read.
# You don't want your viewers squinting to read your plot.
percent_ticks = range(0, 91, 10)
plt.yticks(percent_ticks, [str(x) + "%" for x in percent_ticks], fontsize=14)
plt.xticks(fontsize=14)

# Provide tick lines across the plot to help your viewers trace along
# the axis ticks. Make sure that the lines are light and small so they
# don't obscure the primary data lines.
guide_years = range(1968, 2012)
for y in range(10, 91, 10):
    plt.plot(guide_years, [y] * len(guide_years), "--", lw=0.5, color="black", alpha=0.3)

# Remove the tick marks; they are unnecessary with the tick lines we just plotted.
# Real booleans are passed here: the "on"/"off" strings were deprecated by
# matplotlib, and in newer releases a non-empty string such as "off" is simply
# truthy, which would leave the tick marks visible.
plt.tick_params(axis="both", which="both",
                bottom=False, top=False, left=False, right=False,
                labelbottom=True, labelleft=True)

# Now that the plot is prepared, it's time to actually plot the data!
# Note that I plotted the majors in order of the highest % in the final year.
majors = ['Health Professions', 'Public Administration', 'Education', 'Psychology',
          'Foreign Languages', 'English', 'Communications\nand Journalism',
          'Art and Performance', 'Biology', 'Agriculture',
          'Social Sciences and History', 'Business', 'Math and Statistics',
          'Architecture', 'Physical Sciences', 'Computer Science',
          'Engineering']

# Specific y-position offsets for labels that would otherwise overlap.
# Majors that are not listed keep the default position.
label_offsets = {
    "Foreign Languages": +0.5,
    "English": -0.5,
    "Communications\nand Journalism": +0.75,
    "Art and Performance": -0.25,
    "Agriculture": +1.25,
    "Social Sciences and History": +0.25,
    "Business": -0.75,
    "Math and Statistics": +0.75,
    "Architecture": -0.75,
    "Computer Science": +0.75,
    "Engineering": -0.25,
}

for rank, column in enumerate(majors):
    # The "\n" in a major's name is only for the on-plot label; the data
    # column uses a space instead. Look the series up once for both uses.
    series = gender_degree_data[column.replace("\n", " ")].values

    # Plot each line separately with its own color, using the Tableau 20
    # color set in order.
    plt.plot(gender_degree_data.Year.values, series,
             lw=2.5, color=tableau20[rank])

    # Add a text label to the right end of every line, at the last data
    # value (shifted down 0.5) plus the major-specific offset, if any.
    y_pos = series[-1] - 0.5
    y_pos += label_offsets.get(column, 0)

    # Again, make sure that all labels are large enough to be easily read
    # by the viewer.
    plt.text(2011.5, y_pos, column, fontsize=14, color=tableau20[rank])

# matplotlib's title() call centers the title on the plot, but not the graph,
# so I used the text() call to customize where the title goes.

# Make the title big enough so it spans the entire plot, but don't make it
# so big that it requires two lines to show.

# Note that if the title is descriptive enough, it is unnecessary to include
# axis labels; they are self-evident, in this plot's case.
plt.text(1995, 93, "Percentage of Bachelor's degrees conferred to women in the U.S.A."
         ", by major (1970-2012)", fontsize=17, ha="center")

# Always include your data source(s) and copyright notice! And for your
# data sources, tell your viewers exactly where the data came from,
# preferably with a direct link to the data. Just telling your viewers
# that you used data from the "U.S. Census Bureau" is completely useless:
# the U.S. Census Bureau provides all kinds of data, so how are your
# viewers supposed to know which data set you used?
plt.text(1966, -8, "Data source: nces.ed.gov/programs/digest/2013menu_tables.asp"
         "\nAuthor: Randy Olson (randalolson.com / @randal_olson)"
         "\nNote: Some majors are missing because the historical data "
         "is not available for them", fontsize=10)

# Finally, save the figure as a PNG.
# You can also save it as a PDF, JPEG, etc.
# Just change the file extension in this call.
# bbox_inches="tight" removes all the extra whitespace on the edges of your plot.
plt.savefig("percent-bachelors-degrees-women-usa.png", bbox_inches="tight")

最终结果如下所示:

我也用这个新代码更新了我的 blog post。感谢您提醒我注意这个问题!