matplotlib 绘图速度很慢
matplotlib very slow in plotting
我有多个函数,每个函数都接收一个数组或字典以及一个路径作为参数,并把生成的图形保存到该路径。
我已尽量精简示例,下面是其中两个函数:
def valueChartPatterns(dict, path):
    """Save a bar chart of how often each value occurs in the dataset.

    Args:
        dict: Mapping whose values are themselves mappings. Every value of
            every inner mapping is tallied. (The name shadows the builtin
            but is kept so existing keyword callers keep working.)
        path: Destination handed to ``plt.savefig``.
    """
    seen_values = Counter()
    for data in dict.values():
        # update() tallies in place. On Python 2.7 Counter has no in-place
        # add, so ``seen_values += Counter(...)`` rebuilt the whole running
        # Counter for every record, which gets slower as the number of
        # distinct values grows.
        seen_values.update(data.values())
    seen_values = seen_values.most_common()

    # NOTE(review): these percentage labels are computed but never drawn.
    # The call is kept because tupleCounts2Percents is defined outside this
    # block and its behaviour cannot be confirmed from here.
    seen_values_pct = ['{:.2%}'.format(entry[1])
                       for entry in tupleCounts2Percents(seen_values)]

    # Real lists (not lazy map objects) so the same code plots correctly on
    # both Python 2 and Python 3.
    positions = list(range(len(seen_values)))
    labels = [value for value, _ in seen_values]
    counts = [count for _, count in seen_values]

    fig = plt.figure()
    try:
        plt.bar(positions, counts, width=0.9, align='center')
        plt.xticks(positions, labels)
        plt.title('Values in Pattern Dataset')
        plt.xlabel('Values in Data')
        plt.ylabel('Occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Close instead of clear: pyplot keeps a cleared figure alive, so
        # every call would otherwise leave one more open figure behind.
        plt.close(fig)
def countryChartPatterns(dict, path):
    """Save a bar chart of how many records mention each country.

    Args:
        dict: Mapping whose values are themselves mappings keyed by country.
            Each key of each inner mapping adds one to that country's count.
            (The name shadows the builtin but is kept so existing keyword
            callers keep working.)
        path: Destination handed to ``plt.savefig``.
    """
    seen_countries = Counter()
    for data in dict.values():
        # update() tallies in place. On Python 2.7 Counter has no in-place
        # add, so ``seen_countries += Counter(...)`` built a brand-new
        # Counter for every record.
        # Pass the keys explicitly: handing update() the mapping itself
        # would add its values to the counts instead of counting keys.
        seen_countries.update(data.keys())
    seen_countries = seen_countries.most_common()

    # NOTE(review): these percentage labels are computed but never drawn.
    # The call is kept because tupleCounts2Percents is defined outside this
    # block and its behaviour cannot be confirmed from here.
    seen_countries_percentage = [
        '{:.2%}'.format(entry[1])
        for entry in tupleCounts2Percents(seen_countries)
    ]

    # Real lists (not lazy map objects) so the same code plots correctly on
    # both Python 2 and Python 3.
    positions = list(range(len(seen_countries)))
    xvals = [country for country, _ in seen_countries]
    yvals = [count for _, count in seen_countries]

    fig = plt.figure()
    try:
        plt.bar(positions, yvals, width=0.9, align='center')
        plt.xticks(positions, xvals)
        plt.title('Countries in Pattern Dataset')
        plt.xlabel('Countries in Data')
        plt.ylabel('Occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Close instead of clear: pyplot keeps a cleared figure alive, so
        # every call would otherwise leave one more open figure behind.
        plt.close(fig)
一个非常简单的示例字典,但实际字典包含 56000 个值:
# Tiny stand-in for the real dataset: each outer key maps to an inner
# {country name: number} mapping.
dict = {
    "a": {"Germany": 20006.0, "United Kingdom": 20016.571428571428},
    "b": {"Chad": 13000.0, "South Africa": 3000000.0},
    "c": {"Chad": 200061.0, "South Africa": 3000000.0},
}
在我的脚本中,我调用:
if __name__ == "__main__":
    # Close every figure pyplot still has open before charting starts.
    plt.close('all')
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Starting pattern charting...\n")
    # Each call originally ended in a stray extra ")", which is a
    # SyntaxError; the parentheses are balanced here.
    countryChartPatterns(dict, 'newPatternCountries.png')
    valueChartPatterns(dict, 'newPatternValues.png')
注意,我是通过 import matplotlib.pyplot as plt 导入 matplotlib 的。
在 PyCharm 中运行这个脚本时,控制台会输出 Starting pattern charting...,
但函数需要很长时间才能完成绘图。
我做错了什么?既然直方图同样可以展示 countries/values 的出现次数,我是否应该用直方图代替条形图?我能否以某种方式更换 GUI 后端?欢迎任何建议。
这是我在上面的评论中提到的测试,结果表明耗时主要在数据预处理而不是绘图上,输出如下:
Elapsed pre-processing = 13.79 s
Elapsed plotting = 0.17 s
Pre-processing / plotting = 83.3654562565
测试脚本:
import matplotlib.pylab as plt
from collections import Counter
from operator import itemgetter
import time
def countryChartPatterns(dict, path):
    """Time the counting stage and the plotting stage of the country chart.

    Tallies how many records of ``dict`` carry each country key, draws the
    tallies as a bar chart saved to ``path``, and prints the elapsed seconds
    of both stages followed by their ratio.
    """
    # ---- stage 1: pre-processing ----
    started = time.time()
    seen_countries = Counter()
    # This loop is the code being measured, so it is left exactly as the
    # question wrote it.
    for data in dict.itervalues():
        seen_countries += Counter(data.keys())
    seen_countries = seen_countries.most_common()
    yvals = [count for _, count in seen_countries]
    xvals = [country for country, _ in seen_countries]
    dt1 = time.time() - started
    print("Elapsed pre-processing = {0:.2f} s".format(dt1))

    # ---- stage 2: plotting ----
    started = time.time()
    positions = range(len(seen_countries))
    plt.figure()
    plt.bar(positions, yvals, width=0.9, align='center')
    plt.xticks(positions, xvals)
    plt.title('Countries in Pattern Dataset')
    plt.xlabel('Countries in Data')
    plt.ylabel('Occurrences')
    for tick_kind in ('major', 'minor'):
        plt.tick_params(axis='both', which=tick_kind, labelsize=6)
    plt.tight_layout()
    plt.savefig(path)
    plt.clf()
    dt2 = time.time() - started
    print("Elapsed plotting = {0:.2f} s".format(dt2))

    print("Pre-processing / plotting = {}".format(dt1/dt2))
if __name__ == "__main__":
    import random as rd
    import numpy as np

    # Pool of names the synthetic records draw their keys from.
    countries = [
        "United States of America",
        "Afghanistan",
        "Albania",
        "Algeria",
        "Andorra",
        "Angola",
        "Antigua & Deps",
        "Argentina",
        "Armenia",
        "Australia",
        "Austria",
        "Azerbaijan",
    ]

    def item():
        """Return one synthetic record: two random countries with values.

        If both draws pick the same country the record has a single key
        holding the second value.
        """
        first_country = rd.choice(countries)
        first_value = np.random.randint(1e3)
        second_country = rd.choice(countries)
        second_value = np.random.randint(1e3)
        return {first_country: first_value, second_country: second_value}

    # One million synthetic records keyed by their index.
    dict = {}
    for i in range(1000000):
        dict[i] = item()

    print("Starting pattern charting...")
    countryChartPatterns(dict, 'newPatternCountries.png')
我有多个函数,每个函数都接收一个数组或字典以及一个路径作为参数,并把生成的图形保存到该路径。
我已尽量精简示例,下面是其中两个函数:
def valueChartPatterns(dict, path):
    """Save a bar chart of how often each value occurs in the dataset.

    Args:
        dict: Mapping whose values are themselves mappings. Every value of
            every inner mapping is tallied. (The name shadows the builtin
            but is kept so existing keyword callers keep working.)
        path: Destination handed to ``plt.savefig``.
    """
    seen_values = Counter()
    for data in dict.values():
        # update() tallies in place. On Python 2.7 Counter has no in-place
        # add, so ``seen_values += Counter(...)`` rebuilt the whole running
        # Counter for every record, which gets slower as the number of
        # distinct values grows.
        seen_values.update(data.values())
    seen_values = seen_values.most_common()

    # NOTE(review): these percentage labels are computed but never drawn.
    # The call is kept because tupleCounts2Percents is defined outside this
    # block and its behaviour cannot be confirmed from here.
    seen_values_pct = ['{:.2%}'.format(entry[1])
                       for entry in tupleCounts2Percents(seen_values)]

    # Real lists (not lazy map objects) so the same code plots correctly on
    # both Python 2 and Python 3.
    positions = list(range(len(seen_values)))
    labels = [value for value, _ in seen_values]
    counts = [count for _, count in seen_values]

    fig = plt.figure()
    try:
        plt.bar(positions, counts, width=0.9, align='center')
        plt.xticks(positions, labels)
        plt.title('Values in Pattern Dataset')
        plt.xlabel('Values in Data')
        plt.ylabel('Occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Close instead of clear: pyplot keeps a cleared figure alive, so
        # every call would otherwise leave one more open figure behind.
        plt.close(fig)
def countryChartPatterns(dict, path):
    """Save a bar chart of how many records mention each country.

    Args:
        dict: Mapping whose values are themselves mappings keyed by country.
            Each key of each inner mapping adds one to that country's count.
            (The name shadows the builtin but is kept so existing keyword
            callers keep working.)
        path: Destination handed to ``plt.savefig``.
    """
    seen_countries = Counter()
    for data in dict.values():
        # update() tallies in place. On Python 2.7 Counter has no in-place
        # add, so ``seen_countries += Counter(...)`` built a brand-new
        # Counter for every record.
        # Pass the keys explicitly: handing update() the mapping itself
        # would add its values to the counts instead of counting keys.
        seen_countries.update(data.keys())
    seen_countries = seen_countries.most_common()

    # NOTE(review): these percentage labels are computed but never drawn.
    # The call is kept because tupleCounts2Percents is defined outside this
    # block and its behaviour cannot be confirmed from here.
    seen_countries_percentage = [
        '{:.2%}'.format(entry[1])
        for entry in tupleCounts2Percents(seen_countries)
    ]

    # Real lists (not lazy map objects) so the same code plots correctly on
    # both Python 2 and Python 3.
    positions = list(range(len(seen_countries)))
    xvals = [country for country, _ in seen_countries]
    yvals = [count for _, count in seen_countries]

    fig = plt.figure()
    try:
        plt.bar(positions, yvals, width=0.9, align='center')
        plt.xticks(positions, xvals)
        plt.title('Countries in Pattern Dataset')
        plt.xlabel('Countries in Data')
        plt.ylabel('Occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Close instead of clear: pyplot keeps a cleared figure alive, so
        # every call would otherwise leave one more open figure behind.
        plt.close(fig)
一个非常简单的示例字典,但实际字典包含 56000 个值:
# Tiny stand-in for the real dataset: each outer key maps to an inner
# {country name: number} mapping.
dict = {
    "a": {"Germany": 20006.0, "United Kingdom": 20016.571428571428},
    "b": {"Chad": 13000.0, "South Africa": 3000000.0},
    "c": {"Chad": 200061.0, "South Africa": 3000000.0},
}
在我的脚本中,我调用:
if __name__ == "__main__":
    # Close every figure pyplot still has open before charting starts.
    plt.close('all')
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Starting pattern charting...\n")
    # Each call originally ended in a stray extra ")", which is a
    # SyntaxError; the parentheses are balanced here.
    countryChartPatterns(dict, 'newPatternCountries.png')
    valueChartPatterns(dict, 'newPatternValues.png')
注意,我是通过 import matplotlib.pyplot as plt 导入 matplotlib 的。
在 PyCharm 中运行这个脚本时,控制台会输出 Starting pattern charting...,
但函数需要很长时间才能完成绘图。
我做错了什么?既然直方图同样可以展示 countries/values 的出现次数,我是否应该用直方图代替条形图?我能否以某种方式更换 GUI 后端?欢迎任何建议。
这是我在上面的评论中提到的测试,结果表明耗时主要在数据预处理而不是绘图上,输出如下:
Elapsed pre-processing = 13.79 s
Elapsed plotting = 0.17 s
Pre-processing / plotting = 83.3654562565
测试脚本:
import matplotlib.pylab as plt
from collections import Counter
from operator import itemgetter
import time
def countryChartPatterns(dict, path):
    """Time the counting stage and the plotting stage of the country chart.

    Tallies how many records of ``dict`` carry each country key, draws the
    tallies as a bar chart saved to ``path``, and prints the elapsed seconds
    of both stages followed by their ratio.
    """
    # ---- stage 1: pre-processing ----
    started = time.time()
    seen_countries = Counter()
    # This loop is the code being measured, so it is left exactly as the
    # question wrote it.
    for data in dict.itervalues():
        seen_countries += Counter(data.keys())
    seen_countries = seen_countries.most_common()
    yvals = [count for _, count in seen_countries]
    xvals = [country for country, _ in seen_countries]
    dt1 = time.time() - started
    print("Elapsed pre-processing = {0:.2f} s".format(dt1))

    # ---- stage 2: plotting ----
    started = time.time()
    positions = range(len(seen_countries))
    plt.figure()
    plt.bar(positions, yvals, width=0.9, align='center')
    plt.xticks(positions, xvals)
    plt.title('Countries in Pattern Dataset')
    plt.xlabel('Countries in Data')
    plt.ylabel('Occurrences')
    for tick_kind in ('major', 'minor'):
        plt.tick_params(axis='both', which=tick_kind, labelsize=6)
    plt.tight_layout()
    plt.savefig(path)
    plt.clf()
    dt2 = time.time() - started
    print("Elapsed plotting = {0:.2f} s".format(dt2))

    print("Pre-processing / plotting = {}".format(dt1/dt2))
if __name__ == "__main__":
    import random as rd
    import numpy as np

    # Pool of names the synthetic records draw their keys from.
    countries = [
        "United States of America",
        "Afghanistan",
        "Albania",
        "Algeria",
        "Andorra",
        "Angola",
        "Antigua & Deps",
        "Argentina",
        "Armenia",
        "Australia",
        "Austria",
        "Azerbaijan",
    ]

    def item():
        """Return one synthetic record: two random countries with values.

        If both draws pick the same country the record has a single key
        holding the second value.
        """
        first_country = rd.choice(countries)
        first_value = np.random.randint(1e3)
        second_country = rd.choice(countries)
        second_value = np.random.randint(1e3)
        return {first_country: first_value, second_country: second_value}

    # One million synthetic records keyed by their index.
    dict = {}
    for i in range(1000000):
        dict[i] = item()

    print("Starting pattern charting...")
    countryChartPatterns(dict, 'newPatternCountries.png')