能否为此脚本实现多线程(或多进程)?
Implement multithreading (or multiprocessing?) with this script?
首先声明,我在多线程方面没有任何实际经验。我编写的这个脚本从文本文件中读取约 4,400 个地址,然后清理这些地址并对其进行地理编码。我哥哥提到可以使用多线程来提高它的速度。我在网上读到,如果只处理一个文本文件,多线程不会带来太大的区别。如果我把这个文本文件拆分成 2 个文本文件,会有效果吗?无论如何,如果有人能告诉我如何为该脚本实现多线程或多进程以提高速度,我将非常感激。如果做不到,能告诉我原因吗?谢谢!
# Standard-library GUI pieces used for the file-picker dialog.
import tkinter as tk
from tkinter import filedialog

# Third-party geocoding client and the timeout error it can raise.
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Bing

# Shared Bing geocoder used by cleanAddress().
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or the environment and rotating this key.
geolocator = Bing('vadrPcGdNLSX5bPNL7tw~ySbwhthllg7rNA4VSJ-O4g~Ag28cbu9Slxp5Sh_AsBDuQ9WypPuEhl9pHVPCAkiPf4A9FgCBf3l0KyQTKKsLCHw')

# Create the Tk root and hide it immediately; main() only needs it for the
# file-open dialog.
root = tk.Tk()
root.withdraw()
def _normalize_street(address):
    """Return *address* lower-cased with NYC avenue names normalized.

    Spelled-out ordinals become numerals ("fifth" -> "5th"), "fashion"
    becomes "7th", and "sixth"/"6th" becomes "avenue of the americas" with
    the now-redundant "avenue"/"ave" removed. Only the first matching rule
    is applied, mirroring the original if/elif chain.
    """
    import re

    address = address.lower()
    americas = 'avenue of the americas'
    # Order matters: the first rule whose word appears in the address wins.
    rules = (
        ('first', '1st'),
        ('second', '2nd'),
        ('third', '3rd'),
        ('fourth', '4th'),
        ('fifth', '5th'),
        ('sixth', americas),
        ('6th', americas),
        ('seventh', '7th'),
        ('fashion', '7th'),
        ('eighth', '8th'),
        ('ninth', '9th'),
        ('tenth', '10th'),
        ('eleventh', '11th'),
    )
    for word, replacement in rules:
        # Whole-word match so that e.g. "26th" is not mistaken for "6th".
        pattern = r'\b' + re.escape(word) + r'\b'
        if not re.search(pattern, address):
            continue
        if replacement == americas:
            # Drop "avenue"/"ave" before substituting. The alternation tries
            # "avenue" first, so "avenue" is never reduced to "nue".
            address = re.sub(r'\b(?:avenue|ave)\b\.?', '', address)
            address = re.sub(pattern, replacement, address)
            # Collapse the gap left behind by the removed word.
            address = ' '.join(address.split())
        else:
            address = re.sub(pattern, replacement, address)
        break
    return address


def cleanAddress(dirty):
    """Geocode *dirty* and print the cleaned address with its coordinates.

    Prints ``<street>, <zip>, <latitude>, <longitude>`` on success, or
    ``Can not be cleaned`` when the geocoder times out, yields no usable
    result, or the formatted address does not split into exactly four
    comma-separated parts. Returns None in every case.
    """
    try:
        clean = geolocator.geocode(dirty)
        # A missing result (None) has no .address -> AttributeError; anything
        # other than four comma-separated fields -> ValueError on unpacking.
        address, city, zipcode, country = clean.address.split(",")
    except (AttributeError, ValueError, GeocoderTimedOut):
        print('Can not be cleaned')
        return

    address = _normalize_street(address)
    # Skip the first three characters of the third field.
    # NOTE(review): presumably a leading " NY" state prefix -- confirm.
    zipcode = zipcode[3:]
    print(address + ",", zipcode.lstrip() + ",", str(clean.latitude) + ",", str(clean.longitude))
def main():
    """Ask the user for a text file and clean every address line in it.

    Each line of the chosen file is treated as one address; " nyc" is
    appended before it is handed to cleanAddress(). Does nothing if the
    file dialog is cancelled.
    """
    root.update()
    fpath = filedialog.askopenfilename()
    if not fpath:
        # Dialog cancelled: there is no file to open.
        return
    # The context manager closes the file even if cleanAddress() raises.
    with open(fpath) as f:
        for line in f:
            # Strip the line ending so the query reads "<address> nyc"
            # instead of "<address>\n nyc".
            dirty = line.strip() + " nyc"
            cleanAddress(dirty)
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
简短的回答是:不,你不能。
Python multiprocessing
库允许您通过将计算分布到多个进程来减少完成所有计算所需的时间。它可以加快整个脚本的运行速度,但前提是该脚本属于 CPU 密集型(即需要大量计算)。
在你的例子中,大部分时间都花在连接为你提供地理位置信息的网络服务上,所以总执行时间取决于你或该服务的网络连接速度,而不是你的计算机的整体性能。
首先声明,我在多线程方面没有任何实际经验。我编写的这个脚本从文本文件中读取约 4,400 个地址,然后清理这些地址并对其进行地理编码。我哥哥提到可以使用多线程来提高它的速度。我在网上读到,如果只处理一个文本文件,多线程不会带来太大的区别。如果我把这个文本文件拆分成 2 个文本文件,会有效果吗?无论如何,如果有人能告诉我如何为该脚本实现多线程或多进程以提高速度,我将非常感激。如果做不到,能告诉我原因吗?谢谢!
# Standard-library GUI pieces used for the file-picker dialog.
import tkinter as tk
from tkinter import filedialog

# Third-party geocoding client and the timeout error it can raise.
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Bing

# Shared Bing geocoder used by cleanAddress().
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or the environment and rotating this key.
geolocator = Bing('vadrPcGdNLSX5bPNL7tw~ySbwhthllg7rNA4VSJ-O4g~Ag28cbu9Slxp5Sh_AsBDuQ9WypPuEhl9pHVPCAkiPf4A9FgCBf3l0KyQTKKsLCHw')

# Create the Tk root and hide it immediately; main() only needs it for the
# file-open dialog.
root = tk.Tk()
root.withdraw()
def _normalize_street(address):
    """Return *address* lower-cased with NYC avenue names normalized.

    Spelled-out ordinals become numerals ("fifth" -> "5th"), "fashion"
    becomes "7th", and "sixth"/"6th" becomes "avenue of the americas" with
    the now-redundant "avenue"/"ave" removed. Only the first matching rule
    is applied, mirroring the original if/elif chain.
    """
    import re

    address = address.lower()
    americas = 'avenue of the americas'
    # Order matters: the first rule whose word appears in the address wins.
    rules = (
        ('first', '1st'),
        ('second', '2nd'),
        ('third', '3rd'),
        ('fourth', '4th'),
        ('fifth', '5th'),
        ('sixth', americas),
        ('6th', americas),
        ('seventh', '7th'),
        ('fashion', '7th'),
        ('eighth', '8th'),
        ('ninth', '9th'),
        ('tenth', '10th'),
        ('eleventh', '11th'),
    )
    for word, replacement in rules:
        # Whole-word match so that e.g. "26th" is not mistaken for "6th".
        pattern = r'\b' + re.escape(word) + r'\b'
        if not re.search(pattern, address):
            continue
        if replacement == americas:
            # Drop "avenue"/"ave" before substituting. The alternation tries
            # "avenue" first, so "avenue" is never reduced to "nue".
            address = re.sub(r'\b(?:avenue|ave)\b\.?', '', address)
            address = re.sub(pattern, replacement, address)
            # Collapse the gap left behind by the removed word.
            address = ' '.join(address.split())
        else:
            address = re.sub(pattern, replacement, address)
        break
    return address


def cleanAddress(dirty):
    """Geocode *dirty* and print the cleaned address with its coordinates.

    Prints ``<street>, <zip>, <latitude>, <longitude>`` on success, or
    ``Can not be cleaned`` when the geocoder times out, yields no usable
    result, or the formatted address does not split into exactly four
    comma-separated parts. Returns None in every case.
    """
    try:
        clean = geolocator.geocode(dirty)
        # A missing result (None) has no .address -> AttributeError; anything
        # other than four comma-separated fields -> ValueError on unpacking.
        address, city, zipcode, country = clean.address.split(",")
    except (AttributeError, ValueError, GeocoderTimedOut):
        print('Can not be cleaned')
        return

    address = _normalize_street(address)
    # Skip the first three characters of the third field.
    # NOTE(review): presumably a leading " NY" state prefix -- confirm.
    zipcode = zipcode[3:]
    print(address + ",", zipcode.lstrip() + ",", str(clean.latitude) + ",", str(clean.longitude))
def main():
    """Ask the user for a text file and clean every address line in it.

    Each line of the chosen file is treated as one address; " nyc" is
    appended before it is handed to cleanAddress(). Does nothing if the
    file dialog is cancelled.
    """
    root.update()
    fpath = filedialog.askopenfilename()
    if not fpath:
        # Dialog cancelled: there is no file to open.
        return
    # The context manager closes the file even if cleanAddress() raises.
    with open(fpath) as f:
        for line in f:
            # Strip the line ending so the query reads "<address> nyc"
            # instead of "<address>\n nyc".
            dirty = line.strip() + " nyc"
            cleanAddress(dirty)
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
简短的回答是:不,你不能。
Python multiprocessing
库允许您通过将计算分布到多个进程来减少完成所有计算所需的时间。它可以加快整个脚本的运行速度,但前提是该脚本属于 CPU 密集型(即需要大量计算)。
在你的例子中,大部分时间都花在连接为你提供地理位置信息的网络服务上,所以总执行时间取决于你或该服务的网络连接速度,而不是你的计算机的整体性能。