下载以 ';' 为分隔符的 txt 文件,并使用 Python 将其转换为 .CSV
Download txt with ';' delimiter and convert to .CSV with python
我必须下载这个 .txt 文件:Link
然后我必须将它解析为 .csv 并删除所有 header。
我试过这样做,但它对我不起作用,这是我的代码:
# Question's original attempt: download a ';'-delimited text file and
# rewrite it as a CSV file. The code is reproduced as posted; the body of
# the `with` statement appears without indentation in this listing.
import urllib
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
# NOTE(review): `urllib.request` is referenced here although only `urllib`
# was imported above.
urllib.request.urlretrieve(url_of_file, outfilename)
# Both the downloaded file and the CSV output are opened in binary mode.
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
# Parse the input using ';' as the field delimiter.
in_txt = csv.reader(infile, delimiter = ';')
# The writer uses the csv module's default dialect.
out_csv = csv.writer(outfile)
# Every row is copied; no header rows are skipped.
out_csv.writerows(in_txt)
在这段代码中,我没有对 header 做任何事情,因为即使是转换也无法正常工作。
urllib 中没有 request 命名空间。
将这一行:
urllib.request.urlretrieve(url_of_file, outfilename)
替换为(适用于 Python 2):
urllib.urlretrieve(url_of_file, outfilename)
更新:
您需要导入 urllib.request,而不仅仅是 urllib。
此外,您需要以文本模式打开文件,而不是二进制模式(即 'rb' 或 'wb')。
# Python 3: download a ';'-delimited text file and rewrite it as a
# comma-separated CSV file.
import urllib.request
import csv

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Fetch the remote file and store it locally under `outfilename`.
urllib.request.urlretrieve(url_of_file, outfilename)

# In Python 3 the csv module works on text-mode files. newline='' leaves
# line-ending handling to the csv module; without it the writer can emit
# an extra blank line after every row on Windows.
with open(outfilename, "r", newline='') as infile, \
        open(csvfile, 'w', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    # Copy every parsed row unchanged; only the delimiter differs.
    out_csv.writerows(in_txt)
您可以将此代码用于 Python 2:
# Python 2: download a ';'-delimited text file, then copy it to a CSV file
# while dropping the first three rows (treated as the header).
import urllib
import csv
import urllib2

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Save the raw response body to a local file. The try/finally and the
# `with` block make sure the connection and the file are closed even if
# reading or writing fails.
response = urllib2.urlopen(url_of_file)
try:
    with open(outfilename, 'wb') as output:
        output.write(response.read())
finally:
    response.close()

# In Python 2 the csv module expects files opened in binary mode.
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, 1):
        # Rows 1-3 are skipped; everything after them is copied.
        if row_number > 3:
            out_csv.writerow(row)
Python 3:
# Python 3: download a ';'-delimited text file, then copy it to a CSV file
# while dropping the first three rows (treated as the header).
import urllib.request
import csv

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Fetch the remote file and store it locally under `outfilename`.
urllib.request.urlretrieve(url_of_file, outfilename)

# newline='' on both files leaves line-ending handling to the csv module.
# The output uses the same explicit encoding as the input so that writing
# does not depend on the platform's default encoding.
with open(outfilename, encoding='utf-8', newline='') as infile, \
        open(csvfile, 'w', encoding='utf-8', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, 1):
        # Rows 1-3 are skipped; everything after them is copied.
        if row_number > 3:
            out_csv.writerow(row)
我必须下载这个 .txt 文件:Link
然后我必须将它解析为 .csv 并删除所有 header。
我试过这样做,但它对我不起作用,这是我的代码:
# Question's original attempt: download a ';'-delimited text file and
# rewrite it as a CSV file. The code is reproduced as posted; the body of
# the `with` statement appears without indentation in this listing.
import urllib
import csv
outfilename = "temp.txt"
csvfile = "data.csv" #open('data.csv', 'wb')
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"
# NOTE(review): `urllib.request` is referenced here although only `urllib`
# was imported above.
urllib.request.urlretrieve(url_of_file, outfilename)
# Both the downloaded file and the CSV output are opened in binary mode.
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
# Parse the input using ';' as the field delimiter.
in_txt = csv.reader(infile, delimiter = ';')
# The writer uses the csv module's default dialect.
out_csv = csv.writer(outfile)
# Every row is copied; no header rows are skipped.
out_csv.writerows(in_txt)
在这段代码中,我没有对 header 做任何事情,因为即使是转换也无法正常工作。
urllib 中没有 request 命名空间。
将这一行:
urllib.request.urlretrieve(url_of_file, outfilename)
替换为(适用于 Python 2):
urllib.urlretrieve(url_of_file, outfilename)
更新:
您需要导入 urllib.request,而不仅仅是 urllib。
此外,您需要以文本模式打开文件,而不是二进制模式(即 'rb' 或 'wb')。
# Python 3: download a ';'-delimited text file and rewrite it as a
# comma-separated CSV file.
import urllib.request
import csv

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Fetch the remote file and store it locally under `outfilename`.
urllib.request.urlretrieve(url_of_file, outfilename)

# In Python 3 the csv module works on text-mode files. newline='' leaves
# line-ending handling to the csv module; without it the writer can emit
# an extra blank line after every row on Windows.
with open(outfilename, "r", newline='') as infile, \
        open(csvfile, 'w', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    # Copy every parsed row unchanged; only the delimiter differs.
    out_csv.writerows(in_txt)
您可以将此代码用于 Python 2:
# Python 2: download a ';'-delimited text file, then copy it to a CSV file
# while dropping the first three rows (treated as the header).
import urllib
import csv
import urllib2

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Save the raw response body to a local file. The try/finally and the
# `with` block make sure the connection and the file are closed even if
# reading or writing fails.
response = urllib2.urlopen(url_of_file)
try:
    with open(outfilename, 'wb') as output:
        output.write(response.read())
finally:
    response.close()

# In Python 2 the csv module expects files opened in binary mode.
with open(outfilename, "rb") as infile, open(csvfile, 'wb') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, 1):
        # Rows 1-3 are skipped; everything after them is copied.
        if row_number > 3:
            out_csv.writerow(row)
Python 3:
# Python 3: download a ';'-delimited text file, then copy it to a CSV file
# while dropping the first three rows (treated as the header).
import urllib.request
import csv

outfilename = "temp.txt"
csvfile = "data.csv"
url_of_file = "http://www.ceps.cz/_layouts/15/Ceps/_Pages/GraphData.aspx?mode=txt&from=1/1/2011%2012:00:00%20AM&to=1/2/2011%2011:59:59%20PM&hasinterval=True&sol=1&lang=ENG&agr=MI&fnc=AVG&ver=RT&"

# Fetch the remote file and store it locally under `outfilename`.
urllib.request.urlretrieve(url_of_file, outfilename)

# newline='' on both files leaves line-ending handling to the csv module.
# The output uses the same explicit encoding as the input so that writing
# does not depend on the platform's default encoding.
with open(outfilename, encoding='utf-8', newline='') as infile, \
        open(csvfile, 'w', encoding='utf-8', newline='') as outfile:
    in_txt = csv.reader(infile, delimiter=';')
    out_csv = csv.writer(outfile)
    for row_number, row in enumerate(in_txt, 1):
        # Rows 1-3 are skipped; everything after them is copied.
        if row_number > 3:
            out_csv.writerow(row)