如何创建一个日志 `csv` 文件,其中包含与每个 link 对应的文件名
How to create a log `csv` file that contains the filename corresponding to each link
我有一个 csv 文件,其中包含 urls 列表,
内容类似这样:
manual_name1 12344.pdf #pdf link
manual_name2 12334.pdf #pdf link
运行下面的代码之后,下载的文件将另存为:
manual_name1.pdf
和 manual_name2.pdf
等等。
我想要一个 csv 日志文件,其中包含每个 pdf 文件的名称及其对应的下载位置(链接),如下例所示:
manual_name.pdf 12344.pdf #pdflink
manual_name2.pdf 12334.pdf #pdflink
代码如下:
import os
import csv
import requests
import time
# Directory the downloaded PDFs are written into.
write_path = '/Users/macossierra/Desktop/pdf'  # ASSUMING THAT FOLDER EXISTS!
# Seconds to wait for the server (connect / between reads) before giving up,
# so one unresponsive host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Each non-empty row of this.csv is expected to be: <manual name>, <pdf url>.
with open('this.csv', 'r') as csvfile:
    spamreader = csv.reader(csvfile)
    for link in spamreader:
        if not link:
            # Blank line in the CSV.
            continue
        if len(link) < 2:
            # A row without a URL column would otherwise crash on link[1].
            print('Skipping malformed row (expected name and URL): {}'.format(link))
            continue
        print('-'*72)
        # Timestamp suffix keeps repeated runs from overwriting earlier files.
        pdf_file = '{}_{}.pdf'.format(link[0], int(time.time()))
        pdf_path = os.path.join(write_path, pdf_file)
        try:
            # Try to request PDF from URL
            print('Trying To Connect with Link>>>>>. {}...'.format(link[1]))
            # The response is used as a context manager so the streamed
            # connection is always released, even on errors.
            with requests.get(link[1], stream=True,
                              timeout=REQUEST_TIMEOUT) as a:
                # Turn 4xx/5xx answers into an exception instead of saving
                # the server's error page as a ".pdf".
                a.raise_for_status()
                # Open the output file only once the request succeeded, so a
                # failed request does not leave an empty file behind.
                with open(pdf_path, 'wb') as pdf:
                    for block in a.iter_content(512):
                        if not block:
                            break
                        pdf.write(block)
            print('File Downloaded Successfully.')
        except requests.exceptions.RequestException as e:  # This will catch ONLY Requests exceptions
            print('REQUESTS ERROR:')
            print(e)  # This should tell you more details about the error
如果您只需要类似于上面列出的格式,您可以在下载 PDF 文件后立即将一行写入 CSV。
import os
import csv
import requests
import time
# Directory the downloaded PDFs are written into.
write_path = '/Users/macossierra/Desktop/pdf'  # ASSUMING THAT FOLDER EXISTS!
# Seconds to wait for the server (connect / between reads) before giving up,
# so one unresponsive host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Each non-empty row of this.csv is expected to be: <manual name>, <pdf url>.
# log.csv is opened in text mode with newline='' as the csv module requires
# on Python 3 (binary mode makes writerow() raise TypeError).
with open('this.csv', 'r') as csvfile, \
        open('log.csv', 'w', newline='') as csv_out:
    writer = csv.writer(csv_out)
    spamreader = csv.reader(csvfile)
    for link in spamreader:
        if not link:
            # Blank line in the CSV.
            continue
        if len(link) < 2:
            # A row without a URL column would otherwise crash on link[1].
            print('Skipping malformed row (expected name and URL): {}'.format(link))
            continue
        print('-'*72)
        # Timestamp suffix keeps repeated runs from overwriting earlier files.
        pdf_file = '{}_{}.pdf'.format(link[0], int(time.time()))
        path = os.path.join(write_path, pdf_file)
        try:
            # Try to request PDF from URL
            print('Trying to connect with link >>>>> {} ... '.format(link[1]))
            # The response is used as a context manager so the streamed
            # connection is always released, even on errors.
            with requests.get(link[1], stream=True,
                              timeout=REQUEST_TIMEOUT) as a:
                # Turn 4xx/5xx answers into an exception instead of saving
                # the server's error page as a ".pdf" and logging it as OK.
                a.raise_for_status()
                # Open the output file only once the request succeeded, so a
                # failed request does not leave an empty file behind.
                with open(path, 'wb') as pdf:
                    for block in a.iter_content(512):
                        if not block:
                            break
                        pdf.write(block)
            print('File downloaded successfully.')
            # Log row: saved file name, source URL, full local path, and the
            # time the download finished. Written only for successful downloads.
            writer.writerow([pdf_file, link[1], path, str(time.time())])
        except requests.exceptions.RequestException as e:  # This will catch ONLY Requests exceptions
            print('REQUESTS ERROR:')
            # This should tell you more details about the error log
            print(e)
我有一个 csv 文件,其中包含 urls 列表,
内容类似这样:
manual_name1 12344.pdf #pdf link
manual_name2 12334.pdf #pdf link
运行下面的代码之后,下载的文件将另存为:
manual_name1.pdf
和 manual_name2.pdf
等等。
我想要一个 csv 日志文件,其中包含每个 pdf 文件的名称及其对应的下载位置(链接),如下例所示:
manual_name.pdf 12344.pdf #pdflink
manual_name2.pdf 12334.pdf #pdflink
代码如下:
import os
import csv
import requests
import time
# Directory the downloaded PDFs are written into.
write_path = '/Users/macossierra/Desktop/pdf'  # ASSUMING THAT FOLDER EXISTS!
# Seconds to wait for the server (connect / between reads) before giving up,
# so one unresponsive host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Each non-empty row of this.csv is expected to be: <manual name>, <pdf url>.
with open('this.csv', 'r') as csvfile:
    spamreader = csv.reader(csvfile)
    for link in spamreader:
        if not link:
            # Blank line in the CSV.
            continue
        if len(link) < 2:
            # A row without a URL column would otherwise crash on link[1].
            print('Skipping malformed row (expected name and URL): {}'.format(link))
            continue
        print('-'*72)
        # Timestamp suffix keeps repeated runs from overwriting earlier files.
        pdf_file = '{}_{}.pdf'.format(link[0], int(time.time()))
        pdf_path = os.path.join(write_path, pdf_file)
        try:
            # Try to request PDF from URL
            print('Trying To Connect with Link>>>>>. {}...'.format(link[1]))
            # The response is used as a context manager so the streamed
            # connection is always released, even on errors.
            with requests.get(link[1], stream=True,
                              timeout=REQUEST_TIMEOUT) as a:
                # Turn 4xx/5xx answers into an exception instead of saving
                # the server's error page as a ".pdf".
                a.raise_for_status()
                # Open the output file only once the request succeeded, so a
                # failed request does not leave an empty file behind.
                with open(pdf_path, 'wb') as pdf:
                    for block in a.iter_content(512):
                        if not block:
                            break
                        pdf.write(block)
            print('File Downloaded Successfully.')
        except requests.exceptions.RequestException as e:  # This will catch ONLY Requests exceptions
            print('REQUESTS ERROR:')
            print(e)  # This should tell you more details about the error
如果您只需要类似于上面列出的格式,您可以在下载 PDF 文件后立即将一行写入 CSV。
import os
import csv
import requests
import time
# Directory the downloaded PDFs are written into.
write_path = '/Users/macossierra/Desktop/pdf'  # ASSUMING THAT FOLDER EXISTS!
# Seconds to wait for the server (connect / between reads) before giving up,
# so one unresponsive host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Each non-empty row of this.csv is expected to be: <manual name>, <pdf url>.
# log.csv is opened in text mode with newline='' as the csv module requires
# on Python 3 (binary mode makes writerow() raise TypeError).
with open('this.csv', 'r') as csvfile, \
        open('log.csv', 'w', newline='') as csv_out:
    writer = csv.writer(csv_out)
    spamreader = csv.reader(csvfile)
    for link in spamreader:
        if not link:
            # Blank line in the CSV.
            continue
        if len(link) < 2:
            # A row without a URL column would otherwise crash on link[1].
            print('Skipping malformed row (expected name and URL): {}'.format(link))
            continue
        print('-'*72)
        # Timestamp suffix keeps repeated runs from overwriting earlier files.
        pdf_file = '{}_{}.pdf'.format(link[0], int(time.time()))
        path = os.path.join(write_path, pdf_file)
        try:
            # Try to request PDF from URL
            print('Trying to connect with link >>>>> {} ... '.format(link[1]))
            # The response is used as a context manager so the streamed
            # connection is always released, even on errors.
            with requests.get(link[1], stream=True,
                              timeout=REQUEST_TIMEOUT) as a:
                # Turn 4xx/5xx answers into an exception instead of saving
                # the server's error page as a ".pdf" and logging it as OK.
                a.raise_for_status()
                # Open the output file only once the request succeeded, so a
                # failed request does not leave an empty file behind.
                with open(path, 'wb') as pdf:
                    for block in a.iter_content(512):
                        if not block:
                            break
                        pdf.write(block)
            print('File downloaded successfully.')
            # Log row: saved file name, source URL, full local path, and the
            # time the download finished. Written only for successful downloads.
            writer.writerow([pdf_file, link[1], path, str(time.time())])
        except requests.exceptions.RequestException as e:  # This will catch ONLY Requests exceptions
            print('REQUESTS ERROR:')
            # This should tell you more details about the error log
            print(e)