通过 URL 从 Google Drive 下载(未设访问限制的)PDF 文件
Download a PDF file (not access-restricted) from Google Drive via URL
import os
import requests
def download_file(download_url: str, filename: str):
    """
    Download a resume PDF from a URL and save it as ``<filename>.pdf``.

    The response body is written exactly as the server returns it, so the
    URL has to serve the raw file bytes.
    NOTE(review): a Google Drive ".../view" share link presumably returns an
    HTML viewer page rather than the PDF itself - confirm, and pass a
    direct-download URL for such files.

    @param download_url: URL of resume to be downloaded
    @type download_url: str
    @param filename: Name and location of file to be stored, without the
        ".pdf" extension (it is appended here)
    @type filename: str
    @return: None
    @rtype: None
    @raise requests.HTTPError: if the server answers with a 4xx/5xx status
    @raise requests.Timeout: if the server does not respond within 30 seconds
    """
    # A timeout keeps the call from blocking forever on an unresponsive host.
    file_request = requests.get(download_url, timeout=30)
    # Fail loudly on HTTP errors instead of saving an error page as a ".pdf".
    file_request.raise_for_status()
    # Plain "wb" is enough: the file is only written, never read back.
    with open(f'{filename}.pdf', 'wb') as file:
        file.write(file_request.content)
# Example 1: download a PDF that is served directly by its URL.
cand_id = "101"
time_current = "801"
file_location = f"{cand_id}_{time_current}"
download_file("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", file_location)

# Example 2: download a PDF shared through a Google Drive link.
cand_id = "201"
time_current = "901"
# Rebuild the target name from the new ids; without this the second download
# would reuse "101_801" and overwrite the first file.
file_location = f"{cand_id}_{time_current}"
download_file("https://drive.google.com/file/d/0B1HXnM1lBuoqMzVhZjcwNTAtZWI5OS00ZDg3LWEyMzktNzZmYWY2Y2NhNWQx/view?hl=en&resourcekey=0-5DqnTtXPFvySMiWstuAYdA", file_location)
----------
- 第一个文件下载正常(即 101_801.pdf)。
- 但是第二个文件(即 201_901.pdf)无法在任何 PDF 阅读器中打开(错误:我们无法打开此文件)。
- 我的理解是,我没能正确读写这个来自对所有人开放的 Google Drive 的文件。应该如何读取和写入该文件?
- 我可以使用 Google Drive API,但除了使用它之外,还有更好的解决方案吗?
我试了这段代码,同样无法打开 PDF 文件。我建议尝试 gdown 包。它易于使用,甚至可以从 Google Drive 下载大文件。我在课程中用它为作业下载过 .sql 数据库文件(约 20 GB)。
如果您想在此代码的基础上构建更多功能,那么您应该了解一下 Drive API。这是一个文档完善且速度很快的 API。
我通过 Python 的 wget 包找到了解决方案。在此作答,希望能帮助将来遇到同样问题的人。
import os
import wget
def download_candidate_resume(email: str, resume_url: str):
    """
    Download a resume from Google Drive and store it in the current working
    directory as ``<email>_<unix timestamp>.pdf``.

    A Drive share link of the form
    ``https://drive.google.com/file/d/<id>/...?usp=sharing`` is rewritten to
    the ``uc?export=download&id=<id>`` form before downloading. URLs that do
    not match that pattern are passed to wget unchanged.

    @param email: candidate email
    @type email: str
    @param resume_url: url of resume on google drive
    @type resume_url: str
    @return: None
    @rtype: None
    """
    # Imported locally: the surrounding file only imports os and wget.
    import re
    from datetime import datetime

    file_extension = "pdf"
    current_time = datetime.now()
    temp_file_path = os.path.join(
        os.getcwd(),
        f'{email}_{int(current_time.timestamp())}.{file_extension}',
    )
    downloadable_resume_url = re.sub(
        r"https://drive\.google\.com/file/d/(.*?)/.*?\?usp=sharing",
        # \1 re-inserts the captured file id; without it the rewritten URL
        # ends in an empty "id=" and cannot identify the file.
        r"https://drive.google.com/uc?export=download&id=\1",
        resume_url,
    )
    wget.download(downloadable_resume_url, out=temp_file_path)
import os
import requests
def download_file(download_url: str, filename: str):
    """
    Download a resume PDF from a URL and save it as ``<filename>.pdf``.

    The response body is written exactly as the server returns it, so the
    URL has to serve the raw file bytes.
    NOTE(review): a Google Drive ".../view" share link presumably returns an
    HTML viewer page rather than the PDF itself - confirm, and pass a
    direct-download URL for such files.

    @param download_url: URL of resume to be downloaded
    @type download_url: str
    @param filename: Name and location of file to be stored, without the
        ".pdf" extension (it is appended here)
    @type filename: str
    @return: None
    @rtype: None
    @raise requests.HTTPError: if the server answers with a 4xx/5xx status
    @raise requests.Timeout: if the server does not respond within 30 seconds
    """
    # A timeout keeps the call from blocking forever on an unresponsive host.
    file_request = requests.get(download_url, timeout=30)
    # Fail loudly on HTTP errors instead of saving an error page as a ".pdf".
    file_request.raise_for_status()
    # Plain "wb" is enough: the file is only written, never read back.
    with open(f'{filename}.pdf', 'wb') as file:
        file.write(file_request.content)
# Example 1: download a PDF that is served directly by its URL.
cand_id = "101"
time_current = "801"
file_location = f"{cand_id}_{time_current}"
download_file("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", file_location)

# Example 2: download a PDF shared through a Google Drive link.
cand_id = "201"
time_current = "901"
# Rebuild the target name from the new ids; without this the second download
# would reuse "101_801" and overwrite the first file.
file_location = f"{cand_id}_{time_current}"
download_file("https://drive.google.com/file/d/0B1HXnM1lBuoqMzVhZjcwNTAtZWI5OS00ZDg3LWEyMzktNzZmYWY2Y2NhNWQx/view?hl=en&resourcekey=0-5DqnTtXPFvySMiWstuAYdA", file_location)
----------
- 第一个文件下载正常(即 101_801.pdf)。
- 但是第二个文件(即 201_901.pdf)无法在任何 PDF 阅读器中打开(错误:我们无法打开此文件)。
- 我的理解是,我没能正确读写这个来自对所有人开放的 Google Drive 的文件。应该如何读取和写入该文件?
- 我可以使用 Google Drive API,但除了使用它之外,还有更好的解决方案吗?
我试了这段代码,同样无法打开 PDF 文件。我建议尝试 gdown 包。它易于使用,甚至可以从 Google Drive 下载大文件。我在课程中用它为作业下载过 .sql 数据库文件(约 20 GB)。
如果您想在此代码的基础上构建更多功能,那么您应该了解一下 Drive API。这是一个文档完善且速度很快的 API。
我通过 Python 的 wget 包找到了解决方案。在此作答,希望能帮助将来遇到同样问题的人。
import os
import wget
def download_candidate_resume(email: str, resume_url: str):
    """
    Download a resume from Google Drive and store it in the current working
    directory as ``<email>_<unix timestamp>.pdf``.

    A Drive share link of the form
    ``https://drive.google.com/file/d/<id>/...?usp=sharing`` is rewritten to
    the ``uc?export=download&id=<id>`` form before downloading. URLs that do
    not match that pattern are passed to wget unchanged.

    @param email: candidate email
    @type email: str
    @param resume_url: url of resume on google drive
    @type resume_url: str
    @return: None
    @rtype: None
    """
    # Imported locally: the surrounding file only imports os and wget.
    import re
    from datetime import datetime

    file_extension = "pdf"
    current_time = datetime.now()
    temp_file_path = os.path.join(
        os.getcwd(),
        f'{email}_{int(current_time.timestamp())}.{file_extension}',
    )
    downloadable_resume_url = re.sub(
        r"https://drive\.google\.com/file/d/(.*?)/.*?\?usp=sharing",
        # \1 re-inserts the captured file id; without it the rewritten URL
        # ends in an empty "id=" and cannot identify the file.
        r"https://drive.google.com/uc?export=download&id=\1",
        resume_url,
    )
    wget.download(downloadable_resume_url, out=temp_file_path)