我需要使用 Python Selenium 下载锚点的 href 属性中引用的图像
I need to download images referenced in the href attribute of anchors using Python Selenium
应按顺序获取检索到的链接,以便进行进一步处理。这是我到目前为止尝试过的:
# Locate the container that holds the image anchors, then the anchors themselves.
lay = driver.find_element_by_xpath('//*[@id="app"]/div/div[4]/div[2]/div/div[1]/div/div')
fig = lay.find_elements_by_class_name('_2Mc8_')

# Collect every href BEFORE navigating anywhere. Iterating over a single
# href string (as `for ab in href` did) walks its characters, not the links.
hrefs = []
for link in fig:
    href = link.get_attribute("href")
    print(href)
    if href:  # get_attribute returns None when the attribute is missing
        hrefs.append(href)

# Visit the links one by one, in the order they were found, and click the
# download control on each page.
for ab in hrefs:
    driver.get(ab)
    dwn = driver.find_element_by_xpath('//*[@id="app"]/div/div[3]/div/div[1]/div[1]/header/div[2]/div[3]/a/span')
    dwn.click()
    time.sleep(2)  # presumably gives the browser time to start the download
我前段时间做过一个类似的项目。您需要做的是打开一个流,把对象下载到该流中,然后再将其关闭。
def requests_image(file_url):
    """Download the image at ``file_url`` into ``images/`` and return its name.

    The image type is sniffed from the downloaded bytes, so the extension is
    correct even when it cannot be derived from the URL.

    Args:
        file_url: URL of the image to fetch.

    Returns:
        The file name (relative to ``images/``) the image was saved under, or
        ``False`` when the server does not answer with an OK status or the
        payload is not an image format that ``imghdr`` recognises.
    """
    i = requests.get(file_url)
    if i.status_code != requests.codes.ok:
        return False

    # Sniff the type straight from the bytes: imghdr ignores its first
    # argument when ``h`` is supplied, so no temporary file is needed.
    ext = imghdr.what(None, h=i.content)
    if ext is None:
        # Not a recognisable image; concatenating "." + None would raise.
        return False

    # Base name: the last 8 digits of the Unix time in seconds.
    uidname = str(int(time.time()))[-8:]
    filename = uidname + "." + ext
    # Several downloads can complete within the same second; add a counter
    # instead of silently overwriting the earlier image.
    counter = 1
    while os.path.exists("images/" + filename):
        filename = uidname + "_" + str(counter) + "." + ext
        counter += 1

    # The with-statement closes the stream; no explicit close() is required.
    with iopen("images/" + filename, 'wb') as out:
        out.write(i.content)
    return filename
以上是我会使用的函数。只需这样调用它:
# Download the image behind ``href``; the result is either the saved file
# name or a falsy value.
fname = requests_image(href)
if not fname:
    # Falsy result (for example, the link turned out to be invalid): log it
    # and investigate whether the failure is essential to know about.
    pass
else:
    # Successful download: ``fname`` holds the stored file name, which may
    # need to be recorded in a database.
    pass
# NOTE(review): '//' looks like an elided placeholder XPath - substitute the
# real locator for the anchor container before running this.
lay = driver.find_element_by_xpath('//')
fig = lay.find_elements_by_class_name('_2Mc8_')

# Gather all hrefs first. Looping over one href string (`for ab in href`)
# would iterate its characters and pass single letters to driver.get().
hrefs = []
for link in fig:
    href = link.get_attribute("href")
    print(href)
    if href:  # get_attribute returns None when the attribute is missing
        hrefs.append(href)

# Open each link in turn, in the order found, and click its download control.
for ab in hrefs:
    driver.get(ab)
    # NOTE(review): placeholder XPath here as well - replace with the real
    # locator of the download button.
    dwn = driver.find_element_by_xpath('//')
    dwn.click()
    time.sleep(2)  # presumably gives the browser time to start the download
应按顺序获取检索到的链接,以便进行进一步处理。这是我到目前为止尝试过的:
# Locate the container that holds the image anchors, then the anchors themselves.
lay = driver.find_element_by_xpath('//*[@id="app"]/div/div[4]/div[2]/div/div[1]/div/div')
fig = lay.find_elements_by_class_name('_2Mc8_')

# Collect every href BEFORE navigating anywhere. Iterating over a single
# href string (as `for ab in href` did) walks its characters, not the links.
hrefs = []
for link in fig:
    href = link.get_attribute("href")
    print(href)
    if href:  # get_attribute returns None when the attribute is missing
        hrefs.append(href)

# Visit the links one by one, in the order they were found, and click the
# download control on each page.
for ab in hrefs:
    driver.get(ab)
    dwn = driver.find_element_by_xpath('//*[@id="app"]/div/div[3]/div/div[1]/div[1]/header/div[2]/div[3]/a/span')
    dwn.click()
    time.sleep(2)  # presumably gives the browser time to start the download
我前段时间做过一个类似的项目。您需要做的是打开一个流,把对象下载到该流中,然后再将其关闭。
def requests_image(file_url):
    """Download the image at ``file_url`` into ``images/`` and return its name.

    The image type is sniffed from the downloaded bytes, so the extension is
    correct even when it cannot be derived from the URL.

    Args:
        file_url: URL of the image to fetch.

    Returns:
        The file name (relative to ``images/``) the image was saved under, or
        ``False`` when the server does not answer with an OK status or the
        payload is not an image format that ``imghdr`` recognises.
    """
    i = requests.get(file_url)
    if i.status_code != requests.codes.ok:
        return False

    # Sniff the type straight from the bytes: imghdr ignores its first
    # argument when ``h`` is supplied, so no temporary file is needed.
    ext = imghdr.what(None, h=i.content)
    if ext is None:
        # Not a recognisable image; concatenating "." + None would raise.
        return False

    # Base name: the last 8 digits of the Unix time in seconds.
    uidname = str(int(time.time()))[-8:]
    filename = uidname + "." + ext
    # Several downloads can complete within the same second; add a counter
    # instead of silently overwriting the earlier image.
    counter = 1
    while os.path.exists("images/" + filename):
        filename = uidname + "_" + str(counter) + "." + ext
        counter += 1

    # The with-statement closes the stream; no explicit close() is required.
    with iopen("images/" + filename, 'wb') as out:
        out.write(i.content)
    return filename
以上是我会使用的函数。只需这样调用它:
# Download the image behind ``href``; the result is either the saved file
# name or a falsy value.
fname = requests_image(href)
if not fname:
    # Falsy result (for example, the link turned out to be invalid): log it
    # and investigate whether the failure is essential to know about.
    pass
else:
    # Successful download: ``fname`` holds the stored file name, which may
    # need to be recorded in a database.
    pass
# NOTE(review): '//' looks like an elided placeholder XPath - substitute the
# real locator for the anchor container before running this.
lay = driver.find_element_by_xpath('//')
fig = lay.find_elements_by_class_name('_2Mc8_')

# Gather all hrefs first. Looping over one href string (`for ab in href`)
# would iterate its characters and pass single letters to driver.get().
hrefs = []
for link in fig:
    href = link.get_attribute("href")
    print(href)
    if href:  # get_attribute returns None when the attribute is missing
        hrefs.append(href)

# Open each link in turn, in the order found, and click its download control.
for ab in hrefs:
    driver.get(ab)
    # NOTE(review): placeholder XPath here as well - replace with the real
    # locator of the download button.
    dwn = driver.find_element_by_xpath('//')
    dwn.click()
    time.sleep(2)  # presumably gives the browser time to start the download