urllib.request.urlretrieve 从 Instagram 下载错误的文件
urllib.request.urlretrieve downloads the wrong files from Instagram
在尝试抓取 Instagram 时,即使帖子是视频,代码也会从帖子 URL 下载一张图片。我不明白为什么它下载的是一张随机的 jpg,而不是 else 分支中的 mp4。
# Collect post links from the page currently open in `driver`, then visit each
# post and download its media, naming the file after the post's shortcode.
# NOTE: block indentation was lost in this paste; the lines are shown flat.
# Fixed 5 s pause before reading the anchors (presumably to let the page load).
time.sleep(5)
posts = []
links = driver.find_elements_by_tag_name('a')
for link in links:
post = link.get_attribute('href')
# Keep only hrefs that contain '/p/'.
# NOTE(review): get_attribute('href') can presumably be None for anchors
# without an href, which would make this `in` test raise TypeError -- confirm.
if '/p/' in post:
posts.append(post)
# Get videos and images: visit every collected post and download its media.
download_url = ''
for post in posts:
driver.get(post)
# Second-to-last '/'-separated piece of the current URL; used as the file name.
shortcode = driver.current_url.split("/")[-2]
time.sleep(7)
# NOTE(review): per the Selenium docs, find_element_* raises
# NoSuchElementException when nothing matches instead of returning None, so
# this condition can never be False and the else branch below is never
# reached -- confirm. As reported in the question, this <img> selector also
# matches on video posts, which is why a .jpg is saved for them.
if driver.find_element_by_css_selector("img[style='object-fit: cover;']") is not None:
download_url = driver.find_element_by_css_selector("img[style='object-fit: cover;']").get_attribute('src')
urllib.request.urlretrieve( download_url, '{}.jpg'.format(shortcode))
else:
download_url = driver.find_element_by_css_selector("video[type='video/mp4']").get_attribute('src')
urllib.request.urlretrieve( download_url, '{}.mp4'.format(shortcode))
# Fixed 5 s pause after handling the post.
time.sleep(5)
我想我们可以改用“Try-Except”块:
# Replacement for the if/else inside the `for post in posts:` loop.
# Relies on `driver`, `By`, `urlretrieve` and `shortcode` from the surrounding
# script.
from selenium.common.exceptions import NoSuchElementException

# Look for the <video> element first: as described in the question, the <img>
# selector also matches on video posts, so trying the image first would still
# save a .jpg for them.
try:
    media = driver.find_element(By.CSS_SELECTOR, "video[type='video/mp4']")
    extension = 'mp4'
except NoSuchElementException:
    # No video on the page, so treat the post as a still image. Only the
    # "element not found" case is caught; any other failure propagates.
    media = driver.find_element(By.CSS_SELECTOR, "img[style='object-fit: cover;']")
    extension = 'jpg'

# The download stays outside the try block so that a network error from
# urlretrieve is not mistaken for a missing element.
download_url = media.get_attribute('src')
print(download_url)
urlretrieve(download_url, '{}.{}'.format(shortcode, extension))
在尝试抓取 Instagram 时,即使帖子是视频,代码也会从帖子 URL 下载一张图片。我不明白为什么它下载的是一张随机的 jpg,而不是 else 分支中的 mp4。
# Collect post links from the page currently open in `driver`, then visit each
# post and download its media, naming the file after the post's shortcode.
# NOTE: block indentation was lost in this paste; the lines are shown flat.
# Fixed 5 s pause before reading the anchors (presumably to let the page load).
time.sleep(5)
posts = []
links = driver.find_elements_by_tag_name('a')
for link in links:
post = link.get_attribute('href')
# Keep only hrefs that contain '/p/'.
# NOTE(review): get_attribute('href') can presumably be None for anchors
# without an href, which would make this `in` test raise TypeError -- confirm.
if '/p/' in post:
posts.append(post)
# Get videos and images: visit every collected post and download its media.
download_url = ''
for post in posts:
driver.get(post)
# Second-to-last '/'-separated piece of the current URL; used as the file name.
shortcode = driver.current_url.split("/")[-2]
time.sleep(7)
# NOTE(review): per the Selenium docs, find_element_* raises
# NoSuchElementException when nothing matches instead of returning None, so
# this condition can never be False and the else branch below is never
# reached -- confirm. As reported in the question, this <img> selector also
# matches on video posts, which is why a .jpg is saved for them.
if driver.find_element_by_css_selector("img[style='object-fit: cover;']") is not None:
download_url = driver.find_element_by_css_selector("img[style='object-fit: cover;']").get_attribute('src')
urllib.request.urlretrieve( download_url, '{}.jpg'.format(shortcode))
else:
download_url = driver.find_element_by_css_selector("video[type='video/mp4']").get_attribute('src')
urllib.request.urlretrieve( download_url, '{}.mp4'.format(shortcode))
# Fixed 5 s pause after handling the post.
time.sleep(5)
我想我们可以改用“Try-Except”块:
# Replacement for the if/else inside the `for post in posts:` loop.
# Relies on `driver`, `By`, `urlretrieve` and `shortcode` from the surrounding
# script.
from selenium.common.exceptions import NoSuchElementException

# Look for the <video> element first: as described in the question, the <img>
# selector also matches on video posts, so trying the image first would still
# save a .jpg for them.
try:
    media = driver.find_element(By.CSS_SELECTOR, "video[type='video/mp4']")
    extension = 'mp4'
except NoSuchElementException:
    # No video on the page, so treat the post as a still image. Only the
    # "element not found" case is caught; any other failure propagates.
    media = driver.find_element(By.CSS_SELECTOR, "img[style='object-fit: cover;']")
    extension = 'jpg'

# The download stays outside the try block so that a network error from
# urlretrieve is not mistaken for a missing element.
download_url = media.get_attribute('src')
print(download_url)
urlretrieve(download_url, '{}.{}'.format(shortcode, extension))