Python 获取 Instagram 用户所有帖子的 URL
Python: Get the URLs of all posts of an Instagram user
我写了一小段代码来从 Instagram 下载图片。这是代码:
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as req
import urllib.request as reqq
from selenium import webdriver
import os
# Launch Chrome through the local chromedriver binary. Raw strings keep the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences such as "\P", "\c" and "\i", which newer Python versions warn about.
browser = webdriver.Chrome(r"D:\Python_Files\Programs\chromedriver.exe")
url = "https://www.instagram.com/p/CFRY7X2AnOx/"
browser.get(url)
# The image is the <img> nested inside the element with class "KL4Bh";
# its `src` attribute is the URL that gets downloaded below.
image_container = browser.find_element_by_class_name('KL4Bh')
image_url = image_container.find_element_by_tag_name('img').get_attribute('src')
# Save the image to a fixed local path (raw string for the same reason as above).
reqq.urlretrieve(image_url, r"D:\instaimg.jpg")
这很好用。但是这样只能下载一张图片。有没有什么办法可以获取一个用户所有帖子的 URL,从而可以下载该用户发布的所有图片?
这应该有效:
# Open the user's profile page. `username` is not defined in this snippet and
# must be set beforehand; `browser` is an already-created Selenium driver.
browser.get("https://instagram.com/"+username)
# Click on the first post
browser.find_element_by_xpath("/html/body/div[1]/section/main/div/div[3]/article/div/div/div[1]/div[1]/a").click()
# Wait a second or two for the post to load using either the `time` module or whatever way you want
posts = []
# Walk through the posts one by one: record the current post's URL, then click
# the "next" arrow. The loop ends at the first failure.
while True:
    try:
        posts.append(browser.current_url)
        arrow = browser.find_element_by_class_name("coreSpriteRightPaginationArrow")
        arrow.click()
    except Exception:
        # Arrow was not found (or could not be clicked), so you must be on the
        # last post. `Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit still propagate instead of being silently swallowed.
        break
print(posts)
我写了一小段代码来从 Instagram 下载图片。这是代码:
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as req
import urllib.request as reqq
from selenium import webdriver
import os
# Launch Chrome through the local chromedriver binary. Raw strings keep the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences such as "\P", "\c" and "\i", which newer Python versions warn about.
browser = webdriver.Chrome(r"D:\Python_Files\Programs\chromedriver.exe")
url = "https://www.instagram.com/p/CFRY7X2AnOx/"
browser.get(url)
# The image is the <img> nested inside the element with class "KL4Bh";
# its `src` attribute is the URL that gets downloaded below.
image_container = browser.find_element_by_class_name('KL4Bh')
image_url = image_container.find_element_by_tag_name('img').get_attribute('src')
# Save the image to a fixed local path (raw string for the same reason as above).
reqq.urlretrieve(image_url, r"D:\instaimg.jpg")
这很好用。但是这样只能下载一张图片。有没有什么办法可以获取一个用户所有帖子的 URL,从而可以下载该用户发布的所有图片?
这应该有效:
# Open the user's profile page. `username` is not defined in this snippet and
# must be set beforehand; `browser` is an already-created Selenium driver.
browser.get("https://instagram.com/"+username)
# Click on the first post
browser.find_element_by_xpath("/html/body/div[1]/section/main/div/div[3]/article/div/div/div[1]/div[1]/a").click()
# Wait a second or two for the post to load using either the `time` module or whatever way you want
posts = []
# Walk through the posts one by one: record the current post's URL, then click
# the "next" arrow. The loop ends at the first failure.
while True:
    try:
        posts.append(browser.current_url)
        arrow = browser.find_element_by_class_name("coreSpriteRightPaginationArrow")
        arrow.click()
    except Exception:
        # Arrow was not found (or could not be clicked), so you must be on the
        # last post. `Exception` rather than a bare `except` so that Ctrl-C and
        # SystemExit still propagate instead of being silently swallowed.
        break
print(posts)