Trying to Scrape Instagram Post Data from .csv with Links - For Master's Thesis
I am trying to scrape Instagram post data (number of likes, caption, hashtags, mentions and number of comments) from the collection of links in a .csv, for data analysis in my master's thesis. However, I keep getting an error saying the xpath or element cannot be located. This is the error message:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button"}
Here is the block of code I have written using selenium:
# (imports needed for this snippet)
import csv
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def scrape_post_data():
    influencerpostsdata = []
    # Specify the path to chromedriver.exe
    chromedriver_path = r"C:\Users\stuar\Instagram Scraper\ChromeDrivers\chromedriver.exe"
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    time.sleep(2)
    # Open the webpage
    url = "https://www.instagram.com"
    driver.get(url)
    time.sleep(3)
    # Alert number 1
    time.sleep(5)
    alert = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept All")]'))).click()
    # Target Username Entry
    username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
    password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
    # Enter Username and Password (taken from entry fields defined elsewhere in the GUI)
    login_username = str(enter_loginusername_entry.get())
    login_password = str(enter_password_entry.get())
    username.clear()
    username.send_keys(login_username)
    password.clear()
    password.send_keys(login_password)
    button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[type='submit']"))).click()
    # Alert number 2
    time.sleep(5)
    alert2 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()
    # Alert number 3
    time.sleep(5)
    alert3 = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Not Now")]'))).click()

    with open(r"C:\Users\stuar\Instagram Scraper\SourceCode/influencerpostlinks1.csv", 'r') as csv_file:
        csv_reading = csv.reader(csv_file)
        for line in csv_reading:
            links = line[1]
            try:
                Page = driver.get(links)
            except Exception as e:
                Page = None
            time.sleep(20)
            try:
                # This captures the standard like count.
                likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/button""").text.split()[0]
                post_type = 'photo'
            except:
                # This captures the like count for videos, which is stored as a view count.
                likes = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/span""").text.split()[0]
                post_type = 'video'
            age = driver.find_element_by_css_selector('a time').text
            comment = driver.find_element_by_xpath("""//*[@id="react-root"]/section/main/div/div/article/div[2]/div[1]/ul/div/li/div/div/div[2]/span""").text
            hashtags = find_hashtags(comment)
            mentions = find_mentions(comment)
            post_details = {'link': url, 'type': post_type, 'likes/views': likes,
                            'age': age, 'comment': comment, 'hashtags': hashtags,
                            'mentions': mentions}
            time.sleep(10)
            # collecting the data that will be turned into a .csv file
            influencerpostsdata.append(post_details)

    df = pd.DataFrame(influencerpostsdata)
    print(df)
    df.to_csv('influencerpostsdata.csv')
    driver.close()
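The find_hashtags and find_mentions calls above are just small helper functions whose code I haven't pasted here; a minimal regex-based version looks roughly like this:

import re

def find_hashtags(comment):
    # every "#word" in the caption text (simplified pattern: word characters only)
    return re.findall(r"#(\w+)", comment or "")

def find_mentions(comment):
    # every "@handle" in the caption text (letters, digits, underscores and dots)
    return re.findall(r"@([\w.]+)", comment or "")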
Not to worry, I have managed to solve the problem myself.. Below is the code that ended up working for me:
with open(r"C:\Users\stuar\Instagram Scraper\SourceCode/influencerpostlinks1.csv",'r') as csv_file:
csv_reading = csv.reader(csv_file)
for line in csv_reading:
links = line[1]
try:
Page = driver.get(links)
except Exception as e:
Page = None
time.sleep(20)
try:
likes = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')
except Exception as e:
likes = None
try:
likes2 = likes.text
except Exception as e:
likes2 = None
time.sleep(20)
try:
age = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[2]/a/time')
except Exception as e:
age = None
try:
age2 = age.text
except Exception as e:
age2 = None
time.sleep(20)
try:
caption = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/div[3]/div[1]/ul/div/li/div/div/div[2]/span')
except Exception as e:
caption = None
try:
caption2 = caption.text
except Exception as e:
caption2 = None
time.sleep(20)
try:
AccountName = driver.find_element_by_xpath('/html/body/div[1]/section/main/div/div[1]/article/header/div[2]/div[1]/div/span/a')
except Exception as e:
AccountName = None
try:
AccountName2 = AccountName.text
except Exception as e:
AccountName2 = None
time.sleep(20)
post_details = {'Username': AccountName2,'Caption': caption2, 'Likes/Views': likes2,
'Age': age2 }
#turning data into a .csv file
influencerpostsdata.append(post_details)
df = pd.DataFrame(influencerpostsdata)
print(df)
df.to_csv('influencerpostsdata.csv')
driver.close()
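One thing that could still be tidied up: the repeated try/except pairs and fixed time.sleep(20) calls can be folded into a single small helper built on WebDriverWait. Just a sketch (it reuses the same XPaths as above and returns None when an element never shows up):

from selenium.common.exceptions import TimeoutException

def safe_text(driver, xpath, timeout=20):
    # Wait up to `timeout` seconds for the element, then return its text,
    # or None if it never appears.
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath)))
        return element.text
    except TimeoutException:
        return None

# usage, e.g.:
# likes2 = safe_text(driver, '/html/body/div[1]/section/main/div/div[1]/article/div[3]/section[2]/div/div/a/span')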