Linkedin 的网页抓取
Web scraping for Linkedin
我目前正在使用 Selenium 做一个关于 LinkedIn 网页抓取的大学项目。以下是我的代码:
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
from parsel import Selector
# Script: log in to LinkedIn, reach a profile through a Google search, and
# print the text of the Experience, Education and Certification sections.
#
# NOTE(review): the positional driver path and the find_element(s)_by_*
# helpers are Selenium 3 style; newer Selenium 4 releases removed the
# helpers -- confirm the installed version before running.
driver = webdriver.Chrome('location of web driver')
driver.get('https://www.linkedin.com')

# username
username = driver.find_element_by_id('session_key')
username.send_keys('Linkedin Username')
sleep(0.5)

# password
password = driver.find_element_by_id('session_password')
password.send_keys('Linkedin Password')
sleep(0.5)

# submit value
sign_in_button = driver.find_element_by_xpath('//*[@type="submit"]')
sign_in_button.click()
# NOTE(review): a fixed 0.5 s pause may be too short for the login to finish
# before navigating away -- consider an explicit wait.
sleep(0.5)

driver.get('https://www.google.com/')  # Navigate to google to search the profile

# locate search form by_name
search_query = driver.find_element_by_name('q')

# send_keys() to simulate the search text key strokes
search_query.send_keys('https://www.linkedin.com/in/khushi-thakkar-906b56188/')
sleep(0.5)
search_query.send_keys(Keys.RETURN)
sleep(3)

# locate the first link
search_person = driver.find_element_by_class_name('yuRUbf')
search_person.click()

# Experience: print the text of every matched element, followed by a blank line.
experience = driver.find_elements_by_css_selector('#experience-section .pv-profile-section')
for item in experience:
    print(item.text)
    print("")

# Education: same pattern as above, scoped to the element with id "education-section".
education = driver.find_elements_by_css_selector('#education-section .pv-profile-section')
for item in education:
    print(item.text)
    print("")

# Certification: same pattern, scoped to the element with id "certifications-section".
certification = driver.find_elements_by_css_selector('#certifications-section .pv-profile-section')
for item in certification:
    print(item.text)
    print("")
当我抓取工作经历(Experience)部分时,它能完美地提取信息。但是当我对教育(Education)和认证(Certification)部分执行相同的操作时,得到的却是一个空列表。请帮忙!
我认为问题出在你的 CSS 选择器上。我自己试了一下,它在 HTML 的 body 中找不到任何元素。
修正你的 CSS 选择器,就没问题了:
# Education: select every <li> element nested under the element with id "education-section".
education = driver.find_elements_by_css_selector('#education-section li')
# Certification: select every <li> element nested under the element with id "certifications-section".
certification = driver.find_elements_by_css_selector('#certifications-section li')
我目前正在使用 Selenium 做一个关于 LinkedIn 网页抓取的大学项目。以下是我的代码:
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
from parsel import Selector
# Script: log in to LinkedIn, reach a profile through a Google search, and
# print the text of the Experience, Education and Certification sections.
#
# NOTE(review): the positional driver path and the find_element(s)_by_*
# helpers are Selenium 3 style; newer Selenium 4 releases removed the
# helpers -- confirm the installed version before running.
driver = webdriver.Chrome('location of web driver')
driver.get('https://www.linkedin.com')

# username
username = driver.find_element_by_id('session_key')
username.send_keys('Linkedin Username')
sleep(0.5)

# password
password = driver.find_element_by_id('session_password')
password.send_keys('Linkedin Password')
sleep(0.5)

# submit value
sign_in_button = driver.find_element_by_xpath('//*[@type="submit"]')
sign_in_button.click()
# NOTE(review): a fixed 0.5 s pause may be too short for the login to finish
# before navigating away -- consider an explicit wait.
sleep(0.5)

driver.get('https://www.google.com/')  # Navigate to google to search the profile

# locate search form by_name
search_query = driver.find_element_by_name('q')

# send_keys() to simulate the search text key strokes
search_query.send_keys('https://www.linkedin.com/in/khushi-thakkar-906b56188/')
sleep(0.5)
search_query.send_keys(Keys.RETURN)
sleep(3)

# locate the first link
search_person = driver.find_element_by_class_name('yuRUbf')
search_person.click()

# Experience: print the text of every matched element, followed by a blank line.
experience = driver.find_elements_by_css_selector('#experience-section .pv-profile-section')
for item in experience:
    print(item.text)
    print("")

# Education: same pattern as above, scoped to the element with id "education-section".
education = driver.find_elements_by_css_selector('#education-section .pv-profile-section')
for item in education:
    print(item.text)
    print("")

# Certification: same pattern, scoped to the element with id "certifications-section".
certification = driver.find_elements_by_css_selector('#certifications-section .pv-profile-section')
for item in certification:
    print(item.text)
    print("")
当我抓取工作经历(Experience)部分时,它能完美地提取信息。但是当我对教育(Education)和认证(Certification)部分执行相同的操作时,得到的却是一个空列表。请帮忙!
我认为问题出在你的 CSS 选择器上。我自己试了一下,它在 HTML 的 body 中找不到任何元素。
修正你的 CSS 选择器,就没问题了:
# Education: select every <li> element nested under the element with id "education-section".
education = driver.find_elements_by_css_selector('#education-section li')
# Certification: select every <li> element nested under the element with id "certifications-section".
certification = driver.find_elements_by_css_selector('#certifications-section li')