Python - Webscrape LinkedIn Listing
The URL in the code points to a page with multiple listings on LinkedIn. I only want to get the link/href of each listing, but the output is blank. I just want the HTML of each listing.
import pandas as pd
from bs4 import BeautifulSoup
import csv
import requests

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'}
url = 'https://www.linkedin.com/jobs/search/?currentJobId=2213597199&geoId=103644278&keywords=cyber%20analyst&location=United%20States&start=25'

# headers must be passed as a keyword argument, otherwise it is treated as query params
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.content, 'html.parser')

listing = soup.find_all('div', class_="job-card-container relative job-card-list job-card-container--clickable job-card-list--underline-title-on-hover jobs-search-results-list__list-item--active jobs-search-two-pane__job-card-container--viewport-tracking-1")
for info in listing:
    link = info.find('a', href=True)
    print(link)
As suggested in the comments, you may want to give selenium a try. Here is how you can get the links to all the job postings:
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.headless = True  # run Chrome without opening a browser window
driver = webdriver.Chrome(options=options)

url = "https://www.linkedin.com/jobs/search/?currentJobId=2213597199&geoId=103644278&keywords=cyber%20analyst&location=United%20States&start=0&redirect=false"
driver.get(url)
time.sleep(2)  # give the JavaScript a moment to render the job cards

# Each job card exposes its URL on an <a class="result-card__full-card-link"> element
elements = driver.find_elements_by_class_name("result-card__full-card-link")
job_links = [e.get_attribute("href") for e in elements]

for job_link in job_links:
    print(job_link)
Output:
https://www.linkedin.com/jobs/view/cyber-threat-intelligence-analyst-at-linkedin-2261917520?refId=b5cf1ce3-d032-4aaa-8810-26d4782cc34d&position=1&pageNum=0&trk=public_jobs_job-result-card_result-card_full-click
https://www.linkedin.com/jobs/view/cyber-security-analyst-at-modis-2273028250?refId=b5cf1ce3-d032-4aaa-8810-26d4782cc34d&position=2&pageNum=0&trk=public_jobs_job-result-card_result-card_full-click
https://www.linkedin.com/jobs/view/jr-python-cyber-analyst-ts-sci-at-deloitte-2265989857?refId=b5cf1ce3-d032-4aaa-8810-26d4782cc34d&position=3&pageNum=0&trk=public_jobs_job-result-card_result-card_full-click
https://www.linkedin.com/jobs/view/cyber-security-analyst-at-modis-2307968344?refId=b5cf1ce3-d032-4aaa-8810-26d4782cc34d&position=4&pageNum=0&trk=public_jobs_job-result-card_result-card_full-click
https://www.linkedin.com/jobs/view/entry-level-cyber-security-analyst-at-hcl-technologies-2271846580?refId=b5cf1ce3-d032-4aaa-8810-26d4782cc34d&position=5&pageNum=0&trk=public_jobs_job-result-card_result-card_full-click
and so on ..
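Note that the snippet above uses the Selenium 3 style find_elements_by_class_name. If you are on Selenium 4 or newer, those helper methods have been removed; a sketch of the equivalent call (same class name assumed) is:

from selenium.webdriver.common.by import By

# Selenium 4+ replacement for find_elements_by_class_name
elements = driver.find_elements(By.CLASS_NAME, "result-card__full-card-link")
job_links = [e.get_attribute("href") for e in elements]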
The class you want to target is result-card__full-card-link; you can see it by inspecting one of the job-card links in the browser dev tools.
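Since you also said you want the HTML of each listing, here is a minimal sketch of one way to get it: hand the Selenium-rendered page source back to BeautifulSoup and walk up from each link. The find_parent('li') step is an assumption about the markup around each card, not something confirmed above.

from bs4 import BeautifulSoup

# Parse the JavaScript-rendered page that Selenium already loaded
soup = BeautifulSoup(driver.page_source, 'html.parser')

for a in soup.find_all('a', class_='result-card__full-card-link'):
    print(a['href'])             # the listing's link
    card = a.find_parent('li')   # assumption: each card link sits inside an <li>
    if card is not None:
        print(card.prettify())   # the listing's HTML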