在 Python 中使用 BeautifulSoup 抓取 IMDB.com,但无法从电影链接中获取 href
Scraping IMDB.com with beautifulsoup in python but can't get href from movie link
我正在尝试获取电影的 href 链接(例如:在 IMDB 上搜索"钢铁侠"),但始终获取不到。运行代码时,我一直得到 "None";但如果我删除 .get('href'),代码会返回整行 HTML(其中包含我想要的链接)。感谢您提供的任何帮助。谢谢!
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin # For joining next page url with base url
from urllib.parse import quote_plus  # Percent-encodes the user's query text

# Ask for a movie title and split it into whitespace-separated words.
search_terms = input("What movie do you want to know about?\n> ").split()
# Build the IMDB search URL. quote_plus() turns the spaces between words into
# '+' (exactly what the plain '+'.join produced) and additionally escapes
# characters such as '&', '#' or '+' that would otherwise corrupt the query
# string when they appear in the title.
url = ("http://www.imdb.com/find?ref_=nv_sr_fn&q="
       + quote_plus(' '.join(search_terms)) + '&s=all')
def scrape_find_next_page(url):
    """Fetch the search page at ``url`` and read ``href`` from its first result cell.

    The page is requested with a ``Mozilla/5.0`` User-Agent header and parsed
    with the ``html.parser`` backend. The attribute lookup is performed on the
    first ``td`` element matched by ``find('td', 'result_text')`` itself, so
    the return value is whatever ``.get('href')`` yields for that cell --
    ``None`` when the cell carries no ``href`` attribute.
    """
    page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    parsed = BeautifulSoup(page.text, "html.parser")
    first_result = parsed.find('td', 'result_text')
    return first_result.get('href')
# Run the scraper on the search URL; this is the value the question reports
# as coming back "None".
next_page_url = scrape_find_next_page(url)
您正在尝试从 td 标签获取 href,但 td 上并不存在该属性。您需要先找到包含 href 属性的 a 标签,再从它获取 href:
# Locate the first result cell, step into the <a> tag nested inside it, and
# read the link target from that tag rather than from the cell.
result_cell = soup.find('td', 'result_text')
result_link = result_cell.find('a')
next_page = result_link.get('href')
我正在尝试获取电影的 href 链接(例如:在 IMDB 上搜索"钢铁侠"),但始终获取不到。运行代码时,我一直得到 "None";但如果我删除 .get('href'),代码会返回整行 HTML(其中包含我想要的链接)。感谢您提供的任何帮助。谢谢!
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin # For joining next page url with base url
from urllib.parse import quote_plus  # Percent-encodes the user's query text

# Ask for a movie title and split it into whitespace-separated words.
search_terms = input("What movie do you want to know about?\n> ").split()
# Build the IMDB search URL. quote_plus() turns the spaces between words into
# '+' (exactly what the plain '+'.join produced) and additionally escapes
# characters such as '&', '#' or '+' that would otherwise corrupt the query
# string when they appear in the title.
url = ("http://www.imdb.com/find?ref_=nv_sr_fn&q="
       + quote_plus(' '.join(search_terms)) + '&s=all')
def scrape_find_next_page(url):
    """Fetch the search page at ``url`` and read ``href`` from its first result cell.

    The page is requested with a ``Mozilla/5.0`` User-Agent header and parsed
    with the ``html.parser`` backend. The attribute lookup is performed on the
    first ``td`` element matched by ``find('td', 'result_text')`` itself, so
    the return value is whatever ``.get('href')`` yields for that cell --
    ``None`` when the cell carries no ``href`` attribute.
    """
    page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    parsed = BeautifulSoup(page.text, "html.parser")
    first_result = parsed.find('td', 'result_text')
    return first_result.get('href')
# Run the scraper on the search URL; this is the value the question reports
# as coming back "None".
next_page_url = scrape_find_next_page(url)
您正在尝试从 td 标签获取 href,但 td 上并不存在该属性。您需要先找到包含 href 属性的 a 标签,再从它获取 href:
# Locate the first result cell, step into the <a> tag nested inside it, and
# read the link target from that tag rather than from the cell.
result_cell = soup.find('td', 'result_text')
result_link = result_cell.find('a')
next_page = result_link.get('href')