Python: href 标签类型错误
Python: href tag TypeError
我尝试运行我的网络抓取代码。有时它工作正常,但有时会抛出 TypeError 并给出回溯信息。我想知道是什么导致了这个错误?
错误信息如下:
Traceback (most recent call last):
  File "D:\python-learning\listings.py", line 22, in <module>
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
  File "D:\python-learning\listings.py", line 17, in getData
    return nextLink['href']
TypeError: 'NoneType' object is not subscriptable
from bs4 import BeautifulSoup
import lxml
import requests
def getData(url):
    """Print the address and price of every listing on the page at `url`.

    Returns the ``href`` attribute of the page's 'Next »' link.
    """
    html_text = requests.get(url).text
    soup = BeautifulSoup(html_text,'lxml')
    listings = soup.find_all('div', class_ = 'row property results')
    for listing in listings:
        address = listing.find('a', class_ = 'address').text
        price = listing.find('a', class_ = 'price').text
        print(address)
        print(price)
    # find next page
    # soup.find() returns None when the page has no 'Next »' anchor ...
    nextLink=soup.find('a', string='Next »')
    # ... and subscripting that None is what raises the TypeError in the traceback.
    return nextLink['href']
# Start URL of the search results. It must be one unbroken string literal, and
# the second query parameter is `&region=all`.
pageLink='https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'
count=0
# Follow the 'Next »' link for at most three pages.
while count<3:
    # getData() returns a site-relative href, so the host is prepended here.
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
    count+=1
在尝试获取 ['href'] 之前,您必须先检查 nextLink 是否为 None:
next_link = soup.find('a', string='Next »')
# Only build the next-page URL when the 'Next »' link was actually found.
if next_link:
    return 'https://www.vancouverforsale.ca' + next_link['href']
当 nextLink 是 None 时,函数可以 return None,并且你必须在主循环中检查这个返回值:
for count in range(3):
    page_link = get_data(page_link)
    # A falsy result means there is no further page, so stop looping.
    if not page_link:
        break
完整的工作代码
import requests
from bs4 import BeautifulSoup
#import urllib.parse
# PEP8: `lower_case_names` for functions and variables
def get_data(url):
    """Print every listing on one results page and return the next page's URL.

    Args:
        url: Absolute URL of a search-results page.

    Returns:
        The absolute URL of the following results page, or ``None`` when the
        page has no usable 'Next »' link (the caller should stop paginating).
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    #print(response.status_code)
    soup = BeautifulSoup(response.text, 'lxml')

    listings = soup.find_all('div', class_='row property results')
    for listing in listings:
        address_tag = listing.find('a', class_='address')
        price_tag = listing.find('a', class_='price')
        # find() returns None for a missing element; skip incomplete listings
        # rather than failing with another NoneType error.
        if address_tag is None or price_tag is None:
            continue

        address = address_tag.text.strip()
        # Strip the ▲/▼ markers from the price text.
        price = price_tag.text.replace('▲', '').replace('▼', '').strip()
        print('address:', address)
        print('price :', price)
        print('---')

    # find next page
    next_link = soup.find('a', string='Next »')
    if next_link is None:
        return None

    # .get() avoids a KeyError if the anchor has no href attribute.
    href = next_link.get('href')
    if not href:
        return None

    # urljoin resolves both site-relative and absolute hrefs against this page.
    return urljoin(url, href)
# --- main ---

# Start URL of the search results; the second query parameter is `&region=all`.
page_link = 'https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'

# Fetch at most three pages; use `while True:` instead to follow every page.
for count in range(3):
    page_link = get_data(page_link)
    # get_data() returns None once there is no 'Next »' link.
    if not page_link:
        break
结果:
address: 19681 75 Avenue, Langley
price : $1,695,000
---
address: 20806 52a Avenue, Langley
price : $1,649,900
---
address: 20804 52a Avenue, Langley
price : $1,649,900
---
address: 7138 210 Street Unit 43, Langley
price : $1,638,000
---
address: 8567 204 Street Unit 13, Langley
price : $1,624,999
---
address: 19842 75b Avenue, Langley
price : $1,599,000
---
address: 8567 204 Street Unit 1, Langley
price : $1,598,000
---
address: 8258 202 Street, Langley
price : $1,588,800
---
address: 7138 210 Street Unit 59, Langley
price : $1,579,000
---
address: 8567 204 Street Unit 3, Langley
price : $1,499,900
---
address: 7429 197 Street, Langley
price : $1,489,900
---
address: 22981 Billy Brown Road, Langley
price : $1,399,000
---
address: 23168 Billy Brown Road, Langley
price : $1,399,000
---
address: 26718 32 Avenue, Langley
price : $1,399,000
---
address: 20327 82 Avenue, Langley
price : $1,395,000
---
address: 8567 204 Street Unit 7, Langley
price : $1,390,000
---
address: 20873 71b Avenue, Langley
price : $1,388,000
---
address: 20321 80 Avenue Unit 27, Langley
price : $1,370,000
---
address: 20924 80a Avenue, Langley
price : $1,350,000
---
address: 20463 70 Avenue Unit 2, Langley
price : $1,349,900
---
address: 23189 Francis Avenue Unit 203, Langley
price : $1,349,000
---
address: 20576 84a Avenue, Langley
price : $1,349,000
---
address: 20451 84 Avenue Unit 10, Langley
price : $1,348,000
---
address: 7138 210 Street Unit 85, Langley
price : $1,348,000
---
address: 19897 75a Avenue Unit 46, Langley
price : $1,325,000
---
address: 9567 217a Street Unit 3, Langley
price : $1,299,900
---
address: 20321 80 Avenue Unit 45, Langley
price : $1,299,900
---
address: 9762 182a Street Unit 21, Langley
price : $1,298,888
---
address: 8450 204 Street Unit 29, Langley
price : $1,258,000
---
address: 20770 97b Avenue Unit 3, Langley
price : $1,250,000
---
我尝试运行我的网络抓取代码。有时它工作正常,但有时会抛出 TypeError 并给出回溯信息。我想知道是什么导致了这个错误?
错误信息如下:
Traceback (most recent call last):
  File "D:\python-learning\listings.py", line 22, in <module>
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
  File "D:\python-learning\listings.py", line 17, in getData
    return nextLink['href']
TypeError: 'NoneType' object is not subscriptable
from bs4 import BeautifulSoup
import lxml
import requests
def getData(url):
    """Print the address and price of every listing on the page at `url`.

    Returns the ``href`` attribute of the page's 'Next »' link.
    """
    html_text = requests.get(url).text
    soup = BeautifulSoup(html_text,'lxml')
    listings = soup.find_all('div', class_ = 'row property results')
    for listing in listings:
        address = listing.find('a', class_ = 'address').text
        price = listing.find('a', class_ = 'price').text
        print(address)
        print(price)
    # find next page
    # soup.find() returns None when the page has no 'Next »' anchor ...
    nextLink=soup.find('a', string='Next »')
    # ... and subscripting that None is what raises the TypeError in the traceback.
    return nextLink['href']
# Start URL of the search results. It must be one unbroken string literal, and
# the second query parameter is `&region=all`.
pageLink='https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'
count=0
# Follow the 'Next »' link for at most three pages.
while count<3:
    # getData() returns a site-relative href, so the host is prepended here.
    pageLink='https://www.vancouverforsale.ca'+getData(pageLink)
    count+=1
在尝试获取 ['href'] 之前,您必须先检查 nextLink 是否为 None:
next_link = soup.find('a', string='Next »')
# Only build the next-page URL when the 'Next »' link was actually found.
if next_link:
    return 'https://www.vancouverforsale.ca' + next_link['href']
当 nextLink 是 None 时,函数可以 return None,并且你必须在主循环中检查这个返回值:
for count in range(3):
    page_link = get_data(page_link)
    # A falsy result means there is no further page, so stop looping.
    if not page_link:
        break
完整的工作代码
import requests
from bs4 import BeautifulSoup
#import urllib.parse
# PEP8: `lower_case_names` for functions and variables
def get_data(url):
    """Print every listing on one results page and return the next page's URL.

    Args:
        url: Absolute URL of a search-results page.

    Returns:
        The absolute URL of the following results page, or ``None`` when the
        page has no usable 'Next »' link (the caller should stop paginating).
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    #print(response.status_code)
    soup = BeautifulSoup(response.text, 'lxml')

    listings = soup.find_all('div', class_='row property results')
    for listing in listings:
        address_tag = listing.find('a', class_='address')
        price_tag = listing.find('a', class_='price')
        # find() returns None for a missing element; skip incomplete listings
        # rather than failing with another NoneType error.
        if address_tag is None or price_tag is None:
            continue

        address = address_tag.text.strip()
        # Strip the ▲/▼ markers from the price text.
        price = price_tag.text.replace('▲', '').replace('▼', '').strip()
        print('address:', address)
        print('price :', price)
        print('---')

    # find next page
    next_link = soup.find('a', string='Next »')
    if next_link is None:
        return None

    # .get() avoids a KeyError if the anchor has no href attribute.
    href = next_link.get('href')
    if not href:
        return None

    # urljoin resolves both site-relative and absolute hrefs against this page.
    return urljoin(url, href)
# --- main ---

# Start URL of the search results; the second query parameter is `&region=all`.
page_link = 'https://www.vancouverforsale.ca/search/results/?city=Langley&region=all&list_price_min=50000&list_price_max=all&beds_min=all&baths_min=all&type=con'

# Fetch at most three pages; use `while True:` instead to follow every page.
for count in range(3):
    page_link = get_data(page_link)
    # get_data() returns None once there is no 'Next »' link.
    if not page_link:
        break
结果:
address: 19681 75 Avenue, Langley
price : $1,695,000
---
address: 20806 52a Avenue, Langley
price : $1,649,900
---
address: 20804 52a Avenue, Langley
price : $1,649,900
---
address: 7138 210 Street Unit 43, Langley
price : $1,638,000
---
address: 8567 204 Street Unit 13, Langley
price : $1,624,999
---
address: 19842 75b Avenue, Langley
price : $1,599,000
---
address: 8567 204 Street Unit 1, Langley
price : $1,598,000
---
address: 8258 202 Street, Langley
price : $1,588,800
---
address: 7138 210 Street Unit 59, Langley
price : $1,579,000
---
address: 8567 204 Street Unit 3, Langley
price : $1,499,900
---
address: 7429 197 Street, Langley
price : $1,489,900
---
address: 22981 Billy Brown Road, Langley
price : $1,399,000
---
address: 23168 Billy Brown Road, Langley
price : $1,399,000
---
address: 26718 32 Avenue, Langley
price : $1,399,000
---
address: 20327 82 Avenue, Langley
price : $1,395,000
---
address: 8567 204 Street Unit 7, Langley
price : $1,390,000
---
address: 20873 71b Avenue, Langley
price : $1,388,000
---
address: 20321 80 Avenue Unit 27, Langley
price : $1,370,000
---
address: 20924 80a Avenue, Langley
price : $1,350,000
---
address: 20463 70 Avenue Unit 2, Langley
price : $1,349,900
---
address: 23189 Francis Avenue Unit 203, Langley
price : $1,349,000
---
address: 20576 84a Avenue, Langley
price : $1,349,000
---
address: 20451 84 Avenue Unit 10, Langley
price : $1,348,000
---
address: 7138 210 Street Unit 85, Langley
price : $1,348,000
---
address: 19897 75a Avenue Unit 46, Langley
price : $1,325,000
---
address: 9567 217a Street Unit 3, Langley
price : $1,299,900
---
address: 20321 80 Avenue Unit 45, Langley
price : $1,299,900
---
address: 9762 182a Street Unit 21, Langley
price : $1,298,888
---
address: 8450 204 Street Unit 29, Langley
price : $1,258,000
---
address: 20770 97b Avenue Unit 3, Langley
price : $1,250,000
---