使用 bs4 时出现问题:'NoneType' 对象不可订阅
Problem while using bs4: 'NoneType' object is not subscriptable
我正在尝试抓取 Goodreads 页面以获取一本书的所有版本,但是当我运行代码时出现此错误:
Traceback (most recent call last):
File "C:/xxx/PycharmProjects/wikipedia_pageview/isbn.py", line 141, in <module>
ed_details = get_editions_details(isbn)
File "C:/xxx/PycharmProjects/wikipedia_pageview/isbn.py", line 79, in get_editions_details
if ed_link := f"https://www.goodreads.com{ed_item['href']}":...
TypeError: 'NoneType' object is not subscriptable
出于这个原因,我尝试在所选区域设置条件,但它们不起作用。
代码:
def get_editions_details(isbn):
# Search Goodreads for `isbn` and return the tuple
# (ed_link, int(ed_num), isbn) built from the "other editions" link.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the exact nesting of the `if` statements below cannot be recovered.
# Create the search URL with the ISBN of the book
data = {'q': isbn}
# get_page is defined outside this snippet; presumably it performs the HTTP
# request and returns a response object with a `.text` attribute - confirm.
book_url = get_page("https://www.goodreads.com/search", data)
#print(book_url)
# Parse the markup with Beautiful Soup
soup = bs(book_url.text, 'lxml')
# Retrieve from the book's page the link for other editions
# and the total number of editions
# Both walrus assignments rebind ed_item, so ed_item ends up as None
# whenever either find() call matches nothing.
if ed_item := soup.find("div", class_="otherEditionsLink"):
if ed_item := ed_item.find("a"):
print(ed_item)
else:
pass
# Presumably ed_num is assigned only under this guard, so it stays undefined
# when ed_item is None.
if ed_item:
ed_num = ed_item.text.strip().split(' ')[-1].strip('()')
# This is the line reported in the traceback: subscripting ed_item raises
# TypeError when ed_item is None. The f-string itself is never empty, so the
# condition cannot be false once it evaluates.
if ed_link := f"https://www.goodreads.com{ed_item['href']}":  # to figure out...
print(ed_link)
else:
pass
return((ed_link, int(ed_num), isbn))
if __name__ == "__main__":
    # Make sure the output directory exists. exist_ok=True tolerates a
    # directory left over from an earlier run, while real failures (for
    # example a permission error) are no longer silently swallowed the way
    # the previous blanket `except Exception: pass` did.
    os.makedirs('./urls_files', exist_ok=True)
    isbns = get_isbn()
    # Look up the editions link for each ISBN and hand it on for processing.
    for isbn in isbns:
        ed_details = get_editions_details(isbn)
        get_editions_urls(ed_details)
会发生什么?
您示例中的缩进似乎不正确,而且代码没有处理出错或缺失的 ISBN 或版本链接的情况。
如何修复?
在确定 `ed_item` 中存在 `href` 的那一刻,将值分配给 `ed_link` 和 `ed_num`;否则将它们设置为 `None` 或 `0`,或以其他方式处理这些情况:
def get_editions_details(isbn):
    """Look up the "other editions" link for a book on Goodreads.

    Sends a search request for ``isbn``, parses the returned markup and
    looks for the first ``<a>`` inside ``<div class="otherEditionsLink">``.

    Args:
        isbn: Value sent as the ``q`` query parameter of the search request.

    Returns:
        A tuple ``(ed_link, ed_num, isbn)``. ``ed_link`` is the absolute
        editions URL, or ``None`` when the page has no usable editions
        link. ``ed_num`` is the integer parsed from the last
        space-separated word of the link text (surrounding parentheses
        removed), or ``0`` when there is no link or that word is not a
        number.
    """
    response = requests.get("https://www.goodreads.com/search", params={'q': isbn})
    soup = bs(response.text, 'lxml')

    # Defaults describe the "nothing found" case so the return statement
    # below never touches an unbound name.
    ed_link = None
    ed_num = 0

    # find() returns None when nothing matches, so each step is checked
    # before the next one dereferences its result.
    container = soup.find("div", class_="otherEditionsLink")
    anchor = container.find("a") if container is not None else None
    href = anchor.get('href') if anchor is not None else None

    if href:
        ed_link = f"https://www.goodreads.com{href}"
        count_text = anchor.text.strip().split(' ')[-1].strip('()')
        try:
            ed_num = int(count_text)
        except ValueError:
            # Link text did not end in a plain integer; keep the default 0
            # rather than failing the whole lookup.
            pass

    return (ed_link, ed_num, isbn)
if __name__ == "__main__":
    # Example run only: the argument 1 is used to simulate an error case.
    ed_details = get_editions_details(1)
    if not ed_details[0]:
        # No editions link came back; report the ISBN instead of going on.
        print(f'no editionlinks for isbn:{ed_details[2]}')
    else:
        get_editions_urls(ed_details)
我正在尝试抓取 Goodreads 页面以获取一本书的所有版本,但是当我运行代码时出现此错误:
Traceback (most recent call last):
File "C:/xxx/PycharmProjects/wikipedia_pageview/isbn.py", line 141, in <module>
ed_details = get_editions_details(isbn)
File "C:/xxx/PycharmProjects/wikipedia_pageview/isbn.py", line 79, in get_editions_details
if ed_link := f"https://www.goodreads.com{ed_item['href']}":...
TypeError: 'NoneType' object is not subscriptable
出于这个原因,我尝试在所选区域设置条件,但它们不起作用。 代码:
def get_editions_details(isbn):
# Search Goodreads for `isbn` and return the tuple
# (ed_link, int(ed_num), isbn) built from the "other editions" link.
# NOTE(review): the indentation of this snippet was lost when it was pasted,
# so the exact nesting of the `if` statements below cannot be recovered.
# Create the search URL with the ISBN of the book
data = {'q': isbn}
# get_page is defined outside this snippet; presumably it performs the HTTP
# request and returns a response object with a `.text` attribute - confirm.
book_url = get_page("https://www.goodreads.com/search", data)
#print(book_url)
# Parse the markup with Beautiful Soup
soup = bs(book_url.text, 'lxml')
# Retrieve from the book's page the link for other editions
# and the total number of editions
# Both walrus assignments rebind ed_item, so ed_item ends up as None
# whenever either find() call matches nothing.
if ed_item := soup.find("div", class_="otherEditionsLink"):
if ed_item := ed_item.find("a"):
print(ed_item)
else:
pass
# Presumably ed_num is assigned only under this guard, so it stays undefined
# when ed_item is None.
if ed_item:
ed_num = ed_item.text.strip().split(' ')[-1].strip('()')
# This is the line reported in the traceback: subscripting ed_item raises
# TypeError when ed_item is None. The f-string itself is never empty, so the
# condition cannot be false once it evaluates.
if ed_link := f"https://www.goodreads.com{ed_item['href']}":  # to figure out...
print(ed_link)
else:
pass
return((ed_link, int(ed_num), isbn))
if __name__ == "__main__":
    # Make sure the output directory exists. exist_ok=True tolerates a
    # directory left over from an earlier run, while real failures (for
    # example a permission error) are no longer silently swallowed the way
    # the previous blanket `except Exception: pass` did.
    os.makedirs('./urls_files', exist_ok=True)
    isbns = get_isbn()
    # Look up the editions link for each ISBN and hand it on for processing.
    for isbn in isbns:
        ed_details = get_editions_details(isbn)
        get_editions_urls(ed_details)
会发生什么?
您示例中的缩进似乎不正确,而且代码没有处理出错或缺失的 ISBN 或版本链接的情况。
如何修复?
在确定 `ed_item` 中存在 `href` 的那一刻,将值分配给 `ed_link` 和 `ed_num`;否则将它们设置为 `None` 或 `0`,或以其他方式处理这些情况:
def get_editions_details(isbn):
    """Look up the "other editions" link for a book on Goodreads.

    Sends a search request for ``isbn``, parses the returned markup and
    looks for the first ``<a>`` inside ``<div class="otherEditionsLink">``.

    Args:
        isbn: Value sent as the ``q`` query parameter of the search request.

    Returns:
        A tuple ``(ed_link, ed_num, isbn)``. ``ed_link`` is the absolute
        editions URL, or ``None`` when the page has no usable editions
        link. ``ed_num`` is the integer parsed from the last
        space-separated word of the link text (surrounding parentheses
        removed), or ``0`` when there is no link or that word is not a
        number.
    """
    response = requests.get("https://www.goodreads.com/search", params={'q': isbn})
    soup = bs(response.text, 'lxml')

    # Defaults describe the "nothing found" case so the return statement
    # below never touches an unbound name.
    ed_link = None
    ed_num = 0

    # find() returns None when nothing matches, so each step is checked
    # before the next one dereferences its result.
    container = soup.find("div", class_="otherEditionsLink")
    anchor = container.find("a") if container is not None else None
    href = anchor.get('href') if anchor is not None else None

    if href:
        ed_link = f"https://www.goodreads.com{href}"
        count_text = anchor.text.strip().split(' ')[-1].strip('()')
        try:
            ed_num = int(count_text)
        except ValueError:
            # Link text did not end in a plain integer; keep the default 0
            # rather than failing the whole lookup.
            pass

    return (ed_link, ed_num, isbn)
if __name__ == "__main__":
    # Example run only: the argument 1 is used to simulate an error case.
    ed_details = get_editions_details(1)
    if not ed_details[0]:
        # No editions link came back; report the ISBN instead of going on.
        print(f'no editionlinks for isbn:{ed_details[2]}')
    else:
        get_editions_urls(ed_details)