AttributeError: 'NoneType' object has no attribute 'get_text', I've tried some other answers from the platform,but it is useless
AttributeError: 'NoneType' object has no attribute 'get_text', I've tried some other answers from the platform,but it is useless
这是我的代码:
enter code here
import urllib.request
import re
from bs4 import BeautifulSoup
# Maps each collected article href to its link text; filled by M1905.getPage.
URLdict = {}
class M1905:
    """Fetch a 1905.com news listing page and print every article it links to."""

    def __init__(self,baseurl):
        """Store the listing URL and the request headers.

        Args:
            baseurl: URL of the listing page; getPage() appends the paging
                query string to it.
        """
        self.baseURL=baseurl
        self.user_agent = 'Chrome/58.0(compatible;MSIE 5.5; Windows 10)'
        # Sent with every request made by getPage().
        self.headers = {'User-Agent': self.user_agent}

    def getPage(self,pageNum):
        """Download listing page ``pageNum`` and print each linked article.

        Every <a> whose href contains '/news/' is recorded in the module-level
        URLdict (href -> link text). Each recorded link is then fetched and
        the text of its div.mod-content is printed.

        Args:
            pageNum: Page number placed in the ``page`` query parameter.

        Returns:
            The decoded HTML of the listing page.

        Raises:
            AttributeError: If a fetched article page has no div.mod-content.
        """
        url=self.baseURL+'?refresh=1321407488&page='+str(pageNum)
        request=urllib.request.Request(url,headers=self.headers)
        response=urllib.request.urlopen(request)
        first=response.read().decode('utf-8')
        BSobj = BeautifulSoup(first, "html.parser")
        for a in BSobj.findAll("a", href=True):
            # A non-empty findall() result means the href contains '/news/'.
            if re.findall('/news/', a['href']):
                URLdict[a['href']] = a.get_text()
        # NOTE(review): indentation was reconstructed from a flattened paste;
        # this loop is assumed to run once, after all links are collected.
        for link, title in URLdict.items():
            print(title, ":", link)
            ContentRequest = urllib.request.Request(link, headers=self.headers)
            ContentResponse = urllib.request.urlopen(ContentRequest)
            ContentHTMLText = ContentResponse.read().decode('utf-8')
            ContentBSobj = BeautifulSoup(ContentHTMLText, "html.parser")
            Content = ContentBSobj.find("div", {"class": "mod-content"})
            # NOTE: find() returns None when the page has no div.mod-content,
            # so this line raises the AttributeError shown in the traceback
            # below. It is left unguarded because it is the subject of the
            # question.
            print(Content.get_text())
        return first
# Listing page to scrape.
baseURL='http://www.1905.com/list-p-catid-221.html'
m1905=M1905(baseURL)
# Fetch page 1; this is the call that appears in the traceback below.
m1905.getPage(1)
当我执行它的时候,它从网站上得到了一些消息。但是,这些消息的结尾有一个错误。可以看到:
enter code here
Traceback (most recent call last):
File "C:/Users/Heidy/PycharmProjects/untitled1/m1905.py", line 33, in
<module>
m1905.getPage(1)
File "C:/Users/Heidy/PycharmProjects/untitled1/m1905.py", line 29, in
getPage
print(Content.get_text())
AttributeError: 'NoneType' object has no attribute 'get_text'
我的代码有什么问题?等一个好人!谢谢!
还有一个问题是"how to delete the js code",我执行的时候,我的消息里有一些,像这样:
我发不了两个链接,所以我删除“//”。
var ATLASCONFIG = {
编号:“1191699”,
上一个url:"httpwww.1905.com/news/20170610/1191700.shtml#p1",
接下来url:"httpwww.1905.com/news/20170610/1191700.shtml#p1",
shareIframe:"http://www.1905.com/api/share2.php?id=1191699&title=%E3%80%8A%E6%B7%B1%E5%A4%9C%E9%A3%9F%E5%A0%82%E3%80%8B%E6%9B%9
D%E3%80%8A%E9%B1%BC%E6%9D%BE%E9%A5%AD%E3%80%8B%E5%89%A7%E7%85%A7+%E5%BE%90% E5%A8%87%E5%88%98%E6%98%8A%E7%84%B6%E7%BB%93%E7%BC%98&url=http%3A%2F%2Fwww.1905.com%2F%2Fnewgallery%2Fhdpic%2F1191699.shtml&img=http%3A%2F%2Fimage11.m1905.cn%2Fuploadfile%2F2017%2F0610%2F20170610020519474659_watermark.jpg&app_id= www&sign=af43563e8eacab2280a4a84f8bb016cb"
}
Content = ContentBSobj.find("div", {"class": "mod-content"})
如果 ContentBSobj 中没有 class 为 mod-content 的 div 标签,ContentBSobj.find(..) 会返回 None。尝试访问 None 对象的 get_text 属性会导致 AttributeError:
>>> Content.get_text
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'get_text'
为防止异常,您应该在调用 get_text() 之前检查返回值是否为 None:
Content = ContentBSobj.find("div", {"class": "mod-content"})
# find() returns None when no matching <div> exists, so guard before use.
if Content is not None:
    print(Content.get_text())
这是我的代码:
enter code here
import urllib.request
import re
from bs4 import BeautifulSoup
# Maps each collected article href to its link text; filled by M1905.getPage.
URLdict = {}
class M1905:
    """Fetch a 1905.com news listing page and print every article it links to."""

    def __init__(self,baseurl):
        """Store the listing URL and the request headers.

        Args:
            baseurl: URL of the listing page; getPage() appends the paging
                query string to it.
        """
        self.baseURL=baseurl
        self.user_agent = 'Chrome/58.0(compatible;MSIE 5.5; Windows 10)'
        # Sent with every request made by getPage().
        self.headers = {'User-Agent': self.user_agent}

    def getPage(self,pageNum):
        """Download listing page ``pageNum`` and print each linked article.

        Every <a> whose href contains '/news/' is recorded in the module-level
        URLdict (href -> link text). Each recorded link is then fetched and
        the text of its div.mod-content is printed.

        Args:
            pageNum: Page number placed in the ``page`` query parameter.

        Returns:
            The decoded HTML of the listing page.

        Raises:
            AttributeError: If a fetched article page has no div.mod-content.
        """
        url=self.baseURL+'?refresh=1321407488&page='+str(pageNum)
        request=urllib.request.Request(url,headers=self.headers)
        response=urllib.request.urlopen(request)
        first=response.read().decode('utf-8')
        BSobj = BeautifulSoup(first, "html.parser")
        for a in BSobj.findAll("a", href=True):
            # A non-empty findall() result means the href contains '/news/'.
            if re.findall('/news/', a['href']):
                URLdict[a['href']] = a.get_text()
        # NOTE(review): indentation was reconstructed from a flattened paste;
        # this loop is assumed to run once, after all links are collected.
        for link, title in URLdict.items():
            print(title, ":", link)
            ContentRequest = urllib.request.Request(link, headers=self.headers)
            ContentResponse = urllib.request.urlopen(ContentRequest)
            ContentHTMLText = ContentResponse.read().decode('utf-8')
            ContentBSobj = BeautifulSoup(ContentHTMLText, "html.parser")
            Content = ContentBSobj.find("div", {"class": "mod-content"})
            # NOTE: find() returns None when the page has no div.mod-content,
            # so this line raises the AttributeError shown in the traceback
            # below. It is left unguarded because it is the subject of the
            # question.
            print(Content.get_text())
        return first
# Listing page to scrape.
baseURL='http://www.1905.com/list-p-catid-221.html'
m1905=M1905(baseURL)
# Fetch page 1; this is the call that appears in the traceback below.
m1905.getPage(1)
当我执行它的时候,它从网站上得到了一些消息。但是,这些消息的结尾有一个错误。可以看到:
enter code here
Traceback (most recent call last):
File "C:/Users/Heidy/PycharmProjects/untitled1/m1905.py", line 33, in
<module>
m1905.getPage(1)
File "C:/Users/Heidy/PycharmProjects/untitled1/m1905.py", line 29, in
getPage
print(Content.get_text())
AttributeError: 'NoneType' object has no attribute 'get_text'
我的代码有什么问题?等一个好人!谢谢! 还有一个问题是"how to delete the js code",我执行的时候,我的消息里有一些,像这样: 我发不了两个链接,所以我删除“//”。
var ATLASCONFIG = { 编号:“1191699”, 上一个url:"httpwww.1905.com/news/20170610/1191700.shtml#p1", 接下来url:"httpwww.1905.com/news/20170610/1191700.shtml#p1", shareIframe:"http://www.1905.com/api/share2.php?id=1191699&title=%E3%80%8A%E6%B7%B1%E5%A4%9C%E9%A3%9F%E5%A0%82%E3%80%8B%E6%9B%9 D%E3%80%8A%E9%B1%BC%E6%9D%BE%E9%A5%AD%E3%80%8B%E5%89%A7%E7%85%A7+%E5%BE%90% E5%A8%87%E5%88%98%E6%98%8A%E7%84%B6%E7%BB%93%E7%BC%98&url=http%3A%2F%2Fwww.1905.com%2F%2Fnewgallery%2Fhdpic%2F1191699.shtml&img=http%3A%2F%2Fimage11.m1905.cn%2Fuploadfile%2F2017%2F0610%2F20170610020519474659_watermark.jpg&app_id= www&sign=af43563e8eacab2280a4a84f8bb016cb" }
Content = ContentBSobj.find("div", {"class": "mod-content"})
如果 ContentBSobj 中没有 class 为 mod-content 的 div 标签,ContentBSobj.find(..) 会返回 None。尝试访问 None 对象的 get_text 属性会导致 AttributeError:
>>> Content.get_text
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'get_text'
为防止异常,您应该在调用 get_text() 之前检查返回值是否为 None:
Content = ContentBSobj.find("div", {"class": "mod-content"})
# find() returns None when no matching <div> exists, so guard before use.
if Content is not None:
    print(Content.get_text())