如何在 BeautifulSoup 中按部分匹配(类似 LIKE)查找字符串
How to find a string by partial ("LIKE"-style) match in BeautifulSoup
如果我只知道表头(th)文本的一部分,该如何用 bs4 抓取它相邻兄弟节点的内容?
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
from urllib import request
import pandas as pd
import os
import re
# Download the article and parse it; the response is closed as soon as it is read.
with request.urlopen('https://en.wikipedia.org/wiki/Charles_Ehresmann') as html:
    bs = BS(html.read(), 'html.parser')

# Empty frame with the single column this script collects.
data = pd.DataFrame({'known for': []})

try:
    name = bs.find('h1').text
except AttributeError:
    # find() returned None because the page has no <h1>.
    name = ''

try:
    # A plain str passed to string= must equal the whole text; a compiled
    # pattern is needed to match a header of which only the start is known.
    # The value lives in the <td> that follows the matched <th>, so step to
    # that sibling rather than to next_element (the header's own text node).
    known = bs.find('th', string=re.compile(r'Known.*')).find_next_sibling('td').text
except AttributeError:
    # Either the header or its value cell is missing.
    known = ''
谢谢你的想法
您可以使用 :contains 和 next_sibling
from bs4 import BeautifulSoup as bs
import requests
# Download the article and parse it (the 'lxml' parser must be installed).
r = requests.get('https://en.wikipedia.org/wiki/Charles_Ehresmann')
soup = bs(r.text, 'lxml')
# ':-soup-contains' is the current spelling of the deprecated ':contains'
# pseudo-class (Soup Sieve >= 2.1). It selects the <th> whose text contains
# "Known"; next_sibling is then the cell holding the value, printed as a list
# with one entry per line of text.
print(soup.select_one('th:-soup-contains("Known")').next_sibling.get_text('\n').split('\n'))
如果希望得到字符串而不是列表:
# Same lookup, printed as one newline-separated string instead of a list.
# ':-soup-contains' replaces the deprecated ':contains' (Soup Sieve >= 2.1).
print(soup.select_one('th:-soup-contains("Known")').next_sibling.get_text('\n'))
如果我只知道表头(th)文本的一部分,该如何用 bs4 抓取它相邻兄弟节点的内容?
from urllib.request import urlopen
from bs4 import BeautifulSoup as BS
from urllib import request
import pandas as pd
import os
import re
# Download the article and parse it; the response is closed as soon as it is read.
with request.urlopen('https://en.wikipedia.org/wiki/Charles_Ehresmann') as html:
    bs = BS(html.read(), 'html.parser')

# Empty frame with the single column this script collects.
data = pd.DataFrame({'known for': []})

try:
    name = bs.find('h1').text
except AttributeError:
    # find() returned None because the page has no <h1>.
    name = ''

try:
    # A plain str passed to string= must equal the whole text; a compiled
    # pattern is needed to match a header of which only the start is known.
    # The value lives in the <td> that follows the matched <th>, so step to
    # that sibling rather than to next_element (the header's own text node).
    known = bs.find('th', string=re.compile(r'Known.*')).find_next_sibling('td').text
except AttributeError:
    # Either the header or its value cell is missing.
    known = ''
谢谢你的想法
您可以使用 :contains 和 next_sibling
from bs4 import BeautifulSoup as bs
import requests
# Download the article and parse it (the 'lxml' parser must be installed).
r = requests.get('https://en.wikipedia.org/wiki/Charles_Ehresmann')
soup = bs(r.text, 'lxml')
# ':-soup-contains' is the current spelling of the deprecated ':contains'
# pseudo-class (Soup Sieve >= 2.1). It selects the <th> whose text contains
# "Known"; next_sibling is then the cell holding the value, printed as a list
# with one entry per line of text.
print(soup.select_one('th:-soup-contains("Known")').next_sibling.get_text('\n').split('\n'))
如果希望得到字符串而不是列表:
# Same lookup, printed as one newline-separated string instead of a list.
# ':-soup-contains' replaces the deprecated ':contains' (Soup Sieve >= 2.1).
print(soup.select_one('th:-soup-contains("Known")').next_sibling.get_text('\n'))