Python 在 table 行中查找数据
Python find data in table row
假设我有一个 10 列、100 行的 HTML 表格，我想使用 BeautifulSoup 查找某个数据是否存在，如果存在就打印完整的一行。
# Fetch the Wikipedia page and print the <td> cells of every table row.
import urllib3
from bs4 import BeautifulSoup

http = urllib3.PoolManager()
url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
response = http.request('GET', url)
soup = BeautifulSoup(response.data, "html.parser")

# [2:] skips the first two <tr> elements found in the document
# (presumably header rows -- confirm against the page markup).
for tr in soup.find_all('tr')[2:]:
    tds = tr.find_all('td')
    print(tds)
编辑:
import urllib3
import re
from bs4 import BeautifulSoup
def searchTableForCountry(country):
    """Find the table rows on the Wikipedia page that link to *country*.

    Downloads the "List of Asian countries by area" page, scans every
    ``<tr>`` element, and keeps the rows that contain at least one ``<a>``
    whose ``href`` contains ``/wiki/<country>``.

    Args:
        country: Country name as it appears in the wiki link (e.g.
            ``'China'``). It is matched literally; regex metacharacters
            in the name are escaped.

    Returns:
        A list with one string per matching row: the row's text with
        newlines and non-breaking spaces replaced by regular spaces.
        The joined result is also printed.
    """
    http = urllib3.PoolManager()
    url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
    response = http.request('GET', url)
    soup = BeautifulSoup(response.data, "html.parser")

    # Compile once, outside the loop. re.escape keeps names containing
    # characters such as '(' or '.' from being interpreted as regex syntax.
    link_pattern = re.compile(r'/wiki/' + re.escape(country))

    outlist = []
    for row in soup.find_all('tr'):
        # A row matches when it contains at least one link to the country.
        if row.find('a', {'href': link_pattern}) is not None:
            outlist.append(row.text.replace('\n', ' ').replace('\xa0', ' '))

    print(''.join(outlist))
    return outlist
# Example call; the comment line after it appears to be the text printed for 'China'.
searchTableForCountry('China')
#2 China 9,596,961 excludes Hong Kong, Macau, Taiwan and disputed areas/islands
假设我有一个 10 列、100 行的 HTML 表格，我想使用 BeautifulSoup 查找某个数据是否存在，如果存在就打印完整的一行。
# Fetch the Wikipedia page and print the <td> cells of every table row.
import urllib3
from bs4 import BeautifulSoup

http = urllib3.PoolManager()
url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
response = http.request('GET', url)
soup = BeautifulSoup(response.data, "html.parser")

# [2:] skips the first two <tr> elements found in the document
# (presumably header rows -- confirm against the page markup).
for tr in soup.find_all('tr')[2:]:
    tds = tr.find_all('td')
    print(tds)
编辑:
import urllib3
import re
from bs4 import BeautifulSoup
def searchTableForCountry(country):
    """Find the table rows on the Wikipedia page that link to *country*.

    Downloads the "List of Asian countries by area" page, scans every
    ``<tr>`` element, and keeps the rows that contain at least one ``<a>``
    whose ``href`` contains ``/wiki/<country>``.

    Args:
        country: Country name as it appears in the wiki link (e.g.
            ``'China'``). It is matched literally; regex metacharacters
            in the name are escaped.

    Returns:
        A list with one string per matching row: the row's text with
        newlines and non-breaking spaces replaced by regular spaces.
        The joined result is also printed.
    """
    http = urllib3.PoolManager()
    url = "https://en.wikipedia.org/wiki/List_of_Asian_countries_by_area"
    response = http.request('GET', url)
    soup = BeautifulSoup(response.data, "html.parser")

    # Compile once, outside the loop. re.escape keeps names containing
    # characters such as '(' or '.' from being interpreted as regex syntax.
    link_pattern = re.compile(r'/wiki/' + re.escape(country))

    outlist = []
    for row in soup.find_all('tr'):
        # A row matches when it contains at least one link to the country.
        if row.find('a', {'href': link_pattern}) is not None:
            outlist.append(row.text.replace('\n', ' ').replace('\xa0', ' '))

    print(''.join(outlist))
    return outlist
# Example call; the comment line after it appears to be the text printed for 'China'.
searchTableForCountry('China')
#2 China 9,596,961 excludes Hong Kong, Macau, Taiwan and disputed areas/islands