除第 0 行以外的所有行
All the rows except the 0th row
from bs4 import BeautifulSoup
import urllib2
from lxml.html import fromstring
import re
import csv
import pandas as pd
# Fetch the Wikipedia "List of Test cricket records" page and pull the <tr>
# rows out of its second <table>.
wiki = "http://en.wikipedia.org/wiki/List_of_Test_cricket_records"
header = {'User-Agent': 'Mozilla/5.0'}  # Needed to prevent 403 error on Wikipedia
req = urllib2.Request(wiki, headers=header)
page = urllib2.urlopen(req)
# Name the parser explicitly so the result does not depend on which parsers
# happen to be installed; lxml is already imported by this file.
soup = BeautifulSoup(page, 'lxml')

# find_all() returns a list-like ResultSet, so a missing second table surfaces
# as IndexError on the [1] lookup, not AttributeError.
try:
    table = soup.find_all('table')[1]
except IndexError:
    print('No tables found, exiting')
    # Actually stop here: continuing would only fail later with a NameError
    # because `table` was never bound.
    raise SystemExit(1)

# Gets all the tr tags (queried once and reused below).
rows = table.find_all('tr')
if not rows:
    print('No table rows found, exiting')
    raise SystemExit(1)

# Gets only the 0th row; safe because `rows` is known to be non-empty.
first = rows[0]

# Open question: how to get all rows except the 0th one?
# NOTE: as written this still contains every row, including row 0.
allRows = rows
print(allRows)
我正在寻找一种方法来获取除第 0 行以外的所有行。
我知道如何获得第 0 行或任何特定行，但我想要除第 0 行以外的每个 'tr' 标签/行。
有什么建议吗？
find_all()
returns a ResultSet
instance, which is a subclass of list, so you can slice it to skip the 0th row:
table.find_all('tr')[1:]
from bs4 import BeautifulSoup
import urllib2
from lxml.html import fromstring
import re
import csv
import pandas as pd
# Fetch the Wikipedia "List of Test cricket records" page and pull the <tr>
# rows out of its second <table>.
wiki = "http://en.wikipedia.org/wiki/List_of_Test_cricket_records"
header = {'User-Agent': 'Mozilla/5.0'}  # Needed to prevent 403 error on Wikipedia
req = urllib2.Request(wiki, headers=header)
page = urllib2.urlopen(req)
# Name the parser explicitly so the result does not depend on which parsers
# happen to be installed; lxml is already imported by this file.
soup = BeautifulSoup(page, 'lxml')

# find_all() returns a list-like ResultSet, so a missing second table surfaces
# as IndexError on the [1] lookup, not AttributeError.
try:
    table = soup.find_all('table')[1]
except IndexError:
    print('No tables found, exiting')
    # Actually stop here: continuing would only fail later with a NameError
    # because `table` was never bound.
    raise SystemExit(1)

# Gets all the tr tags (queried once and reused below).
rows = table.find_all('tr')
if not rows:
    print('No table rows found, exiting')
    raise SystemExit(1)

# Gets only the 0th row; safe because `rows` is known to be non-empty.
first = rows[0]

# Open question: how to get all rows except the 0th one?
# NOTE: as written this still contains every row, including row 0.
allRows = rows
print(allRows)
我正在寻找一种方法来获取除第 0 行以外的所有行。我知道如何获得第 0 行或任何特定行，但我想要除第 0 行以外的每个 'tr' 标签/行。
有什么建议吗？
find_all()
returns a ResultSet
instance, which is a subclass of list, so you can slice it to skip the 0th row:
table.find_all('tr')[1:]