如何使用 python flask 从维基百科页面获取 table 数据
How to get table data from Wikipedia page using python flask
我尝试使用 BeautifulSoup 和 Pandas 从维基百科的 IBM 页面抓取 wiki 表格,但无法通过 Flask 显示结果。可能是什么原因?
这是我的代码:
# Question code as posted: a Flask app whose single route scrapes a table from
# the English Wikipedia IBM article. Indentation was lost in this copy, and the
# code is kept exactly as asked about.
from flask import Flask
app = Flask(__name__)
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Register table() as the handler for the site root.
@app.route('/')
def table():
url = "https://en.wikipedia.org/wiki/IBM"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
# Find a <table> by the class value 'wikitable float-left' and take its <tbody>.
table=soup.find('table',{'class':'wikitable float-left'}).tbody
rows=table.find_all('tr')
# Column names: text of each <th> in the first row, with newlines removed.
columns=[v.text.replace('\n', '')for v in rows [0].find_all('th')]
# NOTE(review): the function returns here, so none of the lines below run.
return(columns)
df= pd.DataFrame(columns=columns)
for i in range(1, len(rows)):
tds=rows[i].find_all('td')
# Only rows with exactly six <td> cells are turned into records.
if len(tds)==6:
values=[tds[0].text.replace('\n',''),tds[1].text.replace('\n',''),tds[2].text.replace('\n',''), tds[3].text.replace('\n',''),tds[4].text.replace('\n',''),tds[5].text.replace('\n', ' ').replace('\n', ' ')]
df=df.append(pd.Series(values,index=columns),ignore_index=True)
# NOTE(review): this returns the DataFrame object itself, not rendered HTML --
# presumably not a value Flask can send as a response; confirm against the
# Flask documentation on view return types.
return(df)
if __name__ == '__main__':
app.run(debug=True)
这是您的 python 文件:
from flask import Flask, render_template
import pandas as pd
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)


@app.route('/')
def table():
    """Scrape a table from the Wikipedia IBM article and render ``index.html``.

    The ``<th>`` cells of the first row supply the column names. Every later
    row whose number of ``<td>`` cells equals the number of columns becomes
    one record; other rows are skipped.

    Returns:
        The rendered ``index.html`` template, given ``tables`` (a one-element
        list holding the DataFrame as HTML) and ``titles`` (the column
        labels). If the page cannot be fetched, or the expected table or its
        header row is missing, a ``(message, 502)`` tuple is returned instead.
    """
    url = "https://en.wikipedia.org/wiki/IBM"
    try:
        # A timeout keeps the request handler from hanging on a stalled fetch.
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        return "Could not fetch the source page.", 502

    soup = BeautifulSoup(page.content, 'html.parser')
    wiki_table = soup.find('table', {'class': 'wikitable float-left'})
    if wiki_table is None:
        return "Expected table was not found on the source page.", 502

    # html.parser does not synthesize <tbody>; fall back to the table itself.
    container = wiki_table.tbody if wiki_table.tbody is not None else wiki_table
    rows = container.find_all('tr')
    if not rows:
        return "Expected table has no rows.", 502

    columns = [th.text.replace('\n', '') for th in rows[0].find_all('th')]
    if not columns:
        return "Expected table has no header cells.", 502

    records = []
    for row in rows[1:]:
        cells = row.find_all('td')
        if len(cells) != len(columns):
            continue
        values = [cell.text.replace('\n', '') for cell in cells[:-1]]
        # The last cell keeps its line breaks as spaces, as the original did.
        values.append(cells[-1].text.replace('\n', ' '))
        records.append(values)

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0.
    df = pd.DataFrame(records, columns=columns)
    return render_template(
        'index.html',
        tables=[df.to_html(classes='data')],
        titles=df.columns.values,
    )


if __name__ == '__main__':
    app.run(debug=True)
这是您的 html 文件,它应该位于您的 project/templates 路径中:
{# Emit each pre-rendered HTML table passed in `tables`. #}
{% for table in tables %}
{# NOTE(review): Jinja's loop.index is 1-based, so the first table prints titles[1] -- confirm this is intended. #}
{{titles[loop.index]}}
{# `safe` marks the table markup as trusted so it is not HTML-escaped. #}
{{ table|safe }}
{% endfor %}
使用 pandas 的 DataFrame.to_html 将数据帧转换为 HTML,然后直接 return 该字符串。
from flask import Flask, render_template
import pandas as pd
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)


@app.route('/')
def table():
    """Scrape a table from the Wikipedia IBM article and return it as HTML.

    Every ``<tr>`` of the table is read as a list of cell texts (``<th>`` and
    ``<td>`` alike, newlines removed). The first row becomes the DataFrame
    header and the remaining rows become its data.

    Returns:
        The DataFrame rendered by ``DataFrame.to_html`` with the element id
        ``table``. If the page cannot be fetched, or the expected table is
        missing or empty, a ``(message, 502)`` tuple is returned instead.
    """
    url = "https://en.wikipedia.org/wiki/IBM"
    try:
        # A timeout keeps the request handler from hanging on a stalled fetch.
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        return "Could not fetch the source page.", 502

    soup = BeautifulSoup(page.content, 'html.parser')
    wiki_table = soup.find('table', {'class': 'wikitable float-left'})
    if wiki_table is None:
        return "Expected table was not found on the source page.", 502

    data = [
        [cell.text.replace('\n', '') for cell in row.find_all(['th', 'td'])]
        for row in wiki_table.find_all('tr')
    ]
    if not data:
        return "Expected table has no rows.", 502

    header, *body = data
    df = pd.DataFrame(body, columns=header)
    # `header` expects a bool; the original passed the truthy string "true".
    return df.to_html(header=True, table_id="table")


if __name__ == '__main__':
    app.run(debug=True)
我尝试使用 BeautifulSoup 和 Pandas 从维基百科的 IBM 页面抓取 wiki 表格,但无法通过 Flask 显示结果。可能是什么原因?
这是我的代码:
# Question code as posted: a Flask app whose single route scrapes a table from
# the English Wikipedia IBM article. Indentation was lost in this copy, and the
# code is kept exactly as asked about.
from flask import Flask
app = Flask(__name__)
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Register table() as the handler for the site root.
@app.route('/')
def table():
url = "https://en.wikipedia.org/wiki/IBM"
page = requests.get(url)
soup = BeautifulSoup(page.content, 'html.parser')
# Find a <table> by the class value 'wikitable float-left' and take its <tbody>.
table=soup.find('table',{'class':'wikitable float-left'}).tbody
rows=table.find_all('tr')
# Column names: text of each <th> in the first row, with newlines removed.
columns=[v.text.replace('\n', '')for v in rows [0].find_all('th')]
# NOTE(review): the function returns here, so none of the lines below run.
return(columns)
df= pd.DataFrame(columns=columns)
for i in range(1, len(rows)):
tds=rows[i].find_all('td')
# Only rows with exactly six <td> cells are turned into records.
if len(tds)==6:
values=[tds[0].text.replace('\n',''),tds[1].text.replace('\n',''),tds[2].text.replace('\n',''), tds[3].text.replace('\n',''),tds[4].text.replace('\n',''),tds[5].text.replace('\n', ' ').replace('\n', ' ')]
df=df.append(pd.Series(values,index=columns),ignore_index=True)
# NOTE(review): this returns the DataFrame object itself, not rendered HTML --
# presumably not a value Flask can send as a response; confirm against the
# Flask documentation on view return types.
return(df)
if __name__ == '__main__':
app.run(debug=True)
这是您的 python 文件:
from flask import Flask, render_template
import pandas as pd
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)


@app.route('/')
def table():
    """Scrape a table from the Wikipedia IBM article and render ``index.html``.

    The ``<th>`` cells of the first row supply the column names. Every later
    row whose number of ``<td>`` cells equals the number of columns becomes
    one record; other rows are skipped.

    Returns:
        The rendered ``index.html`` template, given ``tables`` (a one-element
        list holding the DataFrame as HTML) and ``titles`` (the column
        labels). If the page cannot be fetched, or the expected table or its
        header row is missing, a ``(message, 502)`` tuple is returned instead.
    """
    url = "https://en.wikipedia.org/wiki/IBM"
    try:
        # A timeout keeps the request handler from hanging on a stalled fetch.
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        return "Could not fetch the source page.", 502

    soup = BeautifulSoup(page.content, 'html.parser')
    wiki_table = soup.find('table', {'class': 'wikitable float-left'})
    if wiki_table is None:
        return "Expected table was not found on the source page.", 502

    # html.parser does not synthesize <tbody>; fall back to the table itself.
    container = wiki_table.tbody if wiki_table.tbody is not None else wiki_table
    rows = container.find_all('tr')
    if not rows:
        return "Expected table has no rows.", 502

    columns = [th.text.replace('\n', '') for th in rows[0].find_all('th')]
    if not columns:
        return "Expected table has no header cells.", 502

    records = []
    for row in rows[1:]:
        cells = row.find_all('td')
        if len(cells) != len(columns):
            continue
        values = [cell.text.replace('\n', '') for cell in cells[:-1]]
        # The last cell keeps its line breaks as spaces, as the original did.
        values.append(cells[-1].text.replace('\n', ' '))
        records.append(values)

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0.
    df = pd.DataFrame(records, columns=columns)
    return render_template(
        'index.html',
        tables=[df.to_html(classes='data')],
        titles=df.columns.values,
    )


if __name__ == '__main__':
    app.run(debug=True)
这是您的 html 文件,它应该位于您的 project/templates 路径中:
{# Emit each pre-rendered HTML table passed in `tables`. #}
{% for table in tables %}
{# NOTE(review): Jinja's loop.index is 1-based, so the first table prints titles[1] -- confirm this is intended. #}
{{titles[loop.index]}}
{# `safe` marks the table markup as trusted so it is not HTML-escaped. #}
{{ table|safe }}
{% endfor %}
使用 pandas 的 DataFrame.to_html 将数据帧转换为 HTML,然后直接 return 该字符串。
from flask import Flask, render_template
import pandas as pd
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)


@app.route('/')
def table():
    """Scrape a table from the Wikipedia IBM article and return it as HTML.

    Every ``<tr>`` of the table is read as a list of cell texts (``<th>`` and
    ``<td>`` alike, newlines removed). The first row becomes the DataFrame
    header and the remaining rows become its data.

    Returns:
        The DataFrame rendered by ``DataFrame.to_html`` with the element id
        ``table``. If the page cannot be fetched, or the expected table is
        missing or empty, a ``(message, 502)`` tuple is returned instead.
    """
    url = "https://en.wikipedia.org/wiki/IBM"
    try:
        # A timeout keeps the request handler from hanging on a stalled fetch.
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        return "Could not fetch the source page.", 502

    soup = BeautifulSoup(page.content, 'html.parser')
    wiki_table = soup.find('table', {'class': 'wikitable float-left'})
    if wiki_table is None:
        return "Expected table was not found on the source page.", 502

    data = [
        [cell.text.replace('\n', '') for cell in row.find_all(['th', 'td'])]
        for row in wiki_table.find_all('tr')
    ]
    if not data:
        return "Expected table has no rows.", 502

    header, *body = data
    df = pd.DataFrame(body, columns=header)
    # `header` expects a bool; the original passed the truthy string "true".
    return df.to_html(header=True, table_id="table")


if __name__ == '__main__':
    app.run(debug=True)