将列表对象转换为 DataFrame 并写入 Azure 上的 MySQL 数据库
List object to DataFrame to MySQL DB on Azure
我使用 Selenium 和 Pandas 从网站上抓取表格(table),得到的是一个列表(list)对象。然后我尝试将该列表转换为 DataFrame,以便将其写入 MySQL。
当我打印抓取结果时,它显示为格式良好的表格,带有清晰的行号和列标题(headers);但是当我对它调用 len() 时,结果却是 1。
我尝试了很多方法,确实需要一些帮助。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#import csv
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame
import time
from sqlalchemy import create_engine
# Script overview: open the bitcoincharts page in headless Chrome, reveal the
# raw-data table, parse the element with id 'chart_table' via pandas, and
# append the result to the MySQL table 'bitcoin'.
# base url
baseurl = "https://bitcoincharts.com/charts/bitstampUSD#igDailyztgSzm1g10zm2g25zv"
# selenium arguments
options = Options()
# Excludes Chrome's "enable-logging" switch (presumably to quiet console noise).
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Run the browser without a visible window, at a fixed window size.
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path="C:/Users/mande/OneDrive/Knowledge/Python/chromedriver.exe")
# navigates to website
driver.get(baseurl)
# clicks "show raw data"
# NOTE(review): the element is located by an absolute XPath, so this depends on
# the page's exact layout. `rawdata` holds the return value of click() and is
# never used afterwards.
rawdata = driver.find_element_by_xpath("/html/body/div[5]/div/div[2]/a").click()
# Fixed 10-second pause; presumably gives the table time to render after the click.
print("Sleeping 10 seconds")
time.sleep(10)
print("Continue")
# Parse the rendered page and pick out the element with id 'chart_table'.
soup = BeautifulSoup(driver.page_source, 'lxml')
tables = soup.find(id='chart_table')
# pd.read_html returns a *list* of DataFrames (one per parsed <table>), not a
# single DataFrame, so despite its name `df` is a list here.
df = pd.read_html(str(tables), header=0)
# df[0] is the first parsed table.
print(df[0])
driver.quit()
# These report the list itself: its type and the number of tables parsed,
# not the number of rows in the table.
print(type(df))
print(len(df))
# NOTE(review): `df` is a list, so pd.DataFrame([df]) does not rebuild the
# scraped table. Assigning nine column names on the next line then fails with
# "ValueError: Length mismatch: Expected axis has 1 element, new values have
# 9 elements". The scraped table is already available as df[0].
df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
# NOTE(review): the URL string contains no {user}/{pw}/{db} placeholders, so
# .format(...) returns it unchanged and the keyword arguments are ignored.
engine = create_engine("mysql+pymysql://user:password@hostname/dbname"
.format(user="user",
pw="password",
db="dbname"))
# Unbound-method call, equivalent to df2.to_sql(...): appends rows to table
# 'bitcoin' without writing the DataFrame index, in chunks of up to 10000 rows.
DataFrame.to_sql(df2, name='bitcoin', con = engine, if_exists='append', chunksize=10000, index=False)
控制台输出:
ValueError: Length mismatch: Expected axis has 1 element, new values have 9 elements
把这两行:
df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
替换为:
df2 = df[0]
这样就解决了我的问题。原因是 pd.read_html 返回的是一个由 DataFrame 组成的列表(所以 len 的结果是 1),df[0] 才是抓取到的那张表,可以直接写入 MySQL。
我使用 Selenium 和 Pandas 从网站上抓取表格(table),得到的是一个列表(list)对象。然后我尝试将该列表转换为 DataFrame,以便将其写入 MySQL。
当我打印抓取结果时,它显示为格式良好的表格,带有清晰的行号和列标题(headers);但是当我对它调用 len() 时,结果却是 1。
我尝试了很多方法,确实需要一些帮助。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#import csv
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame
import time
from sqlalchemy import create_engine
# Script overview: open the bitcoincharts page in headless Chrome, reveal the
# raw-data table, parse the element with id 'chart_table' via pandas, and
# append the result to the MySQL table 'bitcoin'.
# base url
baseurl = "https://bitcoincharts.com/charts/bitstampUSD#igDailyztgSzm1g10zm2g25zv"
# selenium arguments
options = Options()
# Excludes Chrome's "enable-logging" switch (presumably to quiet console noise).
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Run the browser without a visible window, at a fixed window size.
options.headless = True
options.add_argument("--window-size=1920,1200")
driver = webdriver.Chrome(options=options, executable_path="C:/Users/mande/OneDrive/Knowledge/Python/chromedriver.exe")
# navigates to website
driver.get(baseurl)
# clicks "show raw data"
# NOTE(review): the element is located by an absolute XPath, so this depends on
# the page's exact layout. `rawdata` holds the return value of click() and is
# never used afterwards.
rawdata = driver.find_element_by_xpath("/html/body/div[5]/div/div[2]/a").click()
# Fixed 10-second pause; presumably gives the table time to render after the click.
print("Sleeping 10 seconds")
time.sleep(10)
print("Continue")
# Parse the rendered page and pick out the element with id 'chart_table'.
soup = BeautifulSoup(driver.page_source, 'lxml')
tables = soup.find(id='chart_table')
# pd.read_html returns a *list* of DataFrames (one per parsed <table>), not a
# single DataFrame, so despite its name `df` is a list here.
df = pd.read_html(str(tables), header=0)
# df[0] is the first parsed table.
print(df[0])
driver.quit()
# These report the list itself: its type and the number of tables parsed,
# not the number of rows in the table.
print(type(df))
print(len(df))
# NOTE(review): `df` is a list, so pd.DataFrame([df]) does not rebuild the
# scraped table. Assigning nine column names on the next line then fails with
# "ValueError: Length mismatch: Expected axis has 1 element, new values have
# 9 elements". The scraped table is already available as df[0].
df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
# NOTE(review): the URL string contains no {user}/{pw}/{db} placeholders, so
# .format(...) returns it unchanged and the keyword arguments are ignored.
engine = create_engine("mysql+pymysql://user:password@hostname/dbname"
.format(user="user",
pw="password",
db="dbname"))
# Unbound-method call, equivalent to df2.to_sql(...): appends rows to table
# 'bitcoin' without writing the DataFrame index, in chunks of up to 10000 rows.
DataFrame.to_sql(df2, name='bitcoin', con = engine, if_exists='append', chunksize=10000, index=False)
控制台输出:
ValueError: Length mismatch: Expected axis has 1 element, new values have 9 elements
把这两行:
df2 = pd.DataFrame([df])
df2.columns = ['Rownumber', 'Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume (BTC)', 'Volume (USD)', 'Weighted Price']
替换为:
df2 = df[0]
这样就解决了我的问题。原因是 pd.read_html 返回的是一个由 DataFrame 组成的列表(所以 len 的结果是 1),df[0] 才是抓取到的那张表,可以直接写入 MySQL。