使用 python 从 excel 文件导入数据到 SQL 服务器
Importing data from an excel file using python into SQL Server
我发现其他一些问题与我得到的错误类似,但无法根据答案找出解决方法。我试图在 python 的帮助下将 excel 文件导入 SQL 服务器。这是我写的代码:
import pandas as pd
import numpy as np
import pandas.io.sql
import pyodbc
import xlrd
server = "won't disclose private info"
db = 'private info'
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + Server + ';DATABASE=' +
db + ';Trusted_Connection=yes')
cursor = conn.cursor()
book = xlrd.open_workbook("Daily Flash.xlsx")
sheet = book.sheet_by_name("Sheet1")
query1 = """CREATE TABLE [LEAF].[MK] ([LEAF][Lease_Number] varchar(255),
[LEAF][Start_Date] varchar(255), [LEAF][Report_Status] varchar(255), [LEAF]
[Status_Date] varchar(255), [LEAF][Current_Status] varchar(255), [LEAF]
[Sales_Rep] varchar(255), [LEAF][Customer_Name] varchar(255),[LEAF]
[Total_Finance] varchar(255),
[LEAF][Rate_Class] varchar(255) ,[LEAF][Supplier_Name] varchar(255) ,[LEAF]
[DecisionStatus] varchar(255))"""
query = """INSERT INTO [LEAF].[MK] (Lease_Number, Start_Date, Report_Status,
Status_Date, Current_Status, Sales_Rep, Customer_Name,Total_Finance,
Rate_Class,Supplier_Name,DecisionStatus) VALUES (%s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s)"""
for r in range(1, sheet.nrows):
Lease_Number = sheet.cell(r,0).value
Start_Date = sheet.cell(r,1).value
Report_Status = sheet.cell(r,2).value
Status_Date = sheet.cell(r,3).value
Current_Status= sheet.cell(r,4).value
Sales_Rep = sheet.cell(r,5).value
Customer_Name = sheet.cell(r,6).value
Total_Financed= sheet.cell(r,7).value
Rate_Class = sheet.cell(r,8).value
Supplier_Name = sheet.cell(r,9).value
DecisionStatus= sheet.cell(r,10).value
values = (Lease_Number, Start_Date, Report_Status, Status_Date,
Current_Status, Sales_Rep, Customer_Name, Total_Financed, Rate_Class,
Supplier_Name, DecisionStatus)
cursor.execute(query1)
cursor.execute(query, values)
database.commit()
database.close()
database.commit()
我收到的错误信息是:
ProgrammingError Traceback (most recent call last)
<ipython-input-24-c525ebf0af73> in <module>()
16
17 # Execute sql Query
---> 18 cursor.execute(query, values)
19
20 # Commit the transaction
ProgrammingError: ('The SQL contains 0 parameter markers, but 11 parameters
were supplied', 'HY000')
有人可以向我解释一下这个问题以及我该如何解决吗?谢谢!
更新:
根据下面的评论,我已收到该错误消息。我还修改了我的查询,因为我试图向其中插入值的 table 以前没有创建过,所以我更新了我的代码以尝试创建它。
但是,现在我收到错误消息:
ProgrammingError: ('42000', '[42000] [Microsoft][ODBC SQL Server Driver][SQL
Server]The specified schema name "dbo" either does not exist or you do not
have permission to use it. (2760) (SQLExecDirectW)')
我尝试通过编写 CREATE [HELLO][MK] 而不是仅仅 CREATE MK 来稍微改变它,但这告诉我 MK 已经在数据库中......接下来我应该采取什么步骤?
根据我们聊天中的对话,这里有一些要点:
- 执行您的
CREATE TABLE
查询后,确保在 运行 任何后续 INSERT
查询之前立即提交它。
- 当 table 已经存在于数据库中时,使用错误捕获。您问如果要将更多数据导入 table,脚本是否仍然 运行。答案是否定的,因为 Python 会在
cursor.execute(query1)
. 抛出异常
- 如果您想验证您的插入操作是否成功,您可以做一个简单的记录计数检查。
编辑
昨天,当我让@mkheifetz 测试我的代码时,他发现了一个小错误,其中验证检查会 return False,原因是因为数据库已经有现有记录,所以当仅与当前数据进行比较时导入,验证将失败。因此,作为解决bug的方案,我再次修改了代码。
下面是我将如何修改您的代码:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas.io.sql
import pyodbc
import xlrd
server = 'XXXXX'
db = 'XXXXXdb'
# create Connection and Cursor objects
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server + ';DATABASE=' + db + ';Trusted_Connection=yes')
cursor = conn.cursor()
# read data
data = pd.read_excel('Flash Daily Apps through 070918.xls')
# rename columns
data = data.rename(columns={'Lease Number': 'Lease_Number',
'Start Date': 'Start_Date',
'Report Status': 'Report_Status',
'Status Date': 'Status_Date',
'Current Status': 'Current_Status',
'Sales Rep': 'Sales_Rep',
'Customer Name': 'Customer_Name',
'Total Financed': 'Total_Financed',
'Rate Class': 'Rate_Class',
'Supplier Name': 'Supplier_Name'})
# export
data.to_excel('Daily Flash.xlsx', index=False)
# Open the workbook and define the worksheet
book = xlrd.open_workbook("Daily Flash.xlsx")
sheet = book.sheet_by_name("Sheet1")
query1 = """
CREATE TABLE [LEAF].[ZZZ] (
Lease_Number varchar(255),
Start_Date varchar(255),
Report_Status varchar(255),
Status_Date varchar(255),
Current_Status varchar(255),
Sales_Rep varchar(255),
Customer_Name varchar(255),
Total_Finance varchar(255),
Rate_Class varchar(255),
Supplier_Name varchar(255),
DecisionStatus varchar(255)
)"""
query = """
INSERT INTO [LEAF].[ZZZ] (
Lease_Number,
Start_Date,
Report_Status,
Status_Date,
Current_Status,
Sales_Rep,
Customer_Name,
Total_Finance,
Rate_Class,
Supplier_Name,
DecisionStatus
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
# execute create table
try:
cursor.execute(query1)
conn.commit()
except pyodbc.ProgrammingError:
pass
# grab existing row count in the database for validation later
cursor.execute("SELECT count(*) FROM LEAF.ZZZ")
before_import = cursor.fetchone()
for r in range(1, sheet.nrows):
Lease_Number = sheet.cell(r,0).value
Start_Date = sheet.cell(r,1).value
Report_Status = sheet.cell(r,2).value
Status_Date = sheet.cell(r,3).value
Current_Status= sheet.cell(r,4).value
Sales_Rep = sheet.cell(r,5).value
Customer_Name = sheet.cell(r,6).value
Total_Financed= sheet.cell(r,7).value
Rate_Class = sheet.cell(r,8).value
Supplier_Name = sheet.cell(r,9).value
DecisionStatus= sheet.cell(r,10).value
# Assign values from each row
values = (Lease_Number, Start_Date, Report_Status, Status_Date, Current_Status,
Sales_Rep, Customer_Name, Total_Financed, Rate_Class, Supplier_Name,
DecisionStatus)
# Execute sql Query
cursor.execute(query, values)
# Commit the transaction
conn.commit()
# If you want to check if all rows are imported
cursor.execute("SELECT count(*) FROM LEAF.ZZZ")
result = cursor.fetchone()
print((result[0] - before_import[0]) == len(data.index)) # should be True
# Close the database connection
conn.close()
我发现其他一些问题与我得到的错误类似,但无法根据答案找出解决方法。我试图在 python 的帮助下将 excel 文件导入 SQL 服务器。这是我写的代码:
import pandas as pd
import numpy as np
import pandas.io.sql
import pyodbc
import xlrd
server = "won't disclose private info"
db = 'private info'
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + Server + ';DATABASE=' +
db + ';Trusted_Connection=yes')
cursor = conn.cursor()
book = xlrd.open_workbook("Daily Flash.xlsx")
sheet = book.sheet_by_name("Sheet1")
query1 = """CREATE TABLE [LEAF].[MK] ([LEAF][Lease_Number] varchar(255),
[LEAF][Start_Date] varchar(255), [LEAF][Report_Status] varchar(255), [LEAF]
[Status_Date] varchar(255), [LEAF][Current_Status] varchar(255), [LEAF]
[Sales_Rep] varchar(255), [LEAF][Customer_Name] varchar(255),[LEAF]
[Total_Finance] varchar(255),
[LEAF][Rate_Class] varchar(255) ,[LEAF][Supplier_Name] varchar(255) ,[LEAF]
[DecisionStatus] varchar(255))"""
query = """INSERT INTO [LEAF].[MK] (Lease_Number, Start_Date, Report_Status,
Status_Date, Current_Status, Sales_Rep, Customer_Name,Total_Finance,
Rate_Class,Supplier_Name,DecisionStatus) VALUES (%s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s)"""
for r in range(1, sheet.nrows):
Lease_Number = sheet.cell(r,0).value
Start_Date = sheet.cell(r,1).value
Report_Status = sheet.cell(r,2).value
Status_Date = sheet.cell(r,3).value
Current_Status= sheet.cell(r,4).value
Sales_Rep = sheet.cell(r,5).value
Customer_Name = sheet.cell(r,6).value
Total_Financed= sheet.cell(r,7).value
Rate_Class = sheet.cell(r,8).value
Supplier_Name = sheet.cell(r,9).value
DecisionStatus= sheet.cell(r,10).value
values = (Lease_Number, Start_Date, Report_Status, Status_Date,
Current_Status, Sales_Rep, Customer_Name, Total_Financed, Rate_Class,
Supplier_Name, DecisionStatus)
cursor.execute(query1)
cursor.execute(query, values)
database.commit()
database.close()
database.commit()
我收到的错误信息是:
ProgrammingError Traceback (most recent call last)
<ipython-input-24-c525ebf0af73> in <module>()
16
17 # Execute sql Query
---> 18 cursor.execute(query, values)
19
20 # Commit the transaction
ProgrammingError: ('The SQL contains 0 parameter markers, but 11 parameters
were supplied', 'HY000')
有人可以向我解释一下这个问题以及我该如何解决吗?谢谢!
更新:
根据下面的评论,我已收到该错误消息。我还修改了我的查询,因为我试图向其中插入值的 table 以前没有创建过,所以我更新了我的代码以尝试创建它。
但是,现在我收到错误消息:
ProgrammingError: ('42000', '[42000] [Microsoft][ODBC SQL Server Driver][SQL
Server]The specified schema name "dbo" either does not exist or you do not
have permission to use it. (2760) (SQLExecDirectW)')
我尝试通过编写 CREATE [HELLO][MK] 而不是仅仅 CREATE MK 来稍微改变它,但这告诉我 MK 已经在数据库中......接下来我应该采取什么步骤?
根据我们聊天中的对话,这里有一些要点:
- 执行您的
CREATE TABLE
查询后,确保在 运行 任何后续INSERT
查询之前立即提交它。 - 当 table 已经存在于数据库中时,使用错误捕获。您问如果要将更多数据导入 table,脚本是否仍然 运行。答案是否定的,因为 Python 会在
cursor.execute(query1)
. 抛出异常
- 如果您想验证您的插入操作是否成功,您可以做一个简单的记录计数检查。
编辑 昨天,当我让@mkheifetz 测试我的代码时,他发现了一个小错误,其中验证检查会 return False,原因是因为数据库已经有现有记录,所以当仅与当前数据进行比较时导入,验证将失败。因此,作为解决bug的方案,我再次修改了代码。
下面是我将如何修改您的代码:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas.io.sql
import pyodbc
import xlrd
server = 'XXXXX'
db = 'XXXXXdb'
# create Connection and Cursor objects
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server + ';DATABASE=' + db + ';Trusted_Connection=yes')
cursor = conn.cursor()
# read data
data = pd.read_excel('Flash Daily Apps through 070918.xls')
# rename columns
data = data.rename(columns={'Lease Number': 'Lease_Number',
'Start Date': 'Start_Date',
'Report Status': 'Report_Status',
'Status Date': 'Status_Date',
'Current Status': 'Current_Status',
'Sales Rep': 'Sales_Rep',
'Customer Name': 'Customer_Name',
'Total Financed': 'Total_Financed',
'Rate Class': 'Rate_Class',
'Supplier Name': 'Supplier_Name'})
# export
data.to_excel('Daily Flash.xlsx', index=False)
# Open the workbook and define the worksheet
book = xlrd.open_workbook("Daily Flash.xlsx")
sheet = book.sheet_by_name("Sheet1")
query1 = """
CREATE TABLE [LEAF].[ZZZ] (
Lease_Number varchar(255),
Start_Date varchar(255),
Report_Status varchar(255),
Status_Date varchar(255),
Current_Status varchar(255),
Sales_Rep varchar(255),
Customer_Name varchar(255),
Total_Finance varchar(255),
Rate_Class varchar(255),
Supplier_Name varchar(255),
DecisionStatus varchar(255)
)"""
query = """
INSERT INTO [LEAF].[ZZZ] (
Lease_Number,
Start_Date,
Report_Status,
Status_Date,
Current_Status,
Sales_Rep,
Customer_Name,
Total_Finance,
Rate_Class,
Supplier_Name,
DecisionStatus
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
# execute create table
try:
cursor.execute(query1)
conn.commit()
except pyodbc.ProgrammingError:
pass
# grab existing row count in the database for validation later
cursor.execute("SELECT count(*) FROM LEAF.ZZZ")
before_import = cursor.fetchone()
for r in range(1, sheet.nrows):
Lease_Number = sheet.cell(r,0).value
Start_Date = sheet.cell(r,1).value
Report_Status = sheet.cell(r,2).value
Status_Date = sheet.cell(r,3).value
Current_Status= sheet.cell(r,4).value
Sales_Rep = sheet.cell(r,5).value
Customer_Name = sheet.cell(r,6).value
Total_Financed= sheet.cell(r,7).value
Rate_Class = sheet.cell(r,8).value
Supplier_Name = sheet.cell(r,9).value
DecisionStatus= sheet.cell(r,10).value
# Assign values from each row
values = (Lease_Number, Start_Date, Report_Status, Status_Date, Current_Status,
Sales_Rep, Customer_Name, Total_Financed, Rate_Class, Supplier_Name,
DecisionStatus)
# Execute sql Query
cursor.execute(query, values)
# Commit the transaction
conn.commit()
# If you want to check if all rows are imported
cursor.execute("SELECT count(*) FROM LEAF.ZZZ")
result = cursor.fetchone()
print((result[0] - before_import[0]) == len(data.index)) # should be True
# Close the database connection
conn.close()