使用 Python 将数据从 Excel 导入 Oracle Table

Import data from Excel into Oracle Table using Python

同行,

我是新手。我们可以从 Excel 文件中读取数据并加载到 Oracle table 中吗?如果能提供一些 Python 示例脚本会很有帮助。我写了几行代码来熟悉一下,如下所示。

P.S. 补充说明:下面只是我的部分代码。我不确定如何在 Oracle 部分的代码中加入 'insert statement' 或 'create table' 语句。我想在遍历每一列的循环中,把从 Excel 读取的数据加载进去。先谢谢了(TIA)!

import openpyxl
import cx_Oracle

# Oracle connection starts here.
# Opens the session and prints the database version, the cx_Oracle driver
# version and the session's current schema.
connection = cx_Oracle.connect("<schema>", "<pwd>", "<hostname>/<sid/service>")
print("Database version:", connection.version)
print(cx_Oracle.version)
print(connection.current_schema)

# DDL for the target table: seven mandatory VARCHAR2(50) columns.
create_table = """
CREATE TABLE test (
col1 VARCHAR2(50) NOT NULL,
col2 VARCHAR2(50) NOT NULL,
col3 VARCHAR2(50) NOT NULL,
col4 VARCHAR2(50) NOT NULL,
col5 VARCHAR2(50) NOT NULL,
col6 VARCHAR2(50) NOT NULL,
col7 VARCHAR2(50) NOT NULL
)
"""
# NOTE(review): `modules` is imported here but never used in this script.
from sys import modules
# NOTE(review): `cursor` is never assigned anywhere above (there is no
# `cursor = connection.cursor()`), so this line raises NameError as written.
cursor.execute(create_table)    

# NOTE(review): `Workbook` is imported but never used; the workbook is opened
# through `openpyxl.load_workbook` instead.
from openpyxl import Workbook
wb = openpyxl.load_workbook('<name of the file>',data_only=True)
ws = wb['Sheet1']

# x is the worksheet row used for col1; m is a running counter copied into ID.
x=1
m=1

# Loop over worksheet columns 2..max_column; each worksheet column is treated
# as one record whose fields come from fixed worksheet rows.
for j in range(2,ws.max_column+1):

   # NOTE(review): ID is assigned on every pass but never read afterwards.
   ID = m 
   # NOTE(review): x is only incremented after the loop has finished, so it is
   # always 1 here and col1 reads the same cell as col2 (row 1).
   col1 = ws.cell(row=x,column=j).value  
   m = m+1

   col2 = ws.cell(row=1, column=j).value

   col3 = ws.cell(row=2, column=j).value

   col4 = ws.cell(row=3,column=j).value

   col5 = ws.cell(row=4, column=j).value

   col6 = ws.cell(row=5, column=j).value

   col7 = ws.cell(row=6, column=j).value

   # Walks every row of the current column.
   # NOTE(review): Cellval is overwritten on each pass and never used.
   for i in range(1,ws.max_row+1):
         Cellval= ws.cell(row=i, column=j).value

# Builds the INSERT from the col1..col7 variables.
# NOTE(review): this statement is not indented under the column loop, so it is
# built and executed once, after the loop, with the values of the last column
# only. The values are spliced into the SQL text without quoting or bind
# variables, and `str + value` raises TypeError for any cell value that is not
# a str (e.g. None or a number).
insert_table="""
INSERT INTO test (col1,col2,col3,col4,col5,col6,col7)
VALUES ("""+col1+""",
"""+col2+""",
"""+col3+""",
"""+col4+""",
"""+col5+""",
"""+col6+""",
"""+col7+""")"""

cursor.execute(insert_table)

# NOTE(review): runs once after the loop, so it has no effect on the reads above.
x=x+1

# NOTE(review): no connection.commit() is issued before closing; presumably
# the inserted data is not persisted -- confirm against the driver docs.
connection.close()

我做对了吗?

考虑以下更改:

  • 调换嵌套 for 循环的顺序:改为逐行遍历,在每一行内按列取值并赋给各个字段,然后逐行插入。
  • 在逐行循环中缩进 execute 行。
  • 对 CREATE TABLE、INSERT INTO 等任何操作查询使用 .commit() 来提交更改。
  • 通过使用 .execute(query, params) 中的第二个参数来参数化您的查询,这不仅可以防范 SQL 注入(以防 Excel 单元格中有聪明用户写入的恶意代码),还可以避免字符串拼接和手动添加引号,使代码更清晰。参见 Oracle+Python docs

调整后的代码

# Insert one table row per worksheet row, using bind variables.
#
# Relies on `ws`, `cursor` and `connection` created earlier in the script
# (`cursor` must come from `connection.cursor()`).

# The statement text never changes, so build it once outside the loop.
insert_table = "INSERT INTO test (col1, col2, col3, col4, col5, col6, col7)" + \
               " VALUES (:1, :2, :3, :4, :5, :6, :7)"

for i in range(1, ws.max_row + 1):
    # Read seven *different* cells of row i, one per table column. Assigning
    # every colN inside a `for j` loop would leave all seven variables
    # holding the value of the last worksheet column.
    # NOTE(review): worksheet columns 2-8 are used because the original loop
    # started at column 2 -- adjust the range to match the real sheet layout.
    row_values = [ws.cell(row=i, column=j).value for j in range(2, 9)]

    # Values travel as bind parameters (:1 .. :7), never as SQL text.
    cursor.execute(insert_table, row_values)

# Commit once after all rows are inserted instead of once per row.
connection.commit()

connection.close()

您也可以考虑一种更直接(不需要对每一行的每一列进行循环)且更高效(使用 cursor.executemany 函数)的方法,并借助数据分析库 pandas,如下所示:

import pandas as pd
import cx_Oracle    
# Open the Oracle session and the cursor used by every statement below.
connection = cx_Oracle.connect("<schema>", "<pwd>", "<hostname>/<sid/service>")
cursor = connection.cursor()

# Source workbook and name of the target table.
file = r'C:\path\ToFile\myFile.xlsx'
tab_name = "TEST"

# Anonymous PL/SQL block: counts entries in `cat` with this name and type
# 'TABLE', and drops the table when exactly one is found, so that the
# CREATE TABLE below starts from a clean slate.
tab_exists = """
DECLARE
  v_exst INT;
BEGIN
  SELECT COUNT(*) 
    INTO v_exst 
    FROM cat 
   WHERE table_name = '{tab}' 
     AND table_type = 'TABLE';
  IF v_exst = 1 THEN
     EXECUTE IMMEDIATE('DROP TABLE {tab}');
  END IF;   
END;
""".format(tab=tab_name)
cursor.execute(tab_exists)

# (Re)create the target table with seven mandatory VARCHAR2(50) columns.
create_table = """
CREATE TABLE {tab} (
       col1 VARCHAR2(50) NOT NULL,
       col2 VARCHAR2(50) NOT NULL,
       col3 VARCHAR2(50) NOT NULL,
       col4 VARCHAR2(50) NOT NULL,
       col5 VARCHAR2(50) NOT NULL,
       col6 VARCHAR2(50) NOT NULL,
       col7 VARCHAR2(50) NOT NULL
)    """.format(tab=tab_name)
cursor.execute(create_table)

# Positional bind variables: one per table column.
insert_table = "INSERT INTO {tab} VALUES (:1,:2,:3,:4,:5,:6,:7)".format(tab=tab_name)

# Load the workbook into a DataFrame, replace missing cells with '' and turn
# the frame into a list of row lists, the shape executemany() binds from.
# NOTE(review): Oracle is documented to treat '' as NULL, so a blank cell
# would presumably be rejected by the NOT NULL constraints -- confirm.
df = pd.read_excel(file)
df_list = df.fillna('').values.tolist()

# One executemany() call sends every row with the same statement.
cursor.executemany(insert_table, df_list)

# Release the cursor, make the inserts permanent, then end the session.
cursor.close()
connection.commit()
connection.close()

其中还加入了在 TEST 表已存在时先将其删除的处理(删除 table 时务必小心)。

如果你有多个 sheet,并且需要将所有 sheet 中的内容都插入到 table 中,则将创建 table(cursor.execute(create_table))之后的尾部代码替换为下面的代码:

# Insert the contents of every worksheet of `file` into the target table.
#
# Relies on `file`, `tab_name`, `cursor` and `connection` defined earlier in
# the script.

# sheet_name=None makes read_excel load all sheets in a single pass and
# return a dict of DataFrames keyed by sheet name. This avoids leaving a
# separate ExcelFile handle open and re-reading the workbook once per sheet.
sheets = pd.read_excel(file, sheet_name=None)

insert_table = "INSERT INTO "+tab_name+" VALUES(:1,:2,:3,:4,:5,:6,:7)"
for df in sheets.values():
    # Missing cells become '' and each DataFrame row becomes one bind list.
    df_list = df.fillna('').values.tolist()
    # A sheet with no data rows has nothing to bind, so skip it.
    if df_list:
        cursor.executemany(insert_table, df_list)

# Release the cursor, make the inserts permanent, then end the session.
cursor.close()
connection.commit()
connection.close()