使用 python 将单个 .csv 导入 mysql
importing single .csv into mysql with python
当 运行 这段代码时,我得到 Error while connecting to MySQL Not all parameters were used in the SQL statement
我也尝试过用另一种技术摄取这些
import mysql.connector as msql
from mysql.connector import Error
import pandas as pd
empdata = pd.read_csv('path_to_file', index_col=False, delimiter = ',')
empdata.head()
try:
conn = msql.connect(host='localhost', user='test345',
password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("CREATE DATABASE timetheft")
print("Database is created")
except Error as e:
print("Error while connecting to MySQL", e)
try:
conn = msql.connect(host='localhost', database='timetheft', user='test345', password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("select database();")
record = cursor.fetchone()
print("You're connected to database: ", record)
cursor.execute('DROP TABLE IF EXISTS company;')
print('Creating table....')
create_contracts_table = """
CREATE TABLE company ( ID VARCHAR(40) PRIMARY KEY,
Company_Name VARCHAR(40),
Country VARCHAR(40),
City VARCHAR(40),
Email VARCHAR(40),
Industry VARCHAR(30),
Employees VARCHAR(30)
);
"""
cursor.execute(create_company_table)
print("Table is created....")
for i,row in empdata.iterrows():
sql = "INSERT INTO timetheft.company VALUES (%S, %S, %S, %S, %S,%S,%S,%S)"
cursor.execute(sql, tuple(row))
print("Record inserted")
# the connection is not auto committed by default, so we must commit to save our changes
conn.commit()
except Error as e:
print("Error while connecting to MySQL", e)
我尝试的第二种技术
LOAD DATA LOCAL INFILE 'path_to_file'
INTO TABLE copmany
FIELDS TERMINATED BY ';'
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
效果更好,但错误很多。仅摄取了 20% 的行。
最后是 .csv 的摘录(数据在所有 1K 行中都是一致的)
"ID";"Company_Name";"国家";"城市";"电子邮件";"行业";"员工"
217520699;"Enim Corp.";"德国";"Bamberg";"posuere@diamvel.edu";"Internet";"51-100"
352428999;"Lacus Vestibulum Consulting";"Germany";"Villingen-Schwenningen";"egestas@lacusEtiambibendum.org";"Food Production";"100-500"
371718299;"Dictum Ultricies Ltd";"德国";"Anklam";"convallis.erat@sempercursus.co.uk";"Primary/Secondary 教育";"100-500"
676789799;"A Consulting";"Germany";"Andernach";"massa@etrisusQuisque.ca";"Government Relations";"100-500"
718526699;"Odio LLP";"德国";"Eisenhüttenstadt";"Quisque.varius@euismod.org";"电子学习";"11-50"
我解决了这些问题以使代码正常工作:
- 使插入语句中占位符的个数等于列数
- 占位符应小写
'%s'
- 单元格分隔符似乎是分号,而不是逗号。
对于简单地读取具有 ~1000 行的 csv Pandas 是矫枉过正的(并且 iterrows 似乎不像您预期的那样运行)。我改用了标准库中的 csv 模块。
import csv
...
sql = "INSERT INTO company VALUES (%s, %s, %s, %s, %s, %s, %s)"
with open("67359903.csv", "r", newline="") as f:
reader = csv.reader(f, delimiter=";")
# Skip the header row.
next(reader)
# For large files it may be more efficient to commit
# rows in batches.
cursor.executemany(sql, reader)
conn.commit()
如果使用csv
模块不方便,可以使用dataframe的itertuples方法遍历数据:
empdata = pd.read_csv('67359903.csv', index_col=False, delimiter=';')
for tuple_ in empdata.itertuples(index=False):
cursor.execute(sql, tuple_)
conn.commit()
或者直接dataframe can be dumped to the database。
import sqlalchemy as sa
engine = sa.create_engine('mysql+mysqlconnector:///test')
empdata.to_sql('company', engine, index=False, if_exists='replace')
当 运行 这段代码时,我得到 Error while connecting to MySQL Not all parameters were used in the SQL statement
我也尝试过用另一种技术摄取这些
import mysql.connector as msql
from mysql.connector import Error
import pandas as pd
empdata = pd.read_csv('path_to_file', index_col=False, delimiter = ',')
empdata.head()
try:
conn = msql.connect(host='localhost', user='test345',
password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("CREATE DATABASE timetheft")
print("Database is created")
except Error as e:
print("Error while connecting to MySQL", e)
try:
conn = msql.connect(host='localhost', database='timetheft', user='test345', password='test123')
if conn.is_connected():
cursor = conn.cursor()
cursor.execute("select database();")
record = cursor.fetchone()
print("You're connected to database: ", record)
cursor.execute('DROP TABLE IF EXISTS company;')
print('Creating table....')
create_contracts_table = """
CREATE TABLE company ( ID VARCHAR(40) PRIMARY KEY,
Company_Name VARCHAR(40),
Country VARCHAR(40),
City VARCHAR(40),
Email VARCHAR(40),
Industry VARCHAR(30),
Employees VARCHAR(30)
);
"""
cursor.execute(create_company_table)
print("Table is created....")
for i,row in empdata.iterrows():
sql = "INSERT INTO timetheft.company VALUES (%S, %S, %S, %S, %S,%S,%S,%S)"
cursor.execute(sql, tuple(row))
print("Record inserted")
# the connection is not auto committed by default, so we must commit to save our changes
conn.commit()
except Error as e:
print("Error while connecting to MySQL", e)
我尝试的第二种技术
LOAD DATA LOCAL INFILE 'path_to_file'
INTO TABLE copmany
FIELDS TERMINATED BY ';'
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
效果更好,但错误很多。仅摄取了 20% 的行。
最后是 .csv 的摘录(数据在所有 1K 行中都是一致的)
"ID";"Company_Name";"国家";"城市";"电子邮件";"行业";"员工" 217520699;"Enim Corp.";"德国";"Bamberg";"posuere@diamvel.edu";"Internet";"51-100" 352428999;"Lacus Vestibulum Consulting";"Germany";"Villingen-Schwenningen";"egestas@lacusEtiambibendum.org";"Food Production";"100-500" 371718299;"Dictum Ultricies Ltd";"德国";"Anklam";"convallis.erat@sempercursus.co.uk";"Primary/Secondary 教育";"100-500" 676789799;"A Consulting";"Germany";"Andernach";"massa@etrisusQuisque.ca";"Government Relations";"100-500" 718526699;"Odio LLP";"德国";"Eisenhüttenstadt";"Quisque.varius@euismod.org";"电子学习";"11-50"
我解决了这些问题以使代码正常工作:
- 使插入语句中占位符的个数等于列数
- 占位符应小写
'%s'
- 单元格分隔符似乎是分号,而不是逗号。
对于简单地读取具有 ~1000 行的 csv Pandas 是矫枉过正的(并且 iterrows 似乎不像您预期的那样运行)。我改用了标准库中的 csv 模块。
import csv
...
sql = "INSERT INTO company VALUES (%s, %s, %s, %s, %s, %s, %s)"
with open("67359903.csv", "r", newline="") as f:
reader = csv.reader(f, delimiter=";")
# Skip the header row.
next(reader)
# For large files it may be more efficient to commit
# rows in batches.
cursor.executemany(sql, reader)
conn.commit()
如果使用csv
模块不方便,可以使用dataframe的itertuples方法遍历数据:
empdata = pd.read_csv('67359903.csv', index_col=False, delimiter=';')
for tuple_ in empdata.itertuples(index=False):
cursor.execute(sql, tuple_)
conn.commit()
或者直接dataframe can be dumped to the database。
import sqlalchemy as sa
engine = sa.create_engine('mysql+mysqlconnector:///test')
empdata.to_sql('company', engine, index=False, if_exists='replace')