PyMySQL INSERT INTO query ignoring/cutting off query not 运行
PyMySQL INSERT INTO query ignoring/cutting off query not running
我有两个 INSERT INTO 查询,有两个不同的连接和游标,sql_into_scores 查询运行并向数据库添加值(分数和统计输出),但 sql_into_metrics 没有。两者都应该向数据库添加一个值列表。它特别告诉我 sql_into_metrics 查询中存在问题(也自行检查了其他查询)。这就是 lambda 函数中的所有 运行,并将结果存储在 RDS 数据库中。 MySQL 版本是 5.7.
连接(用户等。所有工作,因为我还完全删除了 sql_into_metrics 查询,并且 sql_into_scores 查询中的值存储在 AWS RDS 上。
数据从 s3 中提取(在一个 .txt 文件中)并通过 read_files 函数读入两个 numpy 数组。文本文件中的数据示例:
{"entity_id": [42, 11, 12], "score": [95, 95, 30], "TruePos": [0], "FalsePos": [1], "FalseNeg": [0], "TrueNeg": [0], "score_overall": [0.0], "precision": [0.0], "recall": [0.0], "fscore": [0.0], "support": [0.0]}
我尝试了以下方法来解决指标查询不想执行的原因:
1) 修改从(metric_list)添加值的列表,格式如下:
- [ , , , ]
- ( , , , )
- [ [ , , , ] ]
2) 将 lists/tuples 中的值全部更改为浮点数(然后分别更改 VALUES (%f, ...)。
3) 我试过 inscurs.execute 和 inscurs.execute 很多
4) 两个查询都有 conn 和 inscurs,并通过第二个 conn_2 和 inscurs_2 分开,仅用于 sql_into_metrics 查询。
def lambda_handler(event, context):
'''
Take the metric data collected after each run and
store in RDS for performance tracking internally.
'''
# Insert in database
conn = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)
conn_2 = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)
sql_into_metrics = "INSERT INTO dealscore_metrics_main (score_overall, precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
sql_into_scores = "INSERT INTO dealscore_scores (entity_id, score) VALUES (%s,%s)"
inscurs = pymysql.cursors.Cursor(conn)
inscurs_2 = pymysql.cursors.Cursor(conn_2)
try:
print("Adding the latest metric data into RDS...")
score_data_s3,metric_data_s3 = read_files(prefix=prefix_to_metrics, bucket=BUCKET)
metric_list = metric_data_s3.tolist()
# Commented out last modifications to fix the problem(which didnt work, this was the last try, before reverting back to output now)
# metric_list = [item for t in metric_list for item in t]
# metric_list = [float(x) for x in metric_list]
# print(type(metric_list[0]))
score_data_s3 = score_data_s3.tolist()
print("This is metric_list:", metric_list)
print("This is score_data_s3:", score_data_s3)
print(conn)
try:
inscurs_2.executemany(sql_into_metrics, metric_list)
#inscurs_1.execute(sql_into_metrics, metric_list)
except:
print("did not upload")
raise
inscurs.executemany(sql_into_scores, score_data_s3)
print(inscurs)
print(inscurs_2)
conn.commit()
conn_2.commit()
print("Now deleting the file from s3")
delete_s3_file(prefix=prefix_to_metrics)
print("Delete finished, and")
except Exception as e:
print('Unable to add latest metrics to RDS, Error: ', e)
print(traceback.format_exc())
conn.rollback()
conn_2.rollback()
inscurs.close()
inscurs_2.close()
return print("The upload was successful, shutting down...")
#where it pulls the data from (S3, .txt file, transforms to np array for #scores and metrics and returns the two arrays to then be transformed #into lists before executing the query. (This read_files function does #run normally)
def read_files(prefix, bucket):
metric_bucket = bucket
session = boto3.session.Session()
REGION = session.region_name
s3 = boto3.client('s3', region_name=REGION)
txt_file = s3.get_object(Bucket=metric_bucket, Key=prefix)
metrics = txt_file['Body'].read()
df = json.loads(metrics)
df['entity_id'] = [int(x) for x in df['entity_id']]
df['score'] = [int(x) for x in df['score']]
df['TruePos'] = [int(x) for x in df['TruePos']]
df['FalsePos'] = [int(x) for x in df['FalsePos']]
df['FalseNeg'] = [int(x) for x in df['FalseNeg']]
df['TrueNeg'] = [int(x) for x in df['TrueNeg']]
df['score_overall'] = [float(x) for x in df['score_overall']]
df['precision'] = [float(x) for x in df['precision']]
df['recall'] = [float(x) for x in df['recall']]
df['fscore'] = [float(x) for x in df['fscore']]
df['support'] = [float(x) for x in df['support']]
print(df)
numpy_array_scores = np.column_stack(
(
df['entity_id'],
df['score']
)
)
numpy_array_metrics = np.column_stack(
(
df['score_overall'],
df['precision'],
df['recall'],
df['fscore'],
df['support'],
df['TruePos'],
df['FalsePos'],
df['FalseNeg'],
df['TrueNeg']
)
)
print("The numpy array_scores:", numpy_array_scores)
print("The numpy array_metrics:", numpy_array_metrics)
return numpy_array_scores, numpy_array_metrics
这是来自 AWS lambda 的日志 output/ERROR 消息,我希望将 metric_list 和 score_data_s3 值(在列表中)添加到 RDS。
START RequestId: .....
Connecting to the Dealscore RDS DB:
Adding the latest metric data into RDS...
{'entity_id': [42, 11, 12], 'score': [95, 95, 30], 'TruePos': [0], 'FalsePos': [1], 'FalseNeg': [0], 'TrueNeg': [0], 'score_overall': [0.0], 'precision': [0.0], 'recall': [0.0], 'fscore': [0.0], 'support': [0.0]}
The numpy array_scores: [[42 95]
[11 95]
[12 30]]
The numpy array_metrics: [[0. 0. 0. 0. 0. 0. 1. 0. 0.]]
This is metric_list: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]
This is score_data_s3: [[42, 95], [11, 95], [12, 30]]
<pymysql.connections.Connection object at 0x7fa52ff33f28>
did not upload
Unable to add latest metrics to RDS, Error: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")
Traceback (most recent call last):
File "/var/task/upload_to_rds.py", line 57, in lambda_handler
inscurs_2.executemany(sql_into_metrics, metric_list)
File "/var/task/pymysql/cursors.py", line 197, in executemany
self._get_db().encoding)
File "/var/task/pymysql/cursors.py", line 234, in _do_execute_many
rows += self.execute(sql + postfix)
File "/var/task/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/var/task/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/var/task/pymysql/connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/var/task/pymysql/connections.py", line 732, in _read_query_result
result.read()
File "/var/task/pymysql/connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "/var/task/pymysql/connections.py", line 684, in _read_packet
packet.check_error()
File "/var/task/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/var/task/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError:
(1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")
The upload was successful, shutting down...
END RequestId: ...
REPORT RequestId: ...
Duration: 813.15 ms Billed Duration: 900 ms Memory Size: 256 MB Max Memory Used: 103 MB
问题是 precision
是一个 Mysql 保留字,您已将其用作列名,因此您应该使用反引号将其四舍五入并将查询更改为
sql_into_metrics = "INSERT INTO dealscore_metrics_main
(score_overall, `precision`, recall, fscore, support,
true_pos, false_pos, false_neg, true_neg)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
我有两个 INSERT INTO 查询,有两个不同的连接和游标,sql_into_scores 查询运行并向数据库添加值(分数和统计输出),但 sql_into_metrics 没有。两者都应该向数据库添加一个值列表。它特别告诉我 sql_into_metrics 查询中存在问题(也自行检查了其他查询)。这就是 lambda 函数中的所有 运行,并将结果存储在 RDS 数据库中。 MySQL 版本是 5.7.
连接(用户等。所有工作,因为我还完全删除了 sql_into_metrics 查询,并且 sql_into_scores 查询中的值存储在 AWS RDS 上。
数据从 s3 中提取(在一个 .txt 文件中)并通过 read_files 函数读入两个 numpy 数组。文本文件中的数据示例:
{"entity_id": [42, 11, 12], "score": [95, 95, 30], "TruePos": [0], "FalsePos": [1], "FalseNeg": [0], "TrueNeg": [0], "score_overall": [0.0], "precision": [0.0], "recall": [0.0], "fscore": [0.0], "support": [0.0]}
我尝试了以下方法来解决指标查询不想执行的原因:
1) 修改从(metric_list)添加值的列表,格式如下:
- [ , , , ]
- ( , , , )
- [ [ , , , ] ]
2) 将 lists/tuples 中的值全部更改为浮点数(然后分别更改 VALUES (%f, ...)。
3) 我试过 inscurs.execute 和 inscurs.execute 很多
4) 两个查询都有 conn 和 inscurs,并通过第二个 conn_2 和 inscurs_2 分开,仅用于 sql_into_metrics 查询。
def lambda_handler(event, context):
'''
Take the metric data collected after each run and
store in RDS for performance tracking internally.
'''
# Insert in database
conn = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)
conn_2 = pymysql.connect(user=DB_USERNAME, password=DB_PASSWORD, host=DB_HOST, database=DB_DATABASE)
sql_into_metrics = "INSERT INTO dealscore_metrics_main (score_overall, precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
sql_into_scores = "INSERT INTO dealscore_scores (entity_id, score) VALUES (%s,%s)"
inscurs = pymysql.cursors.Cursor(conn)
inscurs_2 = pymysql.cursors.Cursor(conn_2)
try:
print("Adding the latest metric data into RDS...")
score_data_s3,metric_data_s3 = read_files(prefix=prefix_to_metrics, bucket=BUCKET)
metric_list = metric_data_s3.tolist()
# Commented out last modifications to fix the problem(which didnt work, this was the last try, before reverting back to output now)
# metric_list = [item for t in metric_list for item in t]
# metric_list = [float(x) for x in metric_list]
# print(type(metric_list[0]))
score_data_s3 = score_data_s3.tolist()
print("This is metric_list:", metric_list)
print("This is score_data_s3:", score_data_s3)
print(conn)
try:
inscurs_2.executemany(sql_into_metrics, metric_list)
#inscurs_1.execute(sql_into_metrics, metric_list)
except:
print("did not upload")
raise
inscurs.executemany(sql_into_scores, score_data_s3)
print(inscurs)
print(inscurs_2)
conn.commit()
conn_2.commit()
print("Now deleting the file from s3")
delete_s3_file(prefix=prefix_to_metrics)
print("Delete finished, and")
except Exception as e:
print('Unable to add latest metrics to RDS, Error: ', e)
print(traceback.format_exc())
conn.rollback()
conn_2.rollback()
inscurs.close()
inscurs_2.close()
return print("The upload was successful, shutting down...")
#where it pulls the data from (S3, .txt file, transforms to np array for #scores and metrics and returns the two arrays to then be transformed #into lists before executing the query. (This read_files function does #run normally)
def read_files(prefix, bucket):
metric_bucket = bucket
session = boto3.session.Session()
REGION = session.region_name
s3 = boto3.client('s3', region_name=REGION)
txt_file = s3.get_object(Bucket=metric_bucket, Key=prefix)
metrics = txt_file['Body'].read()
df = json.loads(metrics)
df['entity_id'] = [int(x) for x in df['entity_id']]
df['score'] = [int(x) for x in df['score']]
df['TruePos'] = [int(x) for x in df['TruePos']]
df['FalsePos'] = [int(x) for x in df['FalsePos']]
df['FalseNeg'] = [int(x) for x in df['FalseNeg']]
df['TrueNeg'] = [int(x) for x in df['TrueNeg']]
df['score_overall'] = [float(x) for x in df['score_overall']]
df['precision'] = [float(x) for x in df['precision']]
df['recall'] = [float(x) for x in df['recall']]
df['fscore'] = [float(x) for x in df['fscore']]
df['support'] = [float(x) for x in df['support']]
print(df)
numpy_array_scores = np.column_stack(
(
df['entity_id'],
df['score']
)
)
numpy_array_metrics = np.column_stack(
(
df['score_overall'],
df['precision'],
df['recall'],
df['fscore'],
df['support'],
df['TruePos'],
df['FalsePos'],
df['FalseNeg'],
df['TrueNeg']
)
)
print("The numpy array_scores:", numpy_array_scores)
print("The numpy array_metrics:", numpy_array_metrics)
return numpy_array_scores, numpy_array_metrics
这是来自 AWS lambda 的日志 output/ERROR 消息,我希望将 metric_list 和 score_data_s3 值(在列表中)添加到 RDS。
START RequestId: .....
Connecting to the Dealscore RDS DB:
Adding the latest metric data into RDS...
{'entity_id': [42, 11, 12], 'score': [95, 95, 30], 'TruePos': [0], 'FalsePos': [1], 'FalseNeg': [0], 'TrueNeg': [0], 'score_overall': [0.0], 'precision': [0.0], 'recall': [0.0], 'fscore': [0.0], 'support': [0.0]}
The numpy array_scores: [[42 95]
[11 95]
[12 30]]
The numpy array_metrics: [[0. 0. 0. 0. 0. 0. 1. 0. 0.]]
This is metric_list: [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]]
This is score_data_s3: [[42, 95], [11, 95], [12, 30]]
<pymysql.connections.Connection object at 0x7fa52ff33f28>
did not upload
Unable to add latest metrics to RDS, Error: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")
Traceback (most recent call last):
File "/var/task/upload_to_rds.py", line 57, in lambda_handler
inscurs_2.executemany(sql_into_metrics, metric_list)
File "/var/task/pymysql/cursors.py", line 197, in executemany
self._get_db().encoding)
File "/var/task/pymysql/cursors.py", line 234, in _do_execute_many
rows += self.execute(sql + postfix)
File "/var/task/pymysql/cursors.py", line 170, in execute
result = self._query(query)
File "/var/task/pymysql/cursors.py", line 328, in _query
conn.query(q)
File "/var/task/pymysql/connections.py", line 517, in query
self._affected_rows = self._read_query_result(unbuffered=unbuffered)
File "/var/task/pymysql/connections.py", line 732, in _read_query_result
result.read()
File "/var/task/pymysql/connections.py", line 1075, in read
first_packet = self.connection._read_packet()
File "/var/task/pymysql/connections.py", line 684, in _read_packet
packet.check_error()
File "/var/task/pymysql/protocol.py", line 220, in check_error
err.raise_mysql_exception(self._data)
File "/var/task/pymysql/err.py", line 109, in raise_mysql_exception
raise errorclass(errno, errval)
pymysql.err.ProgrammingError:
(1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'precision, recall, fscore, support, true_pos, false_pos, false_neg, true_neg) VA' at line 1")
The upload was successful, shutting down...
END RequestId: ...
REPORT RequestId: ...
Duration: 813.15 ms Billed Duration: 900 ms Memory Size: 256 MB Max Memory Used: 103 MB
问题是 precision
是一个 Mysql 保留字,您已将其用作列名,因此您应该使用反引号将其四舍五入并将查询更改为
sql_into_metrics = "INSERT INTO dealscore_metrics_main
(score_overall, `precision`, recall, fscore, support,
true_pos, false_pos, false_neg, true_neg)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"