带日期变量的 pyodbc 查询(获取上周 +4 小时的数据)
pyodbc query with dates variable (get data for last week +4hours)
我对 Python 很陌生,对 SQL 更陌生...
我卡在 pyodbc 查询上:
- 应该使用上周 +4.5 小时的数据创建数据框(示例:从上周一 07/03/2022 00:00:00 到本周一 14/03/2022 04:30:00)。
- 查询的主表是 tblLogs,它有 LogDateTime 列,格式为:2022-03-11 20:29:53.000
- tblLogs 有很多列,所以我还传入了要查询的列的列表 LogColumnForQuery ...
目前,我使用下面的代码,但它只收集两个日期之间的数据...
我无法弄清楚如何在周日午夜后添加几个小时。
代码示例:
# Export last week's rows from tblLogs to a CSV file, computing the week
# boundaries on the SQL Server side.
import pyodbc
import pandas as pd
# NOTE: driver, server, database, username and password are not defined in
# this snippet; they have to be assigned before this line runs.
cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
';UID=' + username + ';PWD=' + password)
# NOTE(review): this cursor is not used anywhere below in this snippet.
cursor = cnx.cursor()
# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
# Comma-separated column list that is spliced into the SELECT clause.
LogColumnForQuery = ', '.join(LogColumns)
# main query
# SET DATEFIRST 1 makes Monday weekday number 1 for DATEPART(WEEKDAY, ...).
# Lower bound (inclusive): today's date minus (weekday + 6) days, i.e. 00:00
# on the Monday of the previous week.
# Upper bound (exclusive): today's date minus (weekday - 1) days, i.e. 00:00
# on the Monday of the current week.
# NOTE(review): the upper bound stops at Monday 00:00, so the extra 4.5 hours
# described in the question are not covered by this query.
sql_query = pd.read_sql_query('SET DATEFIRST 1 SELECT ' + LogColumnForQuery +
'''
FROM tblLogs
WHERE LogDateTime >= DATEADD(day, -(DATEPART(WEEKDAY, GETDATE()) + 6), CONVERT(DATE, GETDATE()))
AND LogDateTime < DATEADD(day, 1 - DATEPART(WEEKDAY, GETDATE()), CONVERT(DATE, GETDATE())); ;
'''
, cnx)
# pd.read_sql_query already returns a DataFrame; this wraps it in a new one.
df = pd.DataFrame(sql_query)
# Write the result without the DataFrame index column.
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
我尝试改用下面的写法,但没有任何进展...
这些日期和它们的格式让我很烦 ;)
请指教
基于下面的代码——很可能是我的查询不正确,并且列列表的格式也不对....
# Second attempt: compute the week boundaries in Python and splice them into
# the SQL text.
import pyodbc
import pandas as pd
from datetime import datetime, timedelta
today = datetime.now()
# weekday() is 0 on Monday, so subtracting it lands on this week's Monday;
# the additional weeks=1 moves back to the Monday of the previous week.
lastMon = (today - timedelta(days = today.weekday(), weeks=1))
thisMon = today - timedelta(days = today.weekday())
# Pin the clock time: previous Monday at 00:00:00, this Monday at 04:30:00.
lastMon = lastMon.replace(hour=00, minute=00, second=00, microsecond=00)
thisMon = thisMon.replace(hour=4, minute=30, second=00, microsecond=00)
# Convert both datetimes to text: day/month/year followed by the locale's
# time representation (%X).
lastMon = datetime.strftime(lastMon, '%d/%m/%Y %X')
thisMon = datetime.strftime(thisMon, '%d/%m/%Y %X')
# NOTE: driver, server, database, username and password are not defined in
# this snippet; they have to be assigned before this line runs.
cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
';UID=' + username + ';PWD=' + password)
# NOTE(review): this cursor is not used anywhere below in this snippet.
cursor = cnx.cursor()
# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)
# NOTE(review): the formatted date text is interpolated without quotes, so
# the server receives something like (07/03/2022 00:00:00) instead of a date
# literal. That is presumably why this attempt fails; passing the datetime
# objects as query parameters would avoid the formatting problem -- confirm.
# Both comparisons are strict, so a row stamped exactly at either bound is
# excluded.
sql_query = pd.read_sql_query(f'SELECT {LogColumnForQuery} FROM tblLogs WHERE '
f'LogDateTime > ({lastMon}) AND LogDateTime < ({thisMon})'
, cnx)
# pd.read_sql_query already returns a DataFrame; this wraps it in a new one.
df = pd.DataFrame(sql_query)
# Write the result without the DataFrame index column.
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
我会使用 timedelta(时间增量)来满足您的需求(下面的代码用到的是 datetime 和 pandas,并不需要 numpy):
import datetime

import pandas as pd


def last_week_window(now=None, extra=datetime.timedelta(hours=4, minutes=30)):
    """Return ``(start, end)`` covering last week plus a grace period.

    Args:
        now: Reference datetime; defaults to ``datetime.datetime.now()``.
        extra: How far past this week's Monday 00:00 the window extends.

    Returns:
        A tuple ``(start, end)`` where ``start`` is 00:00 on the Monday of
        the previous week and ``end`` is 00:00 on the Monday of the current
        week plus ``extra``.
    """
    if now is None:
        now = datetime.datetime.now()
    # weekday() is 0 on Monday, so this is this week's Monday at midnight.
    this_monday = datetime.datetime.combine(
        now.date() - datetime.timedelta(days=now.weekday()),
        datetime.time.min,
    )
    return this_monday - datetime.timedelta(weeks=1), this_monday + extra


def main():
    """Export last week's rows of tblLogs (plus the grace period) to CSV."""
    # Imported here so last_week_window() stays usable without the ODBC driver.
    import pyodbc

    start, end = last_week_window()

    # driver, server, database, username and password must be defined
    # before main() is called.
    cnx = pyodbc.connect(
        'DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database
        + ';UID=' + username + ';PWD=' + password
    )
    try:
        # creating list of columns to select
        LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
        # Plain identifiers: wrapping them in single quotes would make SQL
        # Server return the column names as string constants.
        LogColumnForQuery = ', '.join(LogColumns)

        # The dates are passed as parameters (pyodbc uses "?" markers), so no
        # date formatting or quoting is needed in the SQL text.
        query = f"""
            SELECT {LogColumnForQuery}
            FROM tblLogs
            WHERE LogDateTime >= ?
              AND LogDateTime < ?
        """
        sql_query = pd.read_sql_query(query, cnx, params=[start, end])
    finally:
        cnx.close()

    sql_query.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)


if __name__ == "__main__":
    main()
使用 Python 的 datetime 和 SQLAlchemy Core,可以避免字符串格式化、引号处理等麻烦:
# Query last week's rows (plus 4.5 hours) with SQLAlchemy Core, letting the
# driver bind real datetime objects instead of formatting date strings.
import datetime

import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("mssql+pyodbc://scott:tiger^5HHH@mssql_199")
table_name = "tblLogs"

# set up test environment
# WARNING: this drops and recreates the table. Run it only against a scratch
# database, never against one that holds a real tblLogs.
with engine.begin() as conn:
    conn.exec_driver_sql(f"DROP TABLE IF EXISTS {table_name}")
    conn.exec_driver_sql(
        f"CREATE TABLE {table_name} (id int primary key, LogDateTime datetime2)"
    )

# test
# Reflect the table definition from the database.
tbl_logs = sa.Table(table_name, sa.MetaData(), autoload_with=engine)

start_of_today = datetime.datetime.combine(
    datetime.date.today(), datetime.datetime.min.time()
)
# Despite the name, this is the most recent Monday at 00:00, i.e. the Monday
# of the current week (weekday() is 0 on Monday).
start_of_last_monday = start_of_today - datetime.timedelta(
    days=start_of_today.weekday()
)  # (same as today if today is a Monday)
# Window: Monday of the previous week 00:00 .. Monday of this week 04:30.
start_time = start_of_last_monday - datetime.timedelta(days=7)
end_time = start_of_last_monday + datetime.timedelta(hours=4.5)

# Rendered as "BETWEEN ? AND ?" with both datetimes bound as parameters (see
# the rendered SQL below).
qry = sa.select(tbl_logs).where(
    tbl_logs.c.LogDateTime.between(start_time, end_time)
)
engine.echo = True  # log the rendered SQL and its bound parameters
df = pd.read_sql_query(qry, engine)
""" SQL rendered:
SELECT [tblLogs].id, [tblLogs].[LogDateTime]
FROM [tblLogs]
WHERE [tblLogs].[LogDateTime] BETWEEN ? AND ?
[generated in 0.00080s] (datetime.datetime(2022, 3, 7, 0, 0), datetime.datetime(2022, 3, 14, 4, 30))
"""
我对 Python 很陌生,对 SQL 更陌生... 我卡在 pyodbc 查询上:
- 应该使用上周 +4.5 小时的数据创建数据框(示例:从上周一 07/03/2022 00:00:00 到本周一 14/03/2022 04:30:00)。
- 查询的主表是 tblLogs,它有 LogDateTime 列,格式为:2022-03-11 20:29:53.000
- tblLogs 有很多列,所以我还传入了要查询的列的列表 LogColumnForQuery ...
目前,我使用下面的代码,但它只收集两个日期之间的数据... 我无法弄清楚如何在周日午夜后添加几个小时。 代码示例:
# Export last week's rows from tblLogs to a CSV file, computing the week
# boundaries on the SQL Server side.
import pyodbc
import pandas as pd
# NOTE: driver, server, database, username and password are not defined in
# this snippet; they have to be assigned before this line runs.
cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
';UID=' + username + ';PWD=' + password)
# NOTE(review): this cursor is not used anywhere below in this snippet.
cursor = cnx.cursor()
# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
# Comma-separated column list that is spliced into the SELECT clause.
LogColumnForQuery = ', '.join(LogColumns)
# main query
# SET DATEFIRST 1 makes Monday weekday number 1 for DATEPART(WEEKDAY, ...).
# Lower bound (inclusive): today's date minus (weekday + 6) days, i.e. 00:00
# on the Monday of the previous week.
# Upper bound (exclusive): today's date minus (weekday - 1) days, i.e. 00:00
# on the Monday of the current week.
# NOTE(review): the upper bound stops at Monday 00:00, so the extra 4.5 hours
# described in the question are not covered by this query.
sql_query = pd.read_sql_query('SET DATEFIRST 1 SELECT ' + LogColumnForQuery +
'''
FROM tblLogs
WHERE LogDateTime >= DATEADD(day, -(DATEPART(WEEKDAY, GETDATE()) + 6), CONVERT(DATE, GETDATE()))
AND LogDateTime < DATEADD(day, 1 - DATEPART(WEEKDAY, GETDATE()), CONVERT(DATE, GETDATE())); ;
'''
, cnx)
# pd.read_sql_query already returns a DataFrame; this wraps it in a new one.
df = pd.DataFrame(sql_query)
# Write the result without the DataFrame index column.
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
我尝试改用下面的写法,但没有任何进展...
这些日期和它们的格式让我很烦 ;) 请指教
基于下面的代码——很可能是我的查询不正确,并且列列表的格式也不对....
# Second attempt: compute the week boundaries in Python and splice them into
# the SQL text.
import pyodbc
import pandas as pd
from datetime import datetime, timedelta
today = datetime.now()
# weekday() is 0 on Monday, so subtracting it lands on this week's Monday;
# the additional weeks=1 moves back to the Monday of the previous week.
lastMon = (today - timedelta(days = today.weekday(), weeks=1))
thisMon = today - timedelta(days = today.weekday())
# Pin the clock time: previous Monday at 00:00:00, this Monday at 04:30:00.
lastMon = lastMon.replace(hour=00, minute=00, second=00, microsecond=00)
thisMon = thisMon.replace(hour=4, minute=30, second=00, microsecond=00)
# Convert both datetimes to text: day/month/year followed by the locale's
# time representation (%X).
lastMon = datetime.strftime(lastMon, '%d/%m/%Y %X')
thisMon = datetime.strftime(thisMon, '%d/%m/%Y %X')
# NOTE: driver, server, database, username and password are not defined in
# this snippet; they have to be assigned before this line runs.
cnx = pyodbc.connect('DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database +
';UID=' + username + ';PWD=' + password)
# NOTE(review): this cursor is not used anywhere below in this snippet.
cursor = cnx.cursor()
# creating list of columns to select
LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
LogColumnForQuery = ', '.join(LogColumns)
# NOTE(review): the formatted date text is interpolated without quotes, so
# the server receives something like (07/03/2022 00:00:00) instead of a date
# literal. That is presumably why this attempt fails; passing the datetime
# objects as query parameters would avoid the formatting problem -- confirm.
# Both comparisons are strict, so a row stamped exactly at either bound is
# excluded.
sql_query = pd.read_sql_query(f'SELECT {LogColumnForQuery} FROM tblLogs WHERE '
f'LogDateTime > ({lastMon}) AND LogDateTime < ({thisMon})'
, cnx)
# pd.read_sql_query already returns a DataFrame; this wraps it in a new one.
df = pd.DataFrame(sql_query)
# Write the result without the DataFrame index column.
df.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)
我会使用 timedelta(时间增量)来满足您的需求(下面的代码用到的是 datetime 和 pandas,并不需要 numpy):
import datetime

import pandas as pd


def last_week_window(now=None, extra=datetime.timedelta(hours=4, minutes=30)):
    """Return ``(start, end)`` covering last week plus a grace period.

    Args:
        now: Reference datetime; defaults to ``datetime.datetime.now()``.
        extra: How far past this week's Monday 00:00 the window extends.

    Returns:
        A tuple ``(start, end)`` where ``start`` is 00:00 on the Monday of
        the previous week and ``end`` is 00:00 on the Monday of the current
        week plus ``extra``.
    """
    if now is None:
        now = datetime.datetime.now()
    # weekday() is 0 on Monday, so this is this week's Monday at midnight.
    this_monday = datetime.datetime.combine(
        now.date() - datetime.timedelta(days=now.weekday()),
        datetime.time.min,
    )
    return this_monday - datetime.timedelta(weeks=1), this_monday + extra


def main():
    """Export last week's rows of tblLogs (plus the grace period) to CSV."""
    # Imported here so last_week_window() stays usable without the ODBC driver.
    import pyodbc

    start, end = last_week_window()

    # driver, server, database, username and password must be defined
    # before main() is called.
    cnx = pyodbc.connect(
        'DRIVER=' + driver + ';SERVER=tcp:' + server + ';PORT=1433;DATABASE=' + database
        + ';UID=' + username + ';PWD=' + password
    )
    try:
        # creating list of columns to select
        LogColumns = ['LogID', 'LogDateTime', 'EmployeeID', 'EntryPointID', 'EventType', 'DeviceID']
        # Plain identifiers: wrapping them in single quotes would make SQL
        # Server return the column names as string constants.
        LogColumnForQuery = ', '.join(LogColumns)

        # The dates are passed as parameters (pyodbc uses "?" markers), so no
        # date formatting or quoting is needed in the SQL text.
        query = f"""
            SELECT {LogColumnForQuery}
            FROM tblLogs
            WHERE LogDateTime >= ?
              AND LogDateTime < ?
        """
        sql_query = pd.read_sql_query(query, cnx, params=[start, end])
    finally:
        cnx.close()

    sql_query.to_csv(r'C:\Test\Monday\LastWeekLogs_Data.csv', index=False)


if __name__ == "__main__":
    main()
使用 Python 的 datetime 和 SQLAlchemy Core,可以避免字符串格式化、引号处理等麻烦:
# Query last week's rows (plus 4.5 hours) with SQLAlchemy Core, letting the
# driver bind real datetime objects instead of formatting date strings.
import datetime

import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("mssql+pyodbc://scott:tiger^5HHH@mssql_199")
table_name = "tblLogs"

# set up test environment
# WARNING: this drops and recreates the table. Run it only against a scratch
# database, never against one that holds a real tblLogs.
with engine.begin() as conn:
    conn.exec_driver_sql(f"DROP TABLE IF EXISTS {table_name}")
    conn.exec_driver_sql(
        f"CREATE TABLE {table_name} (id int primary key, LogDateTime datetime2)"
    )

# test
# Reflect the table definition from the database.
tbl_logs = sa.Table(table_name, sa.MetaData(), autoload_with=engine)

start_of_today = datetime.datetime.combine(
    datetime.date.today(), datetime.datetime.min.time()
)
# Despite the name, this is the most recent Monday at 00:00, i.e. the Monday
# of the current week (weekday() is 0 on Monday).
start_of_last_monday = start_of_today - datetime.timedelta(
    days=start_of_today.weekday()
)  # (same as today if today is a Monday)
# Window: Monday of the previous week 00:00 .. Monday of this week 04:30.
start_time = start_of_last_monday - datetime.timedelta(days=7)
end_time = start_of_last_monday + datetime.timedelta(hours=4.5)

# Rendered as "BETWEEN ? AND ?" with both datetimes bound as parameters (see
# the rendered SQL below).
qry = sa.select(tbl_logs).where(
    tbl_logs.c.LogDateTime.between(start_time, end_time)
)
engine.echo = True  # log the rendered SQL and its bound parameters
df = pd.read_sql_query(qry, engine)
""" SQL rendered:
SELECT [tblLogs].id, [tblLogs].[LogDateTime]
FROM [tblLogs]
WHERE [tblLogs].[LogDateTime] BETWEEN ? AND ?
[generated in 0.00080s] (datetime.datetime(2022, 3, 7, 0, 0), datetime.datetime(2022, 3, 14, 4, 30))
"""