SQLDF 提取值并将其保存到文本文件
SQLDF extracting the values and saving it to a text file
我将 DBF 文件输入数据框并运行查询。
这是代码。
from dbf import Table
import pandas as pd  # was missing: every `pd.` reference below raised NameError
import pandasql as ps

# Load the DBF table into a DataFrame so pandasql can query it as `df1`.
dfPath1 = Table('filename.dbf')
dfPath1.open()
df1 = pd.DataFrame(dfPath1, columns=['column1', 'column2', 'column3', 'column4'])

# NOTE(review): `str` (the report date as text) and `name` are not defined in
# this snippet and are assumed to be set earlier. `str` shadows the builtin;
# consider renaming it where it is defined.
# NOTE(review): the date is spliced into the SQL text; validate it first if it
# can ever come from untrusted input.

# Sales-column suffix for each hourly bucket, in hour order (00:00 .. 04:00).
_HOUR_LABELS = ('12am', '1am', '2am', '3am', '4am')

# The five hourly queries differ only in the sales alias and the time window,
# so they are generated from a single template.
_QUERY_TEMPLATE = (
    "Select df1.date, df1.session_no AS 'session_number', "
    "SUM(df1.received) AS 'sales_for_{label}', "
    "SUM(df1.taxes)+SUM(df1.auto_grat)+SUM(df1.discount) AS 'gross_vat_sales', "
    "SUM(df1.taxes) AS 'total_vat', SUM(df1.discount) AS 'discount', "
    "SUM(df1.auto_grat) AS 'service_charge' "
    "From df1 Where open_time >= '{start}' And open_time < '{end}' "
    "And date= '{date}'"
)

_hour_frames = []
_hourly_frames = []
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
    for _hour, _label in enumerate(_HOUR_LABELS):
        _frame = ps.sqldf(_QUERY_TEMPLATE.format(
            label=_label,
            start='%02d:00:00' % _hour,
            # Half-open window [HH:00:00, HH+1:00:00). The previous upper
            # bound `< 'HH:59:59'` dropped rows opened in the last second
            # of every hour.
            end='%02d:00:00' % (_hour + 1),
            date=str,
        ))
        # Sums over an empty selection come back as NULL/NaN; show them as 0.
        _filled = _frame.fillna(0)
        print(_filled)
        _hour_frames.append(_frame)
        _hourly_frames.append(_filled)

# Keep the original per-hour names available to any later code.
hour1, hour2, hour3, hour4, hour5 = _hour_frames
hourly1, hourly2, hourly3, hourly4, hourly5 = _hourly_frames

# `name` followed by the seven result columns, selected by position.
data, data2, data3, data4, data5 = (
    [name] + [_frame.iloc[:, _pos] for _pos in range(7)]
    for _frame in _hour_frames
)

# Append the name as a column and write the first bucket as space-separated
# text without the row index.
hour1['name'] = name
hour1.to_csv('sample_output.txt', index=False, sep=' ')
然后得到这样的错误:
KeyError: "None of [Int64Index([0], dtype='int64')] are in the [columns]"
这是我想要的文本文件的输出..
“2020-01-01 943 527.0 56.46 56.46 0.0 0.0”
问题很可能出在下面一行
# hour1[[0]] looks up a column *labelled* 0. The query result's columns are
# named by their SQL aliases (no integer labels), so pandas raises KeyError.
data = [name,hour1[[0]],hour1[[1]],hour1[[2]],hour1[[3]],hour1[[4]],hour1[[5]],hour1[[6]]]
您可以使用 iloc 按位置访问列:
# `name` followed by the first seven columns of hour1, selected by position.
data = [name] + [hour1.iloc[:, position] for position in range(7)]
不过,使用 pandas.DataFrame.to_csv 可以更轻松地写入 csv。
例如,
# Add a constant 'name' column so the name is written alongside the query columns.
hour1['name'] = name
# Write the frame as space-separated text, omitting the row index.
hour1.to_csv('sample_output.txt', index=False, sep=' ')
通过使用 sep=' ',输出的每一行将像 OP 描述的那样以空格分隔。
备选方案:使用 dbfread 包:
https://gist.github.com/jamespaultg/990e4650a384ade5c57a2eb56515ba62
https://dbfread.readthedocs.io/en/latest/exporting_data.html#pandas-data-frames
该 sql 查询的 DataFrame 等效操作是:
import pandas as pd

# Bucket every row by the hour component of its open time.
df1['open_time_hr'] = pd.to_datetime(df1['open_time']).dt.hour
df1['gross_nat_value'] = df1.taxes + df1.auto_grat + df1.discount
# Sum the remaining columns for each (date, session, hour) group.
df_agg = df1.groupby(['date', 'session_no', 'open_time_hr']).sum()
# The group keys live in df_agg's index, so the index has to be written out;
# index=False silently dropped date / session_no / open_time_hr from the file.
df_agg.to_csv('sample_output.txt', index=True, sep=' ')
我将 DBF 文件输入数据框并运行查询。
这是代码。
from dbf import Table
import pandas as pd  # was missing: every `pd.` reference below raised NameError
import pandasql as ps

# Load the DBF table into a DataFrame so pandasql can query it as `df1`.
dfPath1 = Table('filename.dbf')
dfPath1.open()
df1 = pd.DataFrame(dfPath1, columns=['column1', 'column2', 'column3', 'column4'])

# NOTE(review): `str` (the report date as text) and `name` are not defined in
# this snippet and are assumed to be set earlier. `str` shadows the builtin;
# consider renaming it where it is defined.
# NOTE(review): the date is spliced into the SQL text; validate it first if it
# can ever come from untrusted input.

# Sales-column suffix for each hourly bucket, in hour order (00:00 .. 04:00).
_HOUR_LABELS = ('12am', '1am', '2am', '3am', '4am')

# The five hourly queries differ only in the sales alias and the time window,
# so they are generated from a single template.
_QUERY_TEMPLATE = (
    "Select df1.date, df1.session_no AS 'session_number', "
    "SUM(df1.received) AS 'sales_for_{label}', "
    "SUM(df1.taxes)+SUM(df1.auto_grat)+SUM(df1.discount) AS 'gross_vat_sales', "
    "SUM(df1.taxes) AS 'total_vat', SUM(df1.discount) AS 'discount', "
    "SUM(df1.auto_grat) AS 'service_charge' "
    "From df1 Where open_time >= '{start}' And open_time < '{end}' "
    "And date= '{date}'"
)

_hour_frames = []
_hourly_frames = []
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None):
    for _hour, _label in enumerate(_HOUR_LABELS):
        _frame = ps.sqldf(_QUERY_TEMPLATE.format(
            label=_label,
            start='%02d:00:00' % _hour,
            # Half-open window [HH:00:00, HH+1:00:00). The previous upper
            # bound `< 'HH:59:59'` dropped rows opened in the last second
            # of every hour.
            end='%02d:00:00' % (_hour + 1),
            date=str,
        ))
        # Sums over an empty selection come back as NULL/NaN; show them as 0.
        _filled = _frame.fillna(0)
        print(_filled)
        _hour_frames.append(_frame)
        _hourly_frames.append(_filled)

# Keep the original per-hour names available to any later code.
hour1, hour2, hour3, hour4, hour5 = _hour_frames
hourly1, hourly2, hourly3, hourly4, hourly5 = _hourly_frames

# `name` followed by the seven result columns, selected by position.
data, data2, data3, data4, data5 = (
    [name] + [_frame.iloc[:, _pos] for _pos in range(7)]
    for _frame in _hour_frames
)

# Append the name as a column and write the first bucket as space-separated
# text without the row index.
hour1['name'] = name
hour1.to_csv('sample_output.txt', index=False, sep=' ')
然后得到这样的错误:
KeyError: "None of [Int64Index([0], dtype='int64')] are in the [columns]"
这是我想要的文本文件的输出:
“2020-01-01 943 527.0 56.46 56.46 0.0 0.0”
问题很可能出在下面一行
# hour1[[0]] looks up a column *labelled* 0. The query result's columns are
# named by their SQL aliases (no integer labels), so pandas raises KeyError.
data = [name,hour1[[0]],hour1[[1]],hour1[[2]],hour1[[3]],hour1[[4]],hour1[[5]],hour1[[6]]]
您可以使用 iloc 按位置访问列:
# `name` followed by the first seven columns of hour1, selected by position.
data = [name] + [hour1.iloc[:, position] for position in range(7)]
不过,使用 pandas.DataFrame.to_csv 可以更轻松地写入 csv。
例如,
# Add a constant 'name' column so the name is written alongside the query columns.
hour1['name'] = name
# Write the frame as space-separated text, omitting the row index.
hour1.to_csv('sample_output.txt', index=False, sep=' ')
通过使用 sep=' ',输出的每一行将像 OP 描述的那样以空格分隔。
备选方案:使用 dbfread 包:
https://gist.github.com/jamespaultg/990e4650a384ade5c57a2eb56515ba62
https://dbfread.readthedocs.io/en/latest/exporting_data.html#pandas-data-frames
该 sql 查询的 DataFrame 等效操作是:
import pandas as pd

# Bucket every row by the hour component of its open time.
df1['open_time_hr'] = pd.to_datetime(df1['open_time']).dt.hour
df1['gross_nat_value'] = df1.taxes + df1.auto_grat + df1.discount
# Sum the remaining columns for each (date, session, hour) group.
df_agg = df1.groupby(['date', 'session_no', 'open_time_hr']).sum()
# The group keys live in df_agg's index, so the index has to be written out;
# index=False silently dropped date / session_no / open_time_hr from the file.
df_agg.to_csv('sample_output.txt', index=True, sep=' ')