如何从数据框中的 csv 列表创建新的 csv
How to create new csv from list of csv's in dataframe
我知道我的代码离正确还差得很远,但我正在尝试逐行遍历一个 csv 列表,以创建一个新的 csv,其中每一行列出在该日期满足条件的所有 csv。所有 csv 的第一列都是“日期”,我想列出所有满足 data["entry"] > 3 的 csv 的名称,
并且在新的 csv 中日期仍然作为第一列。
更新:我想要做的是,针对每个 csv 找出满足条件的每个日期,并在新 csv 中把该 csv 的 file_name 追加到这些日期对应的行中。
# NOTE(review): this snippet is shown without its original indentation, so
# where the loop body and the function body end cannot be told from the text.
### Build the list of file names found in the data directory
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/')
### Prefix every file name with the directory to get a full path
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
complete_string = ' is complete'
# One "<file name> is complete" message per file; not used again in this snippet
listdrs_confirmation = [ x + complete_string for x in listdrs]
#print (listdrs_path)
### Start loop: for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file_path in listdrs_path:
# NOTE(review): index_col=0 turns the first CSV column into the index; if that
# column is "date", the later `data.date` lookup will not find a column - confirm.
data = pd.read_csv(file_path, index_col=0)
########################################
#### function 1
# Reads the CSV at file_path and returns it indexed by timestamp.
# NOTE(review): `ticker` is never used (the path comes from the enclosing
# `file_path` variable) and nothing in this snippet calls the function.
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
##print(listdr)
# Convert date to timestamp and make index
data.index = data["date"].apply(lambda x: pd.Timestamp(x))
# The "date" column now duplicates the index, so it is removed in place
data.drop("date", axis=1, inplace=True)
return data
## Keep the rows with Entry > 3 and record this file under each of their dates
data = data[data.Entry > 3]
for date in data.date:
# NOTE(review): `new_table` is not created anywhere in this snippet -
# presumably a dict of lists keyed by date is expected; confirm.
new_table[date].append(file_path)
# NOTE(review): `.DataFrame` is looked up on `data` (the object returned by
# read_csv); `pd.DataFrame` was presumably intended - confirm.
new_table_data = data.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())], columns=['date', 'table names'])
print(new_table_data)
我会做这样的事情。您需要根据需要修改以下代码段。
import pandas as pd
from glob import glob
from collections import defaultdict
# Create and save some sample data so the example is self-contained.
df1 = pd.DataFrame({'date':[1,2,3], 'entry':[4,3,2]})
df2 = pd.DataFrame({'date':[1,2,3], 'entry':[1,2,4]})
df3 = pd.DataFrame({'date':[1,2,3], 'entry':[3,1,5]})
df1.to_csv('table1.csv')
df2.to_csv('table2.csv')
df3.to_csv('table3.csv')

# Collect every CSV file in the current working directory.
tables = glob('*.csv')
# Maps each date to the names of the files whose row for that date passes
# the filter below.
new_table = defaultdict(list)

# Build the mapping, one file at a time.
for table in tables:
    df = pd.read_csv(table)
    # Keep only the rows that meet the condition.
    df = df[df.entry > 2]
    for date in df.date:
        new_table[date].append(table)

# One row per date (ascending), with the matching file names comma-joined.
new_table_df = pd.DataFrame(
    [(k, ','.join(new_table[k])) for k in sorted(new_table)],
    columns=['date', 'table names'],
)
print(new_table_df)
   date            table names
0     1  table3.csv,table1.csv
1     2             table1.csv
2     3  table2.csv,table3.csv
其他代码有一些问题,这是我能够想出的最终解决方案。
# Record, for every date in the current file whose Entry exceeds 3, a one-row
# frame holding the file name and that Entry value.
# This fragment relies on `data`, `file_name` and `new_table` being provided
# by the surrounding per-file loop.
if 'Entry' in data:
    # Keep only the rows that satisfy the condition.
    data = data[data.Entry > 3]
    # The date column is spelled 'date' in some files and 'Date' in others.
    date_col = next((col for col in ('date', 'Date') if col in data), None)
    if date_col is not None:
        # Walk dates and entries together instead of re-filtering the frame
        # per date: int() on a filtered Series fails as soon as a file holds
        # more than one matching row for the same date.
        for date, entry in zip(data[date_col], data.Entry):
            new_table.setdefault(date, []).append(
                pd.DataFrame({'FileName': [file_name], 'Entry': [int(entry)]}))
# Ordering by date is done later, when find_max walks sorted(tbl.keys()).
def find_max(tbl):
    """Return, for each date, the file names holding the largest ``Entry``.

    Args:
        tbl: Mapping of date -> list of DataFrames, each with ``FileName``
            and ``Entry`` columns.

    Returns:
        A dict whose keys are the dates in ascending order and whose values
        are lists of the ``FileName`` values tied for the maximum ``Entry``
        on that date. Dates with an empty list of frames are skipped.
    """
    new_table_data = {}
    for date in sorted(tbl):
        frames = tbl[date]
        if not frames:
            # pd.concat raises ValueError on an empty list; nothing to report.
            continue
        # ignore_index gives the merged frame a unique index, so the boolean
        # mask below lines up with it row by row.
        merged_dt = pd.concat(frames, ignore_index=True)
        max_entry_v = merged_dt['Entry'].max()
        is_max = merged_dt['Entry'] == max_entry_v
        new_table_data[date] = merged_dt.loc[is_max, 'FileName'].tolist()
    return new_table_data
# Reduce the per-date collections to the file name(s) with the largest Entry.
new_table_data = find_max(tbl=new_table)
# Earlier attempts at building/saving the result, left disabled by the author:
#df = pd.DataFrame(new_table, columns =['date', 'tickers'])
#df.to_csv(input_path, index = False, header = True)
# find_max(new_table)
# new_table_data = pd.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())],
# columns=['date', 'table names'])
# Show the resulting {date: [file names]} mapping.
print(new_table_data)
我知道我的代码离正确还差得很远,但我正在尝试逐行遍历一个 csv 列表,以创建一个新的 csv,其中每一行列出在该日期满足条件的所有 csv。所有 csv 的第一列都是“日期”,我想列出所有满足 data["entry"] > 3 的 csv 的名称,
并且在新的 csv 中日期仍然作为第一列。
更新:我想要做的是,针对每个 csv 找出满足条件的每个日期,并在新 csv 中把该 csv 的 file_name 追加到这些日期对应的行中。
# NOTE(review): this snippet is shown without its original indentation, so
# where the loop body and the function body end cannot be told from the text.
### Build the list of file names found in the data directory
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/')
### Prefix every file name with the directory to get a full path
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/SentdexTutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
complete_string = ' is complete'
# One "<file name> is complete" message per file; not used again in this snippet
listdrs_confirmation = [ x + complete_string for x in listdrs]
#print (listdrs_path)
### Start loop: for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file_path in listdrs_path:
# NOTE(review): index_col=0 turns the first CSV column into the index; if that
# column is "date", the later `data.date` lookup will not find a column - confirm.
data = pd.read_csv(file_path, index_col=0)
########################################
#### function 1
# Reads the CSV at file_path and returns it indexed by timestamp.
# NOTE(review): `ticker` is never used (the path comes from the enclosing
# `file_path` variable) and nothing in this snippet calls the function.
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
##print(listdr)
# Convert date to timestamp and make index
data.index = data["date"].apply(lambda x: pd.Timestamp(x))
# The "date" column now duplicates the index, so it is removed in place
data.drop("date", axis=1, inplace=True)
return data
## Keep the rows with Entry > 3 and record this file under each of their dates
data = data[data.Entry > 3]
for date in data.date:
# NOTE(review): `new_table` is not created anywhere in this snippet -
# presumably a dict of lists keyed by date is expected; confirm.
new_table[date].append(file_path)
# NOTE(review): `.DataFrame` is looked up on `data` (the object returned by
# read_csv); `pd.DataFrame` was presumably intended - confirm.
new_table_data = data.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())], columns=['date', 'table names'])
print(new_table_data)
我会做这样的事情。您需要根据需要修改以下代码段。
import pandas as pd
from glob import glob
from collections import defaultdict
# Create and save some sample data so the example is self-contained.
df1 = pd.DataFrame({'date':[1,2,3], 'entry':[4,3,2]})
df2 = pd.DataFrame({'date':[1,2,3], 'entry':[1,2,4]})
df3 = pd.DataFrame({'date':[1,2,3], 'entry':[3,1,5]})
df1.to_csv('table1.csv')
df2.to_csv('table2.csv')
df3.to_csv('table3.csv')

# Collect every CSV file in the current working directory.
tables = glob('*.csv')
# Maps each date to the names of the files whose row for that date passes
# the filter below.
new_table = defaultdict(list)

# Build the mapping, one file at a time.
for table in tables:
    df = pd.read_csv(table)
    # Keep only the rows that meet the condition.
    df = df[df.entry > 2]
    for date in df.date:
        new_table[date].append(table)

# One row per date (ascending), with the matching file names comma-joined.
new_table_df = pd.DataFrame(
    [(k, ','.join(new_table[k])) for k in sorted(new_table)],
    columns=['date', 'table names'],
)
print(new_table_df)
   date            table names
0     1  table3.csv,table1.csv
1     2             table1.csv
2     3  table2.csv,table3.csv
其他代码有一些问题,这是我能够想出的最终解决方案。
# Record, for every date in the current file whose Entry exceeds 3, a one-row
# frame holding the file name and that Entry value.
# This fragment relies on `data`, `file_name` and `new_table` being provided
# by the surrounding per-file loop.
if 'Entry' in data:
    # Keep only the rows that satisfy the condition.
    data = data[data.Entry > 3]
    # The date column is spelled 'date' in some files and 'Date' in others.
    date_col = next((col for col in ('date', 'Date') if col in data), None)
    if date_col is not None:
        # Walk dates and entries together instead of re-filtering the frame
        # per date: int() on a filtered Series fails as soon as a file holds
        # more than one matching row for the same date.
        for date, entry in zip(data[date_col], data.Entry):
            new_table.setdefault(date, []).append(
                pd.DataFrame({'FileName': [file_name], 'Entry': [int(entry)]}))
# Ordering by date is done later, when find_max walks sorted(tbl.keys()).
def find_max(tbl):
    """Return, for each date, the file names holding the largest ``Entry``.

    Args:
        tbl: Mapping of date -> list of DataFrames, each with ``FileName``
            and ``Entry`` columns.

    Returns:
        A dict whose keys are the dates in ascending order and whose values
        are lists of the ``FileName`` values tied for the maximum ``Entry``
        on that date. Dates with an empty list of frames are skipped.
    """
    new_table_data = {}
    for date in sorted(tbl):
        frames = tbl[date]
        if not frames:
            # pd.concat raises ValueError on an empty list; nothing to report.
            continue
        # ignore_index gives the merged frame a unique index, so the boolean
        # mask below lines up with it row by row.
        merged_dt = pd.concat(frames, ignore_index=True)
        max_entry_v = merged_dt['Entry'].max()
        is_max = merged_dt['Entry'] == max_entry_v
        new_table_data[date] = merged_dt.loc[is_max, 'FileName'].tolist()
    return new_table_data
# Reduce the per-date collections to the file name(s) with the largest Entry.
new_table_data = find_max(tbl=new_table)
# Earlier attempts at building/saving the result, left disabled by the author:
#df = pd.DataFrame(new_table, columns =['date', 'tickers'])
#df.to_csv(input_path, index = False, header = True)
# find_max(new_table)
# new_table_data = pd.DataFrame([(k, ','.join(new_table[k])) for k in sorted(new_table.keys())],
# columns=['date', 'table names'])
# Show the resulting {date: [file names]} mapping.
print(new_table_data)