透视 csv 数据?
Pivot csv data?
我是 Python 的新手,所以如果这很容易,请原谅。我有一个包含如下数据的 csv 文件:
Symbol,date,price
apple, 23/12/2016, 50
apple, 26/12/2016, 51
apple, 27/12/2016, 52
google,23/12/2016, 70
google,26/12/2016, 71
google,27/12/2016, 72
我需要编写一个新的 csv 文件,如下所示:
Date, apple, google
23/12/2016,50,70
26/12/2016,51,71
27/12/2016,52,72
我目前的代码如下。但是,我似乎无法将符号作为列显示。
# Asker's attempt, quoted as posted. NOTE: the block indentation was lost when
# this snippet was extracted, so the nesting of the statements below is not
# visible here.
import csv
import os
# Register a comma-delimited dialect under the name 'mydialect'.
csv.register_dialect(
'mydialect',
delimiter =',',
)
# Accumulators for the distinct symbols and dates seen in the input.
symbol, date=[],[]
with open('EB_CUT2.csv',"rt") as dfile:
thedata = csv.reader(dfile, dialect ='mydialect')
for row in thedata:
# NOTE(review): row[4] is the fifth field, but the sample data shown in the
# question has only three columns (Symbol,date,price) -- presumably the real
# file is wider; verify.
if row[4] not in date:
date.append(row[4])
if row[0] not in symbol:
symbol.append(row[0])
# NOTE(review): the file is opened by a `with` statement above, so this
# explicit close looks redundant -- confirm against the original indentation.
dfile.close()
ebout = open('EB_CUT.csv',"wt",newline='')
wr = csv.writer(ebout)
# Only the collected dates are written, one per output row; the `symbol` list
# is never written out.
for val in date:
wr.writerow([val])
ebout.close()
一个pandas解决方案:
# Pivot the long-format CSV (Symbol,date,price) into one price column per
# symbol, indexed by date.
import pandas as pd  # the `pd` alias is required: every call below uses it

# Column 1 ("date") becomes the index and is parsed as dates.
# dayfirst=True because the input is dd/mm/yyyy; without it a date such as
# 01/02/2016 could be read as January 2nd.
# skipinitialspace=True drops the blank that follows each comma in the input.
df = pd.read_csv(
    'EB_CUT2.csv',
    index_col=1,
    parse_dates=True,
    dayfirst=True,
    skipinitialspace=True,
)
# One row per date, one column per distinct Symbol, cells taken from 'price'.
df2 = pd.pivot_table(df, values='price', columns=['Symbol'], index=df.index)
df2.to_csv('EB_CUT.csv')
EB_CUT.csv
的内容:
date,apple,google
2016-12-23,50,70
2016-12-26,51,71
2016-12-27,52,72
非pandas解法:
# Pivot 'EB_CUT2.csv' (Symbol,date,price rows) into 'EB_CUT.csv' with one row
# per date and one column per symbol, using only the standard library.
import csv
from datetime import datetime
import os

MY_DIALECT = 'mydialect'
# skipinitialspace drops the blank that follows each comma in the input.
csv.register_dialect(MY_DIALECT, delimiter=',', skipinitialspace=True)

# Maps each date to a {symbol: price} dict; prices stay as the strings read
# from the file.
daily_prices = {}
with open('EB_CUT2.csv', "rt", newline='') as dfile:
    reader = csv.DictReader(dfile, dialect=MY_DIALECT)
    for row in reader:
        # Parse to a date object so the rows sort chronologically rather
        # than as dd/mm/yyyy text.
        date = datetime.strptime(row['date'], '%d/%m/%Y').date()
        entry = daily_prices.setdefault(date, {})
        entry[row['Symbol']] = row['price']

# Every symbol seen on any date, in a stable (sorted) column order.
all_symbols = sorted(
    set(symbol for value in daily_prices.values() for symbol in value))

with open('EB_CUT.csv', "wt", newline='') as ebout:
    writer = csv.writer(ebout)
    writer.writerow(['Date'] + all_symbols)  # header
    for date, prices in sorted(daily_prices.items()):
        row = [date.strftime('%d/%m/%Y')]
        for symbol in all_symbols:
            # Leave the cell empty when a symbol has no price on this date.
            row.append(prices.get(symbol, ''))
        writer.writerow(row)
从下面的评论来看,您之后可能还想合并更多的 csv 文件。如果您想先用任何现有的 'EB_CUT.csv'
文件来初始化 daily_prices
字典(实际上就是上面最后一步的逆操作),可以使用以下代码:
# Seed daily_prices ({date: {symbol: price}}) from a previously written
# 'EB_CUT.csv', if one exists; otherwise start empty.
daily_prices = {}
if os.path.isfile('EB_CUT.csv'):  # existing file?
    # Initialize daily_prices from existing csv file.
    with open('EB_CUT.csv', "rt", newline='') as inf:
        reader = csv.reader(inf)
        # The header row names the symbol of each price column, so keep it
        # instead of discarding it; None means the file is empty.
        header = next(reader, None)
        symbols = header[1:] if header else []
        for row in reader:
            if not row:  # tolerate blank lines
                continue
            date = datetime.strptime(row[0], '%d/%m/%Y').date()
            days_prices = daily_prices.setdefault(date, {})
            # Pair each price cell with the symbol from its column header.
            for symbol, price in zip(symbols, row[1:]):
                if price:  # an empty cell means no price for that symbol
                    days_prices[symbol] = price
除了通过读取当前 csv 文件来进行初始化之外,另一种做法是始终将与当前 csv 文件对应的 daily_prices
数据以您选择的任何格式保存在单独的文件中。使用 pickle
或 json
模块可以很容易地将它写入并读回。
我是 Python 的新手,所以如果这很容易,请原谅。我有一个包含如下数据的 csv 文件:
Symbol,date,price
apple, 23/12/2016, 50
apple, 26/12/2016, 51
apple, 27/12/2016, 52
google,23/12/2016, 70
google,26/12/2016, 71
google,27/12/2016, 72
我需要编写一个新的 csv 文件,如下所示:
Date, apple, google
23/12/2016,50,70
26/12/2016,51,71
27/12/2016,52,72
我目前的代码如下。但是,我似乎无法将符号作为列显示。
# Asker's attempt, quoted as posted. NOTE: the block indentation was lost when
# this snippet was extracted, so the nesting of the statements below is not
# visible here.
import csv
import os
# Register a comma-delimited dialect under the name 'mydialect'.
csv.register_dialect(
'mydialect',
delimiter =',',
)
# Accumulators for the distinct symbols and dates seen in the input.
symbol, date=[],[]
with open('EB_CUT2.csv',"rt") as dfile:
thedata = csv.reader(dfile, dialect ='mydialect')
for row in thedata:
# NOTE(review): row[4] is the fifth field, but the sample data shown in the
# question has only three columns (Symbol,date,price) -- presumably the real
# file is wider; verify.
if row[4] not in date:
date.append(row[4])
if row[0] not in symbol:
symbol.append(row[0])
# NOTE(review): the file is opened by a `with` statement above, so this
# explicit close looks redundant -- confirm against the original indentation.
dfile.close()
ebout = open('EB_CUT.csv',"wt",newline='')
wr = csv.writer(ebout)
# Only the collected dates are written, one per output row; the `symbol` list
# is never written out.
for val in date:
wr.writerow([val])
ebout.close()
一个pandas解决方案:
# Pivot the long-format CSV (Symbol,date,price) into one price column per
# symbol, indexed by date.
import pandas as pd  # the `pd` alias is required: every call below uses it

# Column 1 ("date") becomes the index and is parsed as dates.
# dayfirst=True because the input is dd/mm/yyyy; without it a date such as
# 01/02/2016 could be read as January 2nd.
# skipinitialspace=True drops the blank that follows each comma in the input.
df = pd.read_csv(
    'EB_CUT2.csv',
    index_col=1,
    parse_dates=True,
    dayfirst=True,
    skipinitialspace=True,
)
# One row per date, one column per distinct Symbol, cells taken from 'price'.
df2 = pd.pivot_table(df, values='price', columns=['Symbol'], index=df.index)
df2.to_csv('EB_CUT.csv')
EB_CUT.csv
的内容:
date,apple,google
2016-12-23,50,70
2016-12-26,51,71
2016-12-27,52,72
非pandas解法:
# Pivot 'EB_CUT2.csv' (Symbol,date,price rows) into 'EB_CUT.csv' with one row
# per date and one column per symbol, using only the standard library.
import csv
from datetime import datetime
import os

MY_DIALECT = 'mydialect'
# skipinitialspace drops the blank that follows each comma in the input.
csv.register_dialect(MY_DIALECT, delimiter=',', skipinitialspace=True)

# Maps each date to a {symbol: price} dict; prices stay as the strings read
# from the file.
daily_prices = {}
with open('EB_CUT2.csv', "rt", newline='') as dfile:
    reader = csv.DictReader(dfile, dialect=MY_DIALECT)
    for row in reader:
        # Parse to a date object so the rows sort chronologically rather
        # than as dd/mm/yyyy text.
        date = datetime.strptime(row['date'], '%d/%m/%Y').date()
        entry = daily_prices.setdefault(date, {})
        entry[row['Symbol']] = row['price']

# Every symbol seen on any date, in a stable (sorted) column order.
all_symbols = sorted(
    set(symbol for value in daily_prices.values() for symbol in value))

with open('EB_CUT.csv', "wt", newline='') as ebout:
    writer = csv.writer(ebout)
    writer.writerow(['Date'] + all_symbols)  # header
    for date, prices in sorted(daily_prices.items()):
        row = [date.strftime('%d/%m/%Y')]
        for symbol in all_symbols:
            # Leave the cell empty when a symbol has no price on this date.
            row.append(prices.get(symbol, ''))
        writer.writerow(row)
从下面的评论来看,您之后可能还想合并更多的 csv 文件。如果您想先用任何现有的 'EB_CUT.csv'
文件来初始化 daily_prices
字典(实际上就是上面最后一步的逆操作),可以使用以下代码:
# Seed daily_prices ({date: {symbol: price}}) from a previously written
# 'EB_CUT.csv', if one exists; otherwise start empty.
daily_prices = {}
if os.path.isfile('EB_CUT.csv'):  # existing file?
    # Initialize daily_prices from existing csv file.
    with open('EB_CUT.csv', "rt", newline='') as inf:
        reader = csv.reader(inf)
        # The header row names the symbol of each price column, so keep it
        # instead of discarding it; None means the file is empty.
        header = next(reader, None)
        symbols = header[1:] if header else []
        for row in reader:
            if not row:  # tolerate blank lines
                continue
            date = datetime.strptime(row[0], '%d/%m/%Y').date()
            days_prices = daily_prices.setdefault(date, {})
            # Pair each price cell with the symbol from its column header.
            for symbol, price in zip(symbols, row[1:]):
                if price:  # an empty cell means no price for that symbol
                    days_prices[symbol] = price
除了通过读取当前 csv 文件来进行初始化之外,另一种做法是始终将与当前 csv 文件对应的 daily_prices
数据以您选择的任何格式保存在单独的文件中。使用 pickle
或 json
模块可以很容易地将它写入并读回。