将 CSV 数据转换为字典中的列表
Converting CSV data to list in dictionary
我有一个 CSV 文件,格式如下:
Name_1,2,K,14
Name_1,3,T,14
Name_1,4,T,18
Name_2,2,G,12
Name_2,4,T,14
Name_2,6,K,15
Name_3,2,K,12
Name_3,3,T,15
Name_3,4,G,18
我想把它转换成一个字典,其中 Name_x 是键,对应的数据以列表的形式作为值,像这样:
{'Name_1': [[2, 'K', 14], [3, 'T', 14], [4, 'T', 18]],
'Name_2': [[2, 'G', 12], [4, 'T', 14], [6, 'K', 15]],
...}
到目前为止,我想我必须使用 defaultdict
:
from collections import defaultdict
# defaultdict(list) supplies a new empty list the first time a missing key is accessed
d = defaultdict(list)
但是我该如何把数据 append 到 d 中?我知道 defaultdict 本身没有 append 方法。
您需要用名称作为键,并把每一行其余部分的切片追加到该键对应的列表中;注意,在 Python 3.7 之前,普通字典和 defaultdict 都不保证键的顺序:
import csv
from collections import defaultdict

# Group the rows of in.csv by their first column (the name).
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    # defaultdict(list) creates the empty list the first time a name is seen,
    # so append() can be called without checking for the key first.
    d = defaultdict(list)
    for row in r:
        if not row:
            # csv.reader yields [] for a blank line; there is no name to key on.
            continue
        d[row[0]].append(row[1:])
print(d)
如果你想保持键的顺序,就需要使用 OrderedDict:
import csv
from collections import OrderedDict

# Group the rows of in.csv by name while remembering the order in which
# the names first appear.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    for row in r:
        if not row:
            # csv.reader yields [] for a blank line; skip it instead of failing on row[0].
            continue
        # get key / first element in row
        key = row[0]
        # create the key/list pairing if it does not exist, else just append the value
        od.setdefault(key, []).append(row[1:])
print(od)
输出:
OrderedDict([('Name_1', [['2', 'K', '14'], ['3', 'T', '14'], ['4', 'T', '18']]), ('Name_2', [['2', 'G', '12'], ['4', 'T', '14'], ['6', 'K', '15']]), ('Name_3', [['2', 'K', '12'], ['3', 'T', '15'], ['4', 'G', '18']])])
如果名称已分组,您也可以使用 groupby,这将根据每行中的第一个 item/name 对元素进行分组:
import csv
from collections import OrderedDict
from itertools import groupby
from operator import itemgetter

# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    # groupby only merges *consecutive* rows that share a key, so this relies on
    # the file already being grouped by name. filter(None, r) drops the empty
    # lists csv.reader yields for blank lines, which itemgetter(0) cannot index.
    for k, v in groupby(filter(None, r), key=itemgetter(0)):
        # keep everything after the name column
        od[k] = [sub[1:] for sub in v]
如果您使用 Python 3,还可以使用 * 进行解包:
import csv
from collections import OrderedDict

# newline="" is the mode the csv module documents for files passed to csv.reader.
with open("in.csv", newline="") as f:
    r = csv.reader(f)
    od = OrderedDict()
    for row in r:
        if not row:
            # a blank line gives [], which cannot be unpacked into key + rest
            continue
        # Python 3 extended unpacking: first column is the key, the remainder is the value
        key, *rest = row
        od.setdefault(key, []).append(rest)
import csv
from collections import OrderedDict
from itertools import groupby
from operator import itemgetter

# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    # groupby only merges *consecutive* rows that share a key, so this relies on
    # the file already being grouped by name. filter(None, r) drops the empty
    # lists csv.reader yields for blank lines.
    for k, v in groupby(filter(None, r), key=itemgetter(0)):
        # `_` swallows the name column; `*sub` collects the remaining columns as a list
        od[k] = [sub for _, *sub in v]
print(od)
凭印象写的(因为我对 defaultdict 不太熟悉),下面的代码应该大致能满足您的要求。
其中 data 是包含 CSV 内容的字符串:
# `data` must already hold the CSV text as a single string.
obj = {}
# splitlines() does not produce a trailing empty entry when the text ends
# with a newline, and a separate name keeps `data` bound to the original string.
for line in data.splitlines():
    if not line:
        # an empty line would otherwise create a bogus '' key
        continue
    row = line.split(',')
    # plain-dict equivalent of defaultdict(list): create the list on first use
    obj.setdefault(row[0], []).append(row[1:])
print(obj)
txtcsv = """Name_1,2,K,14
Name_1,3,T,14
Name_1,4,T,18
Name_2,2,G,12
Name_2,4,T,14
Name_2,6,K,15
Name_3,2,K,12
Name_3,3,T,15
Name_3,4,G,18"""


def save():
    """Write the sample CSV text in ``txtcsv`` to ``test.csv`` in the current directory."""
    with open("test.csv", "w") as f:
        f.write(txtcsv)


def group_lines(lines):
    """Group comma-separated text lines by their first field.

    Args:
        lines: any iterable of strings (an open text file works), one
            record per item; trailing whitespace/newlines are ignored.

    Returns:
        A dict mapping each first field to a list holding, for every line
        with that first field, the list of its remaining fields (as str).

    Raises:
        ValueError: if a non-blank line contains no comma.
    """
    grouped = {}
    for line in lines:
        line = line.rstrip()
        if not line:
            # blank lines carry no record; unpacking them below would fail
            continue
        # split once so the name is separated from the rest of the record
        name, val = line.split(",", 1)
        grouped.setdefault(name, []).append(val.split(","))
    return grouped


if __name__ == "__main__":
    save()
    # iterate the file object directly; no need to load every line up front
    with open("test.csv") as f:
        d = group_lines(f)
    print(d)
我有一个 CSV 文件,格式如下:
Name_1,2,K,14
Name_1,3,T,14
Name_1,4,T,18
Name_2,2,G,12
Name_2,4,T,14
Name_2,6,K,15
Name_3,2,K,12
Name_3,3,T,15
Name_3,4,G,18
我想把它转换成一个字典,其中 Name_x 是键,对应的数据以列表的形式作为值,像这样:
{'Name_1': [[2, 'K', 14], [3, 'T', 14], [4, 'T', 18]],
'Name_2': [[2, 'G', 12], [4, 'T', 14], [6, 'K', 15]],
...}
到目前为止,我想我必须使用 defaultdict
:
from collections import defaultdict
# defaultdict(list) supplies a new empty list the first time a missing key is accessed
d = defaultdict(list)
但是我该如何把数据 append 到 d 中?我知道 defaultdict 本身没有 append 方法。
您需要用名称作为键,并把每一行其余部分的切片追加到该键对应的列表中;注意,在 Python 3.7 之前,普通字典和 defaultdict 都不保证键的顺序:
import csv
from collections import defaultdict

# Group the rows of in.csv by their first column (the name).
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    # defaultdict(list) creates the empty list the first time a name is seen,
    # so append() can be called without checking for the key first.
    d = defaultdict(list)
    for row in r:
        if not row:
            # csv.reader yields [] for a blank line; there is no name to key on.
            continue
        d[row[0]].append(row[1:])
print(d)
如果你想保持键的顺序,就需要使用 OrderedDict:
import csv
from collections import OrderedDict

# Group the rows of in.csv by name while remembering the order in which
# the names first appear.
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    for row in r:
        if not row:
            # csv.reader yields [] for a blank line; skip it instead of failing on row[0].
            continue
        # get key / first element in row
        key = row[0]
        # create the key/list pairing if it does not exist, else just append the value
        od.setdefault(key, []).append(row[1:])
print(od)
输出:
OrderedDict([('Name_1', [['2', 'K', '14'], ['3', 'T', '14'], ['4', 'T', '18']]), ('Name_2', [['2', 'G', '12'], ['4', 'T', '14'], ['6', 'K', '15']]), ('Name_3', [['2', 'K', '12'], ['3', 'T', '15'], ['4', 'G', '18']])])
如果名称已分组,您也可以使用 groupby,这将根据每行中的第一个 item/name 对元素进行分组:
import csv
from collections import OrderedDict
from itertools import groupby
from operator import itemgetter

# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    # groupby only merges *consecutive* rows that share a key, so this relies on
    # the file already being grouped by name. filter(None, r) drops the empty
    # lists csv.reader yields for blank lines, which itemgetter(0) cannot index.
    for k, v in groupby(filter(None, r), key=itemgetter(0)):
        # keep everything after the name column
        od[k] = [sub[1:] for sub in v]
如果您使用 Python 3,还可以使用 * 进行解包:
import csv
from collections import OrderedDict

# newline="" is the mode the csv module documents for files passed to csv.reader.
with open("in.csv", newline="") as f:
    r = csv.reader(f)
    od = OrderedDict()
    for row in r:
        if not row:
            # a blank line gives [], which cannot be unpacked into key + rest
            continue
        # Python 3 extended unpacking: first column is the key, the remainder is the value
        key, *rest = row
        od.setdefault(key, []).append(rest)
import csv
from collections import OrderedDict
from itertools import groupby
from operator import itemgetter

# newline='' is the mode the csv module documents for files passed to csv.reader.
with open('in.csv', newline='') as f:
    r = csv.reader(f)
    od = OrderedDict()
    # groupby only merges *consecutive* rows that share a key, so this relies on
    # the file already being grouped by name. filter(None, r) drops the empty
    # lists csv.reader yields for blank lines.
    for k, v in groupby(filter(None, r), key=itemgetter(0)):
        # `_` swallows the name column; `*sub` collects the remaining columns as a list
        od[k] = [sub for _, *sub in v]
print(od)
凭印象写的(因为我对 defaultdict 不太熟悉),下面的代码应该大致能满足您的要求。
其中 data 是包含 CSV 内容的字符串:
# `data` must already hold the CSV text as a single string.
obj = {}
# splitlines() does not produce a trailing empty entry when the text ends
# with a newline, and a separate name keeps `data` bound to the original string.
for line in data.splitlines():
    if not line:
        # an empty line would otherwise create a bogus '' key
        continue
    row = line.split(',')
    # plain-dict equivalent of defaultdict(list): create the list on first use
    obj.setdefault(row[0], []).append(row[1:])
print(obj)
txtcsv = """Name_1,2,K,14
Name_1,3,T,14
Name_1,4,T,18
Name_2,2,G,12
Name_2,4,T,14
Name_2,6,K,15
Name_3,2,K,12
Name_3,3,T,15
Name_3,4,G,18"""


def save():
    """Write the sample CSV text in ``txtcsv`` to ``test.csv`` in the current directory."""
    with open("test.csv", "w") as f:
        f.write(txtcsv)


def group_lines(lines):
    """Group comma-separated text lines by their first field.

    Args:
        lines: any iterable of strings (an open text file works), one
            record per item; trailing whitespace/newlines are ignored.

    Returns:
        A dict mapping each first field to a list holding, for every line
        with that first field, the list of its remaining fields (as str).

    Raises:
        ValueError: if a non-blank line contains no comma.
    """
    grouped = {}
    for line in lines:
        line = line.rstrip()
        if not line:
            # blank lines carry no record; unpacking them below would fail
            continue
        # split once so the name is separated from the rest of the record
        name, val = line.split(",", 1)
        grouped.setdefault(name, []).append(val.split(","))
    return grouped


if __name__ == "__main__":
    save()
    # iterate the file object directly; no need to load every line up front
    with open("test.csv") as f:
        d = group_lines(f)
    print(d)