对带样式(Styler)的 DataFrame 排序时,Pandas 抛出 KeyError
Sorting styled dataframe return keyError in Pandas
我想对一个带样式(styled)的 DataFrame 进行分组(groupby)并按索引排序(`sort_index`)。但是,运行时解释器抛出了如下错误:
KeyError: ('Other', 'B')
请问这是什么问题?
重现上述错误的代码如下:
import pandas as pd
import numpy as np
# Group label -> channel names that belong to that group.
dict_map = {
    'group_one': ['D', 'GG', 'G'],
    'group_two': ['A', 'C', 'E', 'F'],
}
vv = np.random.randn(5, 4)  # not referenced again in this snippet

# 5 x 4 table of string cells; '*' and '**' are the markers to highlight.
nn = np.array(
    [
        ['foo', '*', 'bar', 'ff'],
        ['foo', '*', 'bar', '**'],
        ['foo', '*', 'bar', '**'],
        ['foo', '*', 'bar', 'ff'],
        ['foo', '*', '**', 'ff'],
    ]
)

# Two-level column header named ('first', 'second').
arrays = [
    ["bar", "bar", "baz", "baz"],
    ["one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
# Name the row labels 'my_ch', then move them into a regular column.
df = df.rename_axis(index=['my_ch'])
df = df.reset_index()

# Invert dict_map: channel name -> group label.
d = {
    channel: group_label
    for group_label, channels in dict_map.items()
    for channel in channels
}

# Channels missing from the mapping are labelled 'Other'.
channel_names = df.xs("my_ch", axis=1)
group_labels = channel_names.map(d).fillna('Other')
out = df.assign(Group=group_labels)
def highlight_(s):
    """Return one CSS declaration per cell of a column of strings.

    Written to be passed to ``Styler.apply``, which calls it once per column.

    Parameters
    ----------
    s : pandas.Series
        Column whose cells are strings (missing cells are tolerated).

    Returns
    -------
    numpy.ndarray
        ``'background-color:green'`` where the cell contains ``**``,
        ``'background-color:purple'`` where it contains ``*`` but not ``**``,
        and ``''`` everywhere else.
    """
    # Match the markers literally (regex=False) so no regex escaping is
    # needed; missing cells count as "no marker" (na=False).
    has_double_star = s.str.contains('**', regex=False, na=False).to_numpy(dtype=bool)
    has_star = s.str.contains('*', regex=False, na=False).to_numpy(dtype=bool)
    # np.select uses the first condition that holds, so '**' must be tested
    # before '*' (every '**' cell also contains '*').
    return np.select(
        condlist=[has_double_star, has_star],
        choicelist=['background-color:green', 'background-color:purple'],
        default='',
    )
# Create the Styler from `out` while it still has its original row index.
# Note that this rebinds `df` from a DataFrame to a Styler.
df=out.style.apply(highlight_)
# Swap the Styler's underlying data for a re-indexed, then sorted, frame.
# NOTE(review): the Styler was built before these index changes; that
# mismatch is presumably what raises the reported KeyError — confirm
# against the pandas Styler documentation.
df.data=df.data.set_index(['Group', 'my_ch'])
df.data=df.data.sort_index(level=0)
# Export the styled frame to an Excel workbook.
df.to_excel('n1test.xlsx')
请注意,在实际使用场景中,需要按索引的第 0 层(level 0)进行排序。
这应该有效:
import pandas as pd
import numpy as np
# Group label -> channel names that belong to that group.
dict_map = {
    "group_one": ["D", "GG", "G"],
    "group_two": ["A", "C", "E", "F"],
}
vv = np.random.randn(5, 4)  # not referenced again in this snippet

# 5 x 4 table of string cells; "*" and "**" are the markers to highlight.
nn = np.array([
    ["foo", "*", "bar", "ff"],
    ["foo", "*", "bar", "**"],
    ["foo", "*", "bar", "**"],
    ["foo", "*", "bar", "ff"],
    ["foo", "*", "**", "ff"],
])

# Two-level column header named ("first", "second").
arrays = [
    ["bar", "bar", "baz", "baz"],
    ["one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
# Name the row labels "my_ch", then move them into a regular column.
df = df.rename_axis(index=["my_ch"])
df = df.reset_index()

# Invert dict_map: channel name -> group label.
d = {
    channel: group_label
    for group_label, channels in dict_map.items()
    for channel in channels
}

# Channels missing from the mapping are labelled "Other".
channel_names = df.xs("my_ch", axis=1)
group_labels = channel_names.map(d).fillna("Other")
out = df.assign(Group=group_labels)
def highlight_(s):
    """Return one CSS declaration (or None) per cell of a column of strings.

    Written to be passed to ``Styler.apply``, which calls it once per column.

    Parameters
    ----------
    s : pandas.Series
        Column whose cells are strings (missing cells are tolerated).

    Returns
    -------
    numpy.ndarray
        ``"background-color:green"`` where the cell contains ``**``,
        ``"background-color:purple"`` where it contains ``*`` but not ``**``,
        and ``None`` everywhere else.
    """
    # Match the markers literally (regex=False) so no regex escaping is
    # needed; missing cells count as "no marker" (na=False).
    has_double_star = s.str.contains("**", regex=False, na=False).to_numpy(dtype=bool)
    has_star = s.str.contains("*", regex=False, na=False).to_numpy(dtype=bool)
    # np.select uses the first condition that holds, so "**" must be tested
    # before "*" (every "**" cell also contains "*").
    return np.select(
        condlist=[has_double_star, has_star],
        choicelist=["background-color:green", "background-color:purple"],
        default=None,
    )
# Bring the frame into its final shape BEFORE creating the Styler, so the
# Styler is built from data that already has the (Group, my_ch) index.
# The index is set first: sorting on level 0 only orders rows by "Group"
# once "Group" is actually part of the index.
(
    out.set_index(["Group", "my_ch"])
    .sort_index(level=0)
    .style.apply(highlight_)
    .to_excel("n1test.xlsx")
)
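主要区别在于:先完成排序和索引设置,把数据整理成最终形态,最后才应用 Styler 并保存为 Excel 文件。这是因为 Styler 在创建时会记录当时数据的索引和列;如果事后再替换 `.data` 并修改其索引,Styler 内部保存的索引就与数据不一致,从而引发 KeyError。另外,整个链式表达式用一对圆括号括起来,因此可以跨多行书写,而无需使用反斜杠续行。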
我想对一个带样式(styled)的 DataFrame 进行分组(groupby)并按索引排序(`sort_index`)。但是,运行时解释器抛出了如下错误:
KeyError: ('Other', 'B')
请问这是什么问题?
重现上述错误的代码如下:
import pandas as pd
import numpy as np
# Group label -> channel names that belong to that group.
dict_map = {
    'group_one': ['D', 'GG', 'G'],
    'group_two': ['A', 'C', 'E', 'F'],
}
vv = np.random.randn(5, 4)  # not referenced again in this snippet

# 5 x 4 table of string cells; '*' and '**' are the markers to highlight.
nn = np.array(
    [
        ['foo', '*', 'bar', 'ff'],
        ['foo', '*', 'bar', '**'],
        ['foo', '*', 'bar', '**'],
        ['foo', '*', 'bar', 'ff'],
        ['foo', '*', '**', 'ff'],
    ]
)

# Two-level column header named ('first', 'second').
arrays = [
    ["bar", "bar", "baz", "baz"],
    ["one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
# Name the row labels 'my_ch', then move them into a regular column.
df = df.rename_axis(index=['my_ch'])
df = df.reset_index()

# Invert dict_map: channel name -> group label.
d = {
    channel: group_label
    for group_label, channels in dict_map.items()
    for channel in channels
}

# Channels missing from the mapping are labelled 'Other'.
channel_names = df.xs("my_ch", axis=1)
group_labels = channel_names.map(d).fillna('Other')
out = df.assign(Group=group_labels)
def highlight_(s):
    """Return one CSS declaration per cell of a column of strings.

    Written to be passed to ``Styler.apply``, which calls it once per column.

    Parameters
    ----------
    s : pandas.Series
        Column whose cells are strings (missing cells are tolerated).

    Returns
    -------
    numpy.ndarray
        ``'background-color:green'`` where the cell contains ``**``,
        ``'background-color:purple'`` where it contains ``*`` but not ``**``,
        and ``''`` everywhere else.
    """
    # Match the markers literally (regex=False) so no regex escaping is
    # needed; missing cells count as "no marker" (na=False).
    has_double_star = s.str.contains('**', regex=False, na=False).to_numpy(dtype=bool)
    has_star = s.str.contains('*', regex=False, na=False).to_numpy(dtype=bool)
    # np.select uses the first condition that holds, so '**' must be tested
    # before '*' (every '**' cell also contains '*').
    return np.select(
        condlist=[has_double_star, has_star],
        choicelist=['background-color:green', 'background-color:purple'],
        default='',
    )
# Create the Styler from `out` while it still has its original row index.
# Note that this rebinds `df` from a DataFrame to a Styler.
df=out.style.apply(highlight_)
# Swap the Styler's underlying data for a re-indexed, then sorted, frame.
# NOTE(review): the Styler was built before these index changes; that
# mismatch is presumably what raises the reported KeyError — confirm
# against the pandas Styler documentation.
df.data=df.data.set_index(['Group', 'my_ch'])
df.data=df.data.sort_index(level=0)
# Export the styled frame to an Excel workbook.
df.to_excel('n1test.xlsx')
请注意,在实际使用场景中,需要按索引的第 0 层(level 0)进行排序。
这应该有效:
import pandas as pd
import numpy as np
# Group label -> channel names that belong to that group.
dict_map = {
    "group_one": ["D", "GG", "G"],
    "group_two": ["A", "C", "E", "F"],
}
vv = np.random.randn(5, 4)  # not referenced again in this snippet

# 5 x 4 table of string cells; "*" and "**" are the markers to highlight.
nn = np.array([
    ["foo", "*", "bar", "ff"],
    ["foo", "*", "bar", "**"],
    ["foo", "*", "bar", "**"],
    ["foo", "*", "bar", "ff"],
    ["foo", "*", "**", "ff"],
])

# Two-level column header named ("first", "second").
arrays = [
    ["bar", "bar", "baz", "baz"],
    ["one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
# Name the row labels "my_ch", then move them into a regular column.
df = df.rename_axis(index=["my_ch"])
df = df.reset_index()

# Invert dict_map: channel name -> group label.
d = {
    channel: group_label
    for group_label, channels in dict_map.items()
    for channel in channels
}

# Channels missing from the mapping are labelled "Other".
channel_names = df.xs("my_ch", axis=1)
group_labels = channel_names.map(d).fillna("Other")
out = df.assign(Group=group_labels)
def highlight_(s):
    """Return one CSS declaration (or None) per cell of a column of strings.

    Written to be passed to ``Styler.apply``, which calls it once per column.

    Parameters
    ----------
    s : pandas.Series
        Column whose cells are strings (missing cells are tolerated).

    Returns
    -------
    numpy.ndarray
        ``"background-color:green"`` where the cell contains ``**``,
        ``"background-color:purple"`` where it contains ``*`` but not ``**``,
        and ``None`` everywhere else.
    """
    # Match the markers literally (regex=False) so no regex escaping is
    # needed; missing cells count as "no marker" (na=False).
    has_double_star = s.str.contains("**", regex=False, na=False).to_numpy(dtype=bool)
    has_star = s.str.contains("*", regex=False, na=False).to_numpy(dtype=bool)
    # np.select uses the first condition that holds, so "**" must be tested
    # before "*" (every "**" cell also contains "*").
    return np.select(
        condlist=[has_double_star, has_star],
        choicelist=["background-color:green", "background-color:purple"],
        default=None,
    )
# Bring the frame into its final shape BEFORE creating the Styler, so the
# Styler is built from data that already has the (Group, my_ch) index.
# The index is set first: sorting on level 0 only orders rows by "Group"
# once "Group" is actually part of the index.
(
    out.set_index(["Group", "my_ch"])
    .sort_index(level=0)
    .style.apply(highlight_)
    .to_excel("n1test.xlsx")
)
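主要区别在于:先完成排序和索引设置,把数据整理成最终形态,最后才应用 Styler 并保存为 Excel 文件。这是因为 Styler 在创建时会记录当时数据的索引和列;如果事后再替换 `.data` 并修改其索引,Styler 内部保存的索引就与数据不一致,从而引发 KeyError。另外,整个链式表达式用一对圆括号括起来,因此可以跨多行书写,而无需使用反斜杠续行。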