将 pandas df 多索引转换为列

Convert pandas df multi index to columns

首先post这里,非常感谢任何帮助。

使用 python urllib2 从 api 请求读取 csv 文件响应后,我得到了一个多索引 df。它包含 19 个索引' 然后只有 2 'columns'.

如何将这 19 个索引转换为附加列?

我试过恢复索引,但没有成功。

from urllib2 import Request, urlopen, URLError import pandas as pd

url = 'URL string here'

response = urlopen(request)
df = pd.read_csv(response)
df.reset_index().head()

要清楚,例如,如果索引 1 包含字母 a、b、c、d,我只想将此索引更改为标题为 'letter' 的列,现在每一行都包含这些字母之一.当我做 reset_index 时,它确实用一个字母填充每一行,但是,该列本身仍然是一个索引..

编辑..添加更多代码,第一部分获取 df。

from urllib2 import Request, urlopen, URLError
import pandas as pd

host = 'testapi.bmreports.com'
port = '443'
rep_name = 'DETSYSPRICES'
version = 'v1'
key = 'ldytgh1ylq0k92c'
sd = '2016-05-26'
sp = 20
criteria = (host,port,rep_name,version,key,sd,sp)
url = 'https://%s:%s/BMRS/%s/%s?APIKey=%s&SettlementDate=%s&SettlementPeriod=%d&ServiceType=CSV' % criteria
request = Request(url)
#print url

response = urlopen(request)
df = pd.read_csv(response)
df


HDR INDICATIVE SYSTEM PRICE STACK DATA
BID 20160526    20  1   1   NaN NaN F   T   F   F   NaN 266.0329    -230.211    -230.211    -149.786    -149.786    -48.05  266.0329    1.00000 -48.050 -12782.88
2   T_DIDCB6    109615  -1  F   F   F   F   NaN 26.0000 -1.950  -1.950  -1.950  -1.950  -1.95   26.0000 0.98947 -1.929  -50.17
3   T_COSO-1    119674  -1  T   F   F   F   NaN 25.9000 -0.279  0.000   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
OFFER   20160526    20  1   T_WBURB-2   25968   2   F   F   F   F   0   46.0000 16.163  16.163  0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
2   T_WBURB-2   25968   1   F   F   F   F   0   46.0000 3.037   3.037   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
3   T_CNQPS-4   45744   1   F   F   F   F   0   50.0000 0.975   0.000   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
4   T_DAMC-1    85044   1   F   F   F   F   0   64.5000 4.583   4.583   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
5   T_DAMC-1    85045   1   F   F   F   F   0   64.5000 0.083   0.083   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
6   T_DAMC-1    85046   1   F   F   F   F   0   64.5000 22.000  22.000  0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
7   T_BAGE-1    33725   1   T   F   F   F   0   70.0000 1.583   1.583   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
8   T_SUTB-1    68274   1   T   F   F   F   0   72.0000 4.000   4.000   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
9   T_HUMR-1    99956   2   F   F   F   F   0   82.4700 9.250   9.250   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
10  T_HUMR-1    99956   1   F   F   F   F   0   82.4700 0.250   0.000   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
11  T_HUMR-1    99955   2   F   F   F   F   0   82.4700 0.647   0.647   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
12  T_HUMR-1    99955   1   F   F   F   F   0   82.4700 0.033   0.000   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
13  T_HUMR-1    99956   3   F   F   F   F   0   95.9700 10.317  10.317  0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
14  T_HUMR-1    99955   3   F   F   F   F   0   95.9700 0.004   0.004   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
15  T_FOYE-1    92982   1   T   F   F   F   0   103.0000    1.258   1.258   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
16  T_FOYE-1    92983   1   T   F   F   F   0   103.0000    7.500   7.500   0.000   0.000   0.00    0.0000  0.00000 0.000   0.00
FTR 19  NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

在第二部分中,我尝试重置...

df.reset_index()
df.index  


MultiIndex(levels=[[u'BID', u'FTR', u'OFFER'], [19, 20160526], [20.0], [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0], [u'1', u'T_BAGE-1', u'T_CNQPS-4', u'T_COSO-1', u'T_DAMC-1', u'T_DIDCB6', u'T_FOYE-1', u'T_HUMR-1', u'T_SUTB-1', u'T_WBURB-2'], [25968.0, 33725.0, 45744.0, 68274.0, 85044.0, 85045.0, 85046.0, 92982.0, 92983.0, 99955.0, 99956.0, 109615.0, 119674.0], [-1.0, 1.0, 2.0, 3.0], [u'F', u'T'], [u'F', u'T'], [u'F'], [u'F'], [0.0], [25.9, 26.0, 46.0, 50.0, 64.5, 70.0, 72.0, 82.47, 95.97, 103.0, 266.0329], [-230.211, -1.95, -0.279, 0.004, 0.033, 0.083, 0.25, 0.647, 0.975, 1.258, 1.583, 3.037, 4.0, 4.583, 7.5, 9.25, 10.317, 16.163, 22.0], [-230.211, -1.95, 0.0, 0.004, 0.083, 0.647, 1.258, 1.583, 3.037, 4.0, 4.583, 7.5, 9.25, 10.317, 16.163, 22.0], [-149.786, -1.95, 0.0], [-149.786, -1.95, 0.0], [-48.05, -1.95, 0.0], [0.0, 26.0, 266.0329], [0.0, 0.98947, 1.0]],
           labels=[[0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1], [0, 5, 3, 9, 9, 2, 4, 4, 4, 1, 8, 7, 7, 7, 7, 7, 7, 6, 6, -1], [-1, 11, 12, 0, 0, 2, 4, 5, 6, 1, 3, 10, 10, 9, 9, 10, 9, 7, 8, -1], [-1, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 3, 3, 1, 1, -1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, -1], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [-1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [10, 1, 0, 2, 2, 3, 4, 4, 4, 5, 6, 7, 7, 7, 7, 8, 8, 9, 9, -1], [0, 1, 2, 17, 11, 8, 13, 5, 18, 10, 12, 15, 6, 7, 4, 16, 3, 9, 14, -1], [0, 1, 2, 14, 8, 2, 10, 4, 15, 7, 9, 12, 2, 5, 2, 13, 3, 6, 11, -1], [0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1], [0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1], [0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1], [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]])

我认为你可以将参数 inplace 添加到 reset_index:

df.reset_index(inplace=True)

或将其分配回去:

df = df.reset_index()

样本:

import pandas as pd
import io

temp=u"""HDR;INDICATIVE;SYSTEM;PRICE;STACK;DATA
BID;20160526;20;1;1;NaN;NaN;F;T;F;F;NaN;266.0329;-230.211;-230.211;-149.786;-149.786;-48.05;266.0329;1.00000;-48.050;-12782.88
2;T_DIDCB6;109615;-1;F;F;F;F;NaN;26.0000;-1.950;-1.950;-1.950;-1.950;-1.95;26.0000;0.98947;-1.929;-50.17
3;T_COSO-1;119674;-1;T;F;F;F;NaN;25.9000;-0.279;0.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
OFFER;20160526;20;1;T_WBURB-2;25968;2;F;F;F;F;0;46.0000;16.163;16.163;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
2;T_WBURB-2;25968;1;F;F;F;F;0;46.0000;3.037;3.037;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
3;T_CNQPS-4;45744;1;F;F;F;F;0;50.0000;0.975;0.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
4;T_DAMC-1;85044;1;F;F;F;F;0;64.5000;4.583;4.583;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
5;T_DAMC-1;85045;1;F;F;F;F;0;64.5000;0.083;0.083;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
6;T_DAMC-1;85046;1;F;F;F;F;0;64.5000;22.000;22.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
7;T_BAGE-1;33725;1;T;F;F;F;0;70.0000;1.583;1.583;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
8;T_SUTB-1;68274;1;T;F;F;F;0;72.0000;4.000;4.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
9;T_HUMR-1;99956;2;F;F;F;F;0;82.4700;9.250;9.250;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
10;T_HUMR-1;99956;1;F;F;F;F;0;82.4700;0.250;0.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
11;T_HUMR-1;99955;2;F;F;F;F;0;82.4700;0.647;0.647;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
12;T_HUMR-1;99955;1;F;F;F;F;0;82.4700;0.033;0.000;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
13;T_HUMR-1;99956;3;F;F;F;F;0;95.9700;10.317;10.317;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
14;T_HUMR-1;99955;3;F;F;F;F;0;95.9700;0.004;0.004;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
15;T_FOYE-1;92982;1;T;F;F;F;0;103.0000;1.258;1.258;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
16;T_FOYE-1;92983;1;T;F;F;F;0;103.0000;7.500;7.500;0.000;0.000;0.00;0.0000;0.00000;0.000;0.00
FTR;19;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN;NaN"""
#after testing replace io.StringIO(temp) to filename
df = pd.read_csv(io.StringIO(temp), sep=";", index_col=None, parse_dates=False)
#print (df)

print (df.index)
MultiIndex(levels=[['10', '11', '12', '13', '14', '15', '16', '2', '3', '4', '5', '6', '7', '8', '9', 'BID', 'FTR', 'OFFER'], ['19', '20160526', 'T_BAGE-1', 'T_CNQPS-4', 'T_COSO-1', 'T_DAMC-1', 'T_DIDCB6', 'T_FOYE-1', 'T_HUMR-1', 'T_SUTB-1', 'T_WBURB-2'], [20.0, 25968.0, 33725.0, 45744.0, 68274.0, 85044.0, 85045.0, 85046.0, 92982.0, 92983.0, 99955.0, 99956.0, 109615.0, 119674.0], [-1.0, 1.0, 2.0, 3.0], ['1', 'F', 'T', 'T_WBURB-2'], ['25968', 'F'], ['2', 'F'], ['F'], ['0', 'F', 'T'], ['103.0000', '25.9000', '26.0000', '46.0000', '50.0000', '64.5000', '70.0000', '72.0000', '82.4700', '95.9700', 'F'], ['-0.279', '-1.950', '0.004', '0.033', '0.083', '0.250', '0.647', '0.975', '1.258', '1.583', '10.317', '22.000', '3.037', '4.000', '4.583', '7.500', '9.250', 'F'], [-1.95, 0.0, 0.004, 0.083, 0.647, 1.258, 1.583, 3.037, 4.0, 4.583, 7.5, 9.25, 10.317, 22.0], [-1.95, 0.0, 46.0, 266.0329], [-230.211, -1.95, 0.0, 16.163], [-230.211, -1.95, 0.0, 16.163], [-149.786, 0.0, 26.0]],
           labels=[[15, 7, 8, 17, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4, 5, 6, 16], [1, 6, 4, 1, 10, 3, 5, 5, 5, 2, 9, 8, 8, 8, 8, 8, 8, 7, 7, 0], [0, 12, 13, 0, 1, 3, 5, 6, 7, 2, 4, 11, 11, 10, 10, 11, 10, 8, 9, -1], [1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 3, 3, 1, 1, -1], [0, 1, 2, 3, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, -1], [-1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1], [-1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [2, -1, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1], [10, 2, 1, 10, 3, 4, 5, 5, 5, 6, 7, 8, 8, 8, 8, 9, 9, 0, 0, -1], [17, 1, 0, 17, 12, 7, 14, 4, 11, 9, 13, 16, 5, 6, 3, 10, 2, 8, 15, -1], [-1, 0, 1, 1, 7, 1, 9, 3, 13, 6, 8, 11, 1, 4, 1, 12, 2, 5, 10, -1], [3, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1], [0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1], [0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -1], [0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1]])
df.reset_index(inplace=True)

print (df.index)
RangeIndex(start=0, stop=20, step=1)