Python Linearmodels:如何让 Python 知道这些列是用来标识组(Group)的 ID 列?

Python Linearmodels: How to let Python know these are ID columns to identify Group?

我想对一组个体运行面板回归(固定效应模型),这些个体由 province 和 city 两列共同唯一标识,并在不同时间 t 上被观测。

创建数据框和运行回归的代码

import numpy as np
import pandas as pd
from linearmodels import PanelOLS
# Toy panel data set: six observations of the dependent variable y and the
# regressors x1 and x2. Each observation carries a date string t and is meant
# to be identified by the (province, city) pair.
data = {'y':[1,2,3,1,0,3],
        'x1': [0,1,2,3,0,2],
        'x2':[1,1,3,2,1,0],
        't':  ['2020-02-18', '2020-02-18', '2020-02-17', '2020-02-18', '2020-02-18', '2020-02-17'],
        'province': ['A', 'A','A','B','B','B'],
        'city': ['a','b','a','a','c','a']}
dataframe = pd.DataFrame (data, columns = ['y','x1', 'x2', 't', 'province', 'city'])

# append=True keeps the existing default row index as an index level and adds
# t, province and city after it, so the index no longer has exactly two levels.
dataframe=dataframe.set_index(['t','province','city'], append=True)
# PanelOLS rejects this input with
# "DataFrame input must have a MultiIndex with 2 levels" because the index
# built above has more than two levels.
mod = PanelOLS(dataframe.y, dataframe[['x1','x2']], entity_effects=True)

但是我收到一条错误消息 "DataFrame input must have a MultiIndex with 2 levels."

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-33-eb5264bfefc9> in <module>
      1 dataframe=dataframe.set_index(['t','province','city'], append=True)
----> 2 mod = PanelOLS(dataframe.y, dataframe[['x1','x2']], entity_effects=True)

C:\ProgramData\Anaconda3\lib\site-packages\linearmodels\panel\model.py in __init__(self, dependent, exog, weights, entity_effects, time_effects, other_effects, singletons, drop_absorbed)
   1038         drop_absorbed: bool = False,
   1039     ) -> None:
-> 1040         super(PanelOLS, self).__init__(dependent, exog, weights=weights)
   1041 
   1042         self._entity_effects = entity_effects

C:\ProgramData\Anaconda3\lib\site-packages\linearmodels\panel\model.py in __init__(self, dependent, exog, weights)
    224         weights: Optional[PanelDataLike] = None,
    225     ) -> None:
--> 226         self.dependent = PanelData(dependent, "Dep")
    227         self.exog = PanelData(exog, "Exog")
    228         self._original_shape = self.dependent.shape

C:\ProgramData\Anaconda3\lib\site-packages\linearmodels\panel\data.py in __init__(self, x, var_name, convert_dummies, drop_first, copy)
    198                 if len(x.index.levels) != 2:
    199                     raise ValueError(
--> 200                         "DataFrame input must have a " "MultiIndex with 2 levels"
    201                     )
    202                 if isinstance(self._original, (DataFrame, PanelData, Series)):

ValueError: DataFrame input must have a MultiIndex with 2 levels

作为解决方案,而不是做

dataframe=dataframe.set_index(['t','province','city'], append=True)

我这样做

dataframe=dataframe.set_index(['t'], append=True)

这样模型就可以通过了,但我不知道为什么。在这个例子中,我使用两列(province 和 city)来标识组。如果我需要三列来标识我的组,该怎么办?Python 如何区分 ID 列和 x 变量?

根据 linearmodels 作者的说法,索引中只需要一个实体(entity)层级和一个时间层级。因此可以把 city 和 province 合并成一个元组列作为实体标识,再与 t 一起设为索引:

import numpy as np
import pandas as pd
from linearmodels import PanelOLS
# Toy panel data set: six observations of y, x1 and x2, each tagged with an
# observation date and a (province, city) pair.
dates = pd.to_datetime([
    '2020-02-18', '2020-02-18', '2020-02-17',
    '2020-02-18', '2020-02-18', '2020-02-17',
])
data = {
    'y': [1, 2, 3, 1, 0, 3],
    'x1': [0, 1, 2, 3, 0, 2],
    'x2': [1, 1, 3, 2, 1, 0],
    't': dates,
    'province': ['A', 'A', 'A', 'B', 'B', 'B'],
    'city': ['a', 'b', 'a', 'a', 'c', 'a'],
}
column_order = ['y', 'x1', 'x2', 't', 'province', 'city']
dataframe = pd.DataFrame(data, columns=column_order)

# Collapse the two identifying columns into one (city, province) tuple per
# row, so that a single column names the entity.
dataframe["city-provence"] = list(zip(dataframe.city, dataframe.province))

# Entity first, time second: the index now has exactly two levels.
dataframe = dataframe.set_index(keys=["city-provence", "t"])

                          y  x1  x2 province city
city-provence t                                  
(a, A)        2020-02-18  1   0   1        A    a
(b, A)        2020-02-18  2   1   1        A    b
(a, A)        2020-02-17  3   2   3        A    a
(a, B)        2020-02-18  1   3   2        B    a
(c, B)        2020-02-18  0   0   1        B    c
(a, B)        2020-02-17  3   2   0        B    a