statsmodels.api returning MissingDataError: exog contains inf or nans when trying to fit multivariate regression
statsmodels.api returning MissingDataError: exog contains inf or nans when trying to fit multivariate regression
我正在尝试用 statsmodels.api 拟合多元线性回归模型，但收到错误 MissingDataError: exog contains inf or nans。我检查了 NaN 和 inf，都没有发现。这怎么可能？为什么我会收到此错误消息？
代码
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
df = pd.read_csv('clean_df.csv')
x_multi = df.drop('price', axis=1) #feature variables.
x_multi_cons = sm.add_constant(x_multi) #add row of constants.
我检查了所有 exog 变量的 NA 值，没有发现缺失值。
x_multi_cons.isna().sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 8
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
我还检查了 exog 变量的 inf 值，同样没有发现。
np.isinf(x_multi_cons).sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 0
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
这里我正在拟合模型
y_multi = df['price'] # Dependent variable.
lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
但我仍然收到错误："MissingDataError: exog contains inf or nans"。这怎么可能？
ERROR:
MissingDataError Traceback (most recent call last)
<ipython-input-67-ca6d2e9ba2c0> in <module>
----> 1 lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
871 **kwargs):
872 super(OLS, self).__init__(endog, exog, missing=missing,
--> 873 hasconst=hasconst, **kwargs)
874 if "weights" in self._init_keys:
875 self._init_keys.remove("weights")
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
702 weights = weights.squeeze()
703 super(WLS, self).__init__(endog, exog, missing=missing,
--> 704 weights=weights, hasconst=hasconst, **kwargs)
705 nobs = self.exog.shape[0]
706 weights = self.weights
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, **kwargs)
188 """
189 def __init__(self, endog, exog, **kwargs):
--> 190 super(RegressionModel, self).__init__(endog, exog, **kwargs)
191 self._data_attr.extend(['pinv_wexog', 'weights'])
192
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
235
236 def __init__(self, endog, exog=None, **kwargs):
--> 237 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
238 self.initialize()
239
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
76 hasconst = kwargs.pop('hasconst', None)
77 self.data = self._handle_data(endog, exog, missing, hasconst,
---> 78 **kwargs)
79 self.k_constant = self.data.k_constant
80 self.exog = self.data.exog
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
99
100 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 101 data = handle_data(endog, exog, missing, hasconst, **kwargs)
102 # kwargs arrays could have changed, easier to just attach here
103 for key in kwargs:
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
671 klass = handle_data_class_factory(endog, exog)
672 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 673 **kwargs)
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
85 self.const_idx = None
86 self.k_constant = 0
---> 87 self._handle_constant(hasconst)
88 self._check_integrity()
89 self._cache = {}
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in _handle_constant(self, hasconst)
131 exog_max = np.max(self.exog, axis=0)
132 if not np.isfinite(exog_max).all():
--> 133 raise MissingDataError('exog contains inf or nans')
134 exog_min = np.min(self.exog, axis=0)
135 const_idx = np.where(exog_max == exog_min)[0].squeeze()
MissingDataError: exog contains inf or nans
如果您查看下面这张表，我不太确定您是如何得出没有 NA 值的结论的：
x_multi_cons.isna().sum()
[...]
n_hos_beds 8
[...]
这意味着 n_hos_beds 有 8 个缺失值。如果不影响你的模型，只需在一开始（构建 x_multi 之前）就删除含 NaN 的行：
df = df.dropna()
我正在尝试用 statsmodels.api 拟合多元线性回归模型，但收到错误 MissingDataError: exog contains inf or nans。我检查了 NaN 和 inf，都没有发现。这怎么可能？为什么我会收到此错误消息？
代码
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
df = pd.read_csv('clean_df.csv')
x_multi = df.drop('price', axis=1) #feature variables.
x_multi_cons = sm.add_constant(x_multi) #add row of constants.
我检查了所有 exog 变量的 NA 值，没有发现缺失值。
x_multi_cons.isna().sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 8
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
我还检查了 exog 变量的 inf 值，同样没有发现。
np.isinf(x_multi_cons).sum()
const 0
crime_rate 0
resid_area 0
air_qual 0
room_num 0
age 0
teachers 0
poor_prop 0
n_hos_beds 0
n_hot_rooms 0
rainfall 0
parks 0
avg_dist 0
airport_YES 0
waterbody_Lake 0
waterbody_Lake and River 0
waterbody_River 0
dtype: int64
这里我正在拟合模型
y_multi = df['price'] # Dependent variable.
lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
但我仍然收到错误："MissingDataError: exog contains inf or nans"。这怎么可能？
ERROR:
MissingDataError Traceback (most recent call last)
<ipython-input-67-ca6d2e9ba2c0> in <module>
----> 1 lm_multi = sm.OLS(y_multi, x_multi_cons).fit()
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
871 **kwargs):
872 super(OLS, self).__init__(endog, exog, missing=missing,
--> 873 hasconst=hasconst, **kwargs)
874 if "weights" in self._init_keys:
875 self._init_keys.remove("weights")
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, weights, missing, hasconst, **kwargs)
702 weights = weights.squeeze()
703 super(WLS, self).__init__(endog, exog, missing=missing,
--> 704 weights=weights, hasconst=hasconst, **kwargs)
705 nobs = self.exog.shape[0]
706 weights = self.weights
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/regression/linear_model.py in __init__(self, endog, exog, **kwargs)
188 """
189 def __init__(self, endog, exog, **kwargs):
--> 190 super(RegressionModel, self).__init__(endog, exog, **kwargs)
191 self._data_attr.extend(['pinv_wexog', 'weights'])
192
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
235
236 def __init__(self, endog, exog=None, **kwargs):
--> 237 super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
238 self.initialize()
239
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in __init__(self, endog, exog, **kwargs)
76 hasconst = kwargs.pop('hasconst', None)
77 self.data = self._handle_data(endog, exog, missing, hasconst,
---> 78 **kwargs)
79 self.k_constant = self.data.k_constant
80 self.exog = self.data.exog
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/model.py in _handle_data(self, endog, exog, missing, hasconst, **kwargs)
99
100 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 101 data = handle_data(endog, exog, missing, hasconst, **kwargs)
102 # kwargs arrays could have changed, easier to just attach here
103 for key in kwargs:
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in handle_data(endog, exog, missing, hasconst, **kwargs)
671 klass = handle_data_class_factory(endog, exog)
672 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
--> 673 **kwargs)
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in __init__(self, endog, exog, missing, hasconst, **kwargs)
85 self.const_idx = None
86 self.k_constant = 0
---> 87 self._handle_constant(hasconst)
88 self._check_integrity()
89 self._cache = {}
~/anaconda3/envs/python3/lib/python3.6/site-packages/statsmodels/base/data.py in _handle_constant(self, hasconst)
131 exog_max = np.max(self.exog, axis=0)
132 if not np.isfinite(exog_max).all():
--> 133 raise MissingDataError('exog contains inf or nans')
134 exog_min = np.min(self.exog, axis=0)
135 const_idx = np.where(exog_max == exog_min)[0].squeeze()
MissingDataError: exog contains inf or nans
如果您查看下面这张表，我不太确定您是如何得出没有 NA 值的结论的：
x_multi_cons.isna().sum()
[...]
n_hos_beds 8
[...]
这意味着 n_hos_beds 有 8 个缺失值。如果不影响你的模型，只需在一开始（构建 x_multi 之前）就删除含 NaN 的行：
df = df.dropna()