关于 dtype=datetime64[ns] 与 date 之间出现 “Invalid comparison” 错误信息的问题
Regarding the error message "Invalid comparison between dtype=datetime64[ns] and date"
我正在尝试运行以下两个代码片段,其中一部分来自这个 Databricks 教程。
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
from datetime import date
# get historical actuals & predictions for comparison
actuals_pd = history_pd[ history_pd['ds'] < date(2018, 1, 1) ]['y'] # line 1
predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat'] # line 2
然而,第 2 行 predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat']
抛出了 TypeError: Invalid comparison between dtype=datetime64[ns] and date 的错误信息。
上一行(第 1 行)的写法看起来几乎相同,却没有出现这个错误。我还打印了 predicted_pd 和 actuals_pd 的类型以供参考。
TypeError Traceback (most recent call last)
<ipython-input-15-748394f8994f> in <module>
----> 1 predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat']
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\__init__.py in wrapper(self, other)
368 rvalues = extract_array(other, extract_numpy=True)
369
--> 370 res_values = comparison_op(lvalues, rvalues, op)
371
372 return self._construct_result(res_values, name=res_name)
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\array_ops.py in comparison_op(left, right, op)
228 if should_extension_dispatch(lvalues, rvalues):
229 # Call the method on lvalues
--> 230 res_values = op(lvalues, rvalues)
231
232 elif is_scalar(rvalues) and isna(rvalues):
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\arrays\datetimelike.py in wrapper(self, other)
116 other = _validate_comparison_value(self, other)
117 except InvalidComparison:
--> 118 return invalid_comparison(self, other, op)
119
120 dtype = getattr(other, "dtype", None)
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\invalid.py in invalid_comparison(left, right, op)
32 else:
33 typ = type(right).__name__
---> 34 raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
35 return res_values
36
TypeError: Invalid comparison between dtype=datetime64[ns] and date
Pandas 中的日期默认为 datetime64[ns] 类型,因此不应将它们与 datetime.date 对象进行比较。
(第 1 行之所以没有报错,很可能是因为 history_pd['ds'] 并不是 datetime64[ns] 类型;可以用 history_pd['ds'].dtype 确认。)
相反,您可以直接使用日期字符串,pandas 会正确处理这种比较。此外,如果使用 loc 同时指定行和列,语法会比示例中的写法更清晰。
datestr = '2018-01-01'
actuals_pd = history_pd.loc[history_pd['ds'] < datestr, 'y'] # line 1
predicted_pd = forecast_pd.loc[forecast_pd['ds'] < datestr, 'yhat'] # line 2
我正在尝试运行以下两个代码片段,其中一部分来自这个 Databricks 教程。
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
from datetime import date
# get historical actuals & predictions for comparison
actuals_pd = history_pd[ history_pd['ds'] < date(2018, 1, 1) ]['y'] # line 1
predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat'] # line 2
然而,第 2 行 predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat']
抛出了 TypeError: Invalid comparison between dtype=datetime64[ns] and date 的错误信息。
上一行(第 1 行)的写法看起来几乎相同,却没有出现这个错误。我还打印了 predicted_pd 和 actuals_pd 的类型以供参考。
TypeError Traceback (most recent call last)
<ipython-input-15-748394f8994f> in <module>
----> 1 predicted_pd = forecast_pd[ forecast_pd['ds'] < date(2018, 1, 1) ]['yhat']
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\__init__.py in wrapper(self, other)
368 rvalues = extract_array(other, extract_numpy=True)
369
--> 370 res_values = comparison_op(lvalues, rvalues, op)
371
372 return self._construct_result(res_values, name=res_name)
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\array_ops.py in comparison_op(left, right, op)
228 if should_extension_dispatch(lvalues, rvalues):
229 # Call the method on lvalues
--> 230 res_values = op(lvalues, rvalues)
231
232 elif is_scalar(rvalues) and isna(rvalues):
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
63 other = item_from_zerodim(other)
64
---> 65 return method(self, other)
66
67 return new_method
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\arrays\datetimelike.py in wrapper(self, other)
116 other = _validate_comparison_value(self, other)
117 except InvalidComparison:
--> 118 return invalid_comparison(self, other, op)
119
120 dtype = getattr(other, "dtype", None)
~\Anaconda3\envs\sparkenv\lib\site-packages\pandas\core\ops\invalid.py in invalid_comparison(left, right, op)
32 else:
33 typ = type(right).__name__
---> 34 raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")
35 return res_values
36
TypeError: Invalid comparison between dtype=datetime64[ns] and date
Pandas 中的日期默认为 datetime64[ns] 类型,因此不应将它们与 datetime.date 对象进行比较。
(第 1 行之所以没有报错,很可能是因为 history_pd['ds'] 并不是 datetime64[ns] 类型;可以用 history_pd['ds'].dtype 确认。)
相反,您可以直接使用日期字符串,pandas 会正确处理这种比较。此外,如果使用 loc 同时指定行和列,语法会比示例中的写法更清晰。
datestr = '2018-01-01'
actuals_pd = history_pd.loc[history_pd['ds'] < datestr, 'y'] # line 1
predicted_pd = forecast_pd.loc[forecast_pd['ds'] < datestr, 'yhat'] # line 2