hvplot histogram: DataError: None of the available storage backends were able to support the supplied data format
hvplot histogram: DataError: None of the available storage backends were able to support the supplied data format
import pandas as pd
import numpy as np
import random
import copy
import feather
import plotly.graph_objects as go
import plotly.express as px
import panel as pn
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import cartopy
import cartopy.feature as cf
from geoviews import opts
from cartopy import crs as ccrs
import hvplot.pandas # noqa
import colorcet as cc
from colorcet.plotting import swatch
hv.extension("bokeh","plotly")
我有一个名为 test
的数据框:
Out[5]:
age age_band car_ins_renew_month people_type
0 NaN NaN NaN sign_up_only
1 61.0 55-64 7.0 active_interest
2 NaN NaN NaN sign_up_only
3 55.0 55-64 8.0 previous_customer
4 NaN NaN NaN sign_up_only
... ... ... ... ...
107627 42.0 35-44 6.0 previous_customer
107628 73.0 65+ 7.0 previous_customer
107629 NaN NaN NaN sign_up_only
107630 NaN NaN NaN sign_up_only
107631 NaN NaN NaN sign_up_only
[107632 rows x 4 columns]
In [6]: test.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107632 entries, 0 to 107631
Data columns (total 4 columns):
age 73289 non-null float32
age_band 73289 non-null category
car_ins_renew_month 64290 non-null float32
people_type 107632 non-null category
dtypes: category(2), float32(2)
memory usage: 1.0 MB
对于整个 test
数据框,我可以使用 hvplot 成功为以下两个属性生成直方图:
age
(包含 age_band
的悬停数据):
In [7]: test.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
car_ins_renew_month
:
test.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
但是,对于 test
的子集,其中 people_type
等于 previous_customer
:
In [11]: test_prev_cust = test.loc[test["people_type"]=="previous_customer"]
虽然我可以成功生成 car_ins_renew_month
属性的直方图:
In [13]: test_prev_cust.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
当我尝试为 age
属性生成直方图时,出现以下错误:
In [14]: test_prev_cust.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-100-b2108cee586d> in <module>
7 color="teal",legend=False,
8 line_width=4,line_color="w",
----> 9 width=650,height=280
10 )
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in hist(self, y, by, **kwds)
399 The HoloViews representation of the plot.
400 """
--> 401 return self(kind='hist', x=None, y=y, by=by, **kwds)
402
403 def kde(self, y=None, by=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
70 return pn.panel(plot, **panel_dict)
71
---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y)
73
74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
942 obj = DynamicMap(cbcallable, streams=[self.stream])
943 else:
--> 944 obj = method(x, y)
945
946 if self.crs and self.project:
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in hist(self, x, y, data)
1383 if self.by:
1384 hist = hists = histogram(
-> 1385 ds.groupby(self.by), dimension=y, **hist_opts
1386 )
1387 hist = hists.last
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/param/parameterized.py in __new__(class_, *args, **params)
2810 inst = class_.instance()
2811 inst.param._set_name(class_.__name__)
-> 2812 return inst.__call__(*args,**params)
2813
2814 def __call__(self,*args,**kw):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
--> 164 return element.apply(self, **kwargs)
165
166
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
113 for k, v in self._obj.data.items():
114 new_val = v.apply(function, dynamic=dynamic, streams=streams,
--> 115 link_inputs=link_inputs, **kwargs)
116 if new_val is not None:
117 mapped.append((k, new_val))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
108 if hasattr(function, 'dynamic'):
109 inner_kwargs['dynamic'] = False
--> 110 return function(self._obj, **inner_kwargs)
111 elif self._obj._deep_indexable:
112 mapped = []
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
159 for k, el in element.items()])
160 elif isinstance(element, ViewableElement):
--> 161 return self._apply(element)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in _apply(self, element, key)
119 for hook in self._preprocess_hooks:
120 kwargs.update(hook(self, element))
--> 121 ret = self._process(element, key)
122 for hook in self._postprocess_hooks:
123 ret = hook(self, ret, **kwargs)
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/operation/element.py in _process(self, element, key)
657 hist *= edges[1]-edges[0]
658 return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
--> 659 label=element.label, **params)
660
661
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/element/chart.py in __init__(self, data, edges, **params)
196 elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
197 data = data[::-1]
--> 198 super(Histogram, self).__init__(data, **params)
199
200
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/__init__.py in __init__(self, data, kdims, vdims, **kwargs)
209 validate_vdims = kwargs.pop('_validate_vdims', True)
210 initialized = Interface.initialize(type(self), data, kdims, vdims,
--> 211 datatype=kwargs.get('datatype'))
212 (data, self.interface, dims, extra_kws) = initialized
213 super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/interface.py in initialize(cls, eltype, data, kdims, vdims, datatype)
252 % (intfc.__name__, e))
253 error = ' '.join([error, priority_error])
--> 254 raise DataError(error)
255
256 return data, interface, dims, extra_kws
DataError: None of the available storage backends were able to support the supplied data format.
我知道可以使用 hvplot 为 test 数据框的子集成功生成 car_ins_renew_month
和 age
属性的直方图,因为当 people_type
等于 active_interest
时我能够做到。只是当 people_type
等于 previous_customer
时我做不到。
我注意到 test_prev_cust
数据框的一件事是 age_band
的两个类别中没有人:
In [18]: test_prev_cust["age_band"].value_counts()
Out[18]:
45-54 13457
55-64 10369
35-44 8760
65+ 7801
25-34 0
18-24 0
Name: age_band, dtype: int64
这可能是我的问题的原因吗?如果是这样,那么有没有办法解决这个问题,并且仍然将 age_band
作为悬停数据包含在我的绘图中?
谢谢
软件版本:
bokeh 1.4.0 py37_0
cartopy 0.17.0 py37haea56ea_1
colorcet 2.0.2 py_0 pyviz
feather-format 0.4.0 py_1003 conda-forge
geoviews 1.6.5 py_0 pyviz
holoviews 1.12.6 py_0 pyviz
hvplot 0.5.2 py_0 pyviz
jupyter 1.0.0 py37_7
matplotlib 3.1.1 py37h54f8f79_0
notebook 6.0.2 py37_0
numpy 1.17.3 py37h4174a10_0
pandas 0.25.3 py37h0a44026_0
panel 0.7.0 py_0 pyviz
plotly 4.3.0 py_0 plotly
plotly_express 0.4.1 py_0 plotly
python 3.7.5 h359304d_0
seaborn 0.9.0 pyh91ea838_1
我在 os x Catalina 上,使用最新版本的 Firefox,我正在使用 Jupyter notebook。
问题在于您的变量 age_band 是分类变量,其中某些类别的计数为 0,而它又与关键字 by=['age_band'] 一起使用。
您可以尝试将 age_band 转换为字符串,但在这种情况下,我认为创建条形图更好:
age_band_group = df.groupby(['age_band']
).agg(count=('age', np.size)
).fillna(0)
age_band_group.hvplot.bar(color='teal')
import pandas as pd
import numpy as np
import random
import copy
import feather
import plotly.graph_objects as go
import plotly.express as px
import panel as pn
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import cartopy
import cartopy.feature as cf
from geoviews import opts
from cartopy import crs as ccrs
import hvplot.pandas # noqa
import colorcet as cc
from colorcet.plotting import swatch
hv.extension("bokeh","plotly")
我有一个名为 test
的数据框:
Out[5]:
age age_band car_ins_renew_month people_type
0 NaN NaN NaN sign_up_only
1 61.0 55-64 7.0 active_interest
2 NaN NaN NaN sign_up_only
3 55.0 55-64 8.0 previous_customer
4 NaN NaN NaN sign_up_only
... ... ... ... ...
107627 42.0 35-44 6.0 previous_customer
107628 73.0 65+ 7.0 previous_customer
107629 NaN NaN NaN sign_up_only
107630 NaN NaN NaN sign_up_only
107631 NaN NaN NaN sign_up_only
[107632 rows x 4 columns]
In [6]: test.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107632 entries, 0 to 107631
Data columns (total 4 columns):
age 73289 non-null float32
age_band 73289 non-null category
car_ins_renew_month 64290 non-null float32
people_type 107632 non-null category
dtypes: category(2), float32(2)
memory usage: 1.0 MB
对于整个 test
数据框,我可以使用 hvplot 成功为以下两个属性生成直方图:
age
(包含 age_band
的悬停数据):
In [7]: test.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
car_ins_renew_month
:
test.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
但是,对于 test
的子集,其中 people_type
等于 previous_customer
:
In [11]: test_prev_cust = test.loc[test["people_type"]=="previous_customer"]
虽然我可以成功生成 car_ins_renew_month
属性的直方图:
In [13]: test_prev_cust.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
当我尝试为 age
属性生成直方图时,出现以下错误:
In [14]: test_prev_cust.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-100-b2108cee586d> in <module>
7 color="teal",legend=False,
8 line_width=4,line_color="w",
----> 9 width=650,height=280
10 )
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in hist(self, y, by, **kwds)
399 The HoloViews representation of the plot.
400 """
--> 401 return self(kind='hist', x=None, y=y, by=by, **kwds)
402
403 def kde(self, y=None, by=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
70 return pn.panel(plot, **panel_dict)
71
---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y)
73
74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
942 obj = DynamicMap(cbcallable, streams=[self.stream])
943 else:
--> 944 obj = method(x, y)
945
946 if self.crs and self.project:
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in hist(self, x, y, data)
1383 if self.by:
1384 hist = hists = histogram(
-> 1385 ds.groupby(self.by), dimension=y, **hist_opts
1386 )
1387 hist = hists.last
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/param/parameterized.py in __new__(class_, *args, **params)
2810 inst = class_.instance()
2811 inst.param._set_name(class_.__name__)
-> 2812 return inst.__call__(*args,**params)
2813
2814 def __call__(self,*args,**kw):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
--> 164 return element.apply(self, **kwargs)
165
166
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
113 for k, v in self._obj.data.items():
114 new_val = v.apply(function, dynamic=dynamic, streams=streams,
--> 115 link_inputs=link_inputs, **kwargs)
116 if new_val is not None:
117 mapped.append((k, new_val))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
108 if hasattr(function, 'dynamic'):
109 inner_kwargs['dynamic'] = False
--> 110 return function(self._obj, **inner_kwargs)
111 elif self._obj._deep_indexable:
112 mapped = []
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
159 for k, el in element.items()])
160 elif isinstance(element, ViewableElement):
--> 161 return self._apply(element)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in _apply(self, element, key)
119 for hook in self._preprocess_hooks:
120 kwargs.update(hook(self, element))
--> 121 ret = self._process(element, key)
122 for hook in self._postprocess_hooks:
123 ret = hook(self, ret, **kwargs)
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/operation/element.py in _process(self, element, key)
657 hist *= edges[1]-edges[0]
658 return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
--> 659 label=element.label, **params)
660
661
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/element/chart.py in __init__(self, data, edges, **params)
196 elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
197 data = data[::-1]
--> 198 super(Histogram, self).__init__(data, **params)
199
200
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/__init__.py in __init__(self, data, kdims, vdims, **kwargs)
209 validate_vdims = kwargs.pop('_validate_vdims', True)
210 initialized = Interface.initialize(type(self), data, kdims, vdims,
--> 211 datatype=kwargs.get('datatype'))
212 (data, self.interface, dims, extra_kws) = initialized
213 super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/interface.py in initialize(cls, eltype, data, kdims, vdims, datatype)
252 % (intfc.__name__, e))
253 error = ' '.join([error, priority_error])
--> 254 raise DataError(error)
255
256 return data, interface, dims, extra_kws
DataError: None of the available storage backends were able to support the supplied data format.
我知道可以使用 hvplot 为 test 数据框的子集成功生成 car_ins_renew_month
和 age
属性的直方图,因为当 people_type
等于 active_interest
时我能够做到。只是当 people_type
等于 previous_customer
时我做不到。
我注意到 test_prev_cust
数据框的一件事是 age_band
的两个类别中没有人:
In [18]: test_prev_cust["age_band"].value_counts()
Out[18]:
45-54 13457
55-64 10369
35-44 8760
65+ 7801
25-34 0
18-24 0
Name: age_band, dtype: int64
这可能是我的问题的原因吗?如果是这样,那么有没有办法解决这个问题,并且仍然将 age_band
作为悬停数据包含在我的绘图中?
谢谢
软件版本:
bokeh 1.4.0 py37_0
cartopy 0.17.0 py37haea56ea_1
colorcet 2.0.2 py_0 pyviz
feather-format 0.4.0 py_1003 conda-forge
geoviews 1.6.5 py_0 pyviz
holoviews 1.12.6 py_0 pyviz
hvplot 0.5.2 py_0 pyviz
jupyter 1.0.0 py37_7
matplotlib 3.1.1 py37h54f8f79_0
notebook 6.0.2 py37_0
numpy 1.17.3 py37h4174a10_0
pandas 0.25.3 py37h0a44026_0
panel 0.7.0 py_0 pyviz
plotly 4.3.0 py_0 plotly
plotly_express 0.4.1 py_0 plotly
python 3.7.5 h359304d_0
seaborn 0.9.0 pyh91ea838_1
我在 os x Catalina 上,使用最新版本的 Firefox,我正在使用 Jupyter notebook。
问题在于您的变量 age_band 是分类变量,其中某些类别的计数为 0,而它又与关键字 by=['age_band'] 一起使用。
您可以尝试将 age_band 转换为字符串,但在这种情况下,我认为创建条形图更好:
age_band_group = df.groupby(['age_band']
).agg(count=('age', np.size)
).fillna(0)
age_band_group.hvplot.bar(color='teal')