复杂 Bokeh 图中 CheckboxButtonGroup 和 Legend 之间的交互

Question

我有一个复杂的多变量数据集，其结构与此类似：

import pandas as pd
import numpy as np
import datetime as dt
from itertools import cycle, islice

# Synthetic dataset: N rows spaced `nbdays` apart after `start_date`, with a
# linearly increasing rating and categorical columns that cycle through a
# fixed pool of values.
N = 24
start_date = dt.date(2016, 1, 1)
nbdays = int(365 / N)  # whole number of days between consecutive rows

_row_numbers = range(1, N + 1)
_cyclic_values = {
    'Plot': range(1, 9),
    'Treatment': range(1, 7),
    'Trial': range(1, 4),
    'Name': "ABCDEF",
    'Target': "JKLMNOP",
    'Part': "WXYZ",
}

_data = {
    'Date': [start_date + dt.timedelta(days=row * nbdays) for row in _row_numbers],
    'Rating': [(100 / N) * row for row in _row_numbers],
}
# Each categorical column repeats its value pool until N rows are filled.
for _name, _pool in _cyclic_values.items():
    _data[_name] = list(islice(cycle(_pool), N))

df = pd.DataFrame(_data)

我要：

绘制 Rating 随 Date 变化的图，并按 Treatment 着色
提供交互式图例，单击某个 Treatment 即可切换该 Treatment 的可见性
在图的一侧为其他参数（Plot、Trial、Name、Target、Part）提供按钮，单击按钮即可切换相应点的可见性
将鼠标悬停在某个点上时显示该点的所有参数

这是我的代码（变量 df 即上面的数据集）：

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.palettes import Set1 
from bokeh.models import (CDSView, BooleanFilter, Legend,
                          DatetimeTickFormatter, Range1d,
                          HoverTool)
from bokeh.models.widgets import CheckboxButtonGroup, Div
from bokeh.layouts import widgetbox, layout
from bokeh.io import curdoc

# Filterable columns. 'Treatment' (first) drives the colour mapping; each of
# the remaining columns gets its own caption and button group.
columns = ['Treatment', 'Plot', 'Trial', 'Name', 'Target', 'Part']
categories = [sorted(df[name].unique()) for name in columns]
all_columns = ['Date', 'Rating', *columns]

# Pair every distinct treatment value with a Set1 colour.
treatment_colormap = {
    treatment: colour for treatment, colour in zip(categories[0], Set1[6])
}

# Input controls: one caption and one button group (all buttons active) per
# non-treatment column.
divs = [Div(text='{}:'.format(name)) for name in columns[1:]]
controls = [
    CheckboxButtonGroup(
        labels=[str(value) for value in values],
        active=list(range(len(values))),
    )
    for values in categories[1:]
]

# Data source used by the plot; starts with an empty list for every column.
source = ColumnDataSource(data={name: [] for name in all_columns})


def select():
    """Return the rows of ``df`` matching every button group's active labels.

    For each control, the active button indices are translated back to the
    category values of the corresponding column; a row is kept only when it
    matches an active value in all of those columns.
    """
    masks = []
    for control, column, category in zip(controls, columns[1:], categories[1:]):
        chosen_values = [category[index] for index in control.active]
        masks.append(df[column].isin(chosen_values))
    # AND all per-column masks together into a single row selector.
    return df[np.logical_and.reduce(masks)]

def update():
    """Reload the data source from the current selection and draw one glyph per treatment.

    Uses the module-level ``p``, ``source``, ``all_columns`` and
    ``treatment_colormap``. Every call adds new circle renderers and a new
    ``Legend`` to ``p``; nothing created by an earlier call is removed here.
    """
    sdf = select()
    # Replace the source contents with the currently selected rows.
    source.data = dict((column, sdf[column]) for column in all_columns)

    glyphs = []
    selected_treatments = sorted(sdf['Treatment'].unique())
    for treatment in selected_treatments:
        # One boolean-filtered view per treatment, so each treatment gets its
        # own renderer and can have its own legend entry.
        booleans = [value == treatment for value in source.data['Treatment']]
        view = CDSView(source=source, filters=[BooleanFilter(booleans)])
        color = treatment_colormap[treatment]
        glyphs.append(p.circle(x='Date', y='Rating', source=source, view=view, line_color=color, fill_color=color))

    # Pair each treatment label with the renderer created for it above.
    legend = Legend(items=[
        ("treatment {}".format(treatment), [glyph]) for treatment, glyph
        in zip(selected_treatments, glyphs)
        ])

    # A new legend is attached below the plot on every call.
    p.add_layout(legend, 'below')
    p.legend.click_policy='hide'
    p.legend.location = 'bottom_center'
    p.legend.orientation = 'horizontal'

# Re-run update() whenever the set of active buttons changes in any group;
# the (attr, old, new) callback arguments are ignored.
for control in controls:
    control.on_change('active', lambda attr, old, new: update())

def datetime_in_miliseconds(date):
    """Convert a ``'dd/mm/YYYY'`` date string to milliseconds since the Unix epoch.

    Args:
        date: Date string in ``%d/%m/%Y`` format, e.g. ``'31/12/2016'``.
            It is parsed as a naive datetime at midnight.

    Returns:
        float: Milliseconds between 1970-01-01 00:00:00 and the given date.

    Raises:
        ValueError: If ``date`` does not match ``%d/%m/%Y``.
    """
    parsed = dt.datetime.strptime(date, '%d/%m/%Y')
    # Build the naive epoch directly: ``datetime.utcfromtimestamp`` is
    # deprecated since Python 3.12 and produced this same naive value.
    epoch = dt.datetime(1970, 1, 1)
    return (parsed - epoch).total_seconds() * 1000.0

# Hover tooltips: the date formatted as dd/mm/YYYY, followed by one entry for
# every remaining column of the data source.
hover = HoverTool(tooltips=[('Date', '@Date{%d/%m/%Y}')] + [(column, '@'+column) 
                            for column in all_columns[1:]], formatters={
                             'Date': 'datetime', # use 'datetime' formatter for 'Date' field
                             })


# The figure must exist before update() is called below, because update()
# draws on the module-level `p`.
p = figure(x_axis_type="datetime", tools=[hover])
p.title.text = 'Date vs Rating'
p.xaxis.axis_label = 'Date'
p.xaxis.formatter = DatetimeTickFormatter(days = ['%d/%m/%y'])
# Fix the x range to the whole of 2016, expressed in epoch milliseconds.
start = datetime_in_miliseconds('01/01/2016')
end = datetime_in_miliseconds('31/12/2016')
p.x_range=Range1d(start, end)
p.yaxis.axis_label = 'Rating'
p.ygrid.band_fill_color="olive"
p.ygrid.band_fill_alpha = 0.1
p.y_range=Range1d(0,100)

sizing_mode = 'scale_width'
# sum(zip(...), tuple()) flattens the (div, control) pairs into a single
# sequence: div, control, div, control, ...
inputs = widgetbox(*sum(zip(divs, controls), tuple()), sizing_mode=sizing_mode)

# Plot on the left, input widgets on the right.
l = layout([[p, inputs]], sizing_mode=sizing_mode)
update()  # initial load of the data
curdoc().add_root(l)

当你运行 bokeh serve --show main.py（bokeh 版本 0.12.10）时看起来像这样：

什么有效：

点击图例可切换 Treatment 的可见性

什么不起作用：

悬停标签中显示的信息不正确（前 6 个点的悬停标签显示相同的信息，接下来的 6 个点也显示彼此相同的信息，依此类推）。
点击右边的按钮会弄乱图表（轴标签消失，并且第二个图例显示在图表上方而不是下方）。

我该如何解决这两个问题？

Answer 1

这里有一些想法：

用 CustomJSFilter 包装一个 JavaScript 函数来完成数据筛选。
只调用一次 p.circle() 来画出所有的圆。
使用 factor_cmap 将 Treatment 列映射到颜色。
使用 tags 属性在 Python 中保存数据，并在 JavaScript 中读取。

但这样一来只有一个 GlyphRenderer，图例的可见性切换就无法按 Treatment 分别生效。

要解决此问题，请创建一个虚拟 ColumnDataSource，并用它多次调用 p.circle()，得到一组虚拟 GlyphRenderer。为这些虚拟 GlyphRenderer 创建图例，并将它们 visible 属性的变化关联到一个调用 source.change.emit() 的 CustomJS 回调，以便重新绘制图形。

因为所有过滤器计算都由 javascript 执行，您可以创建一个可以与用户输入交互的静态 html 文件。

这是笔记本：

http://nbviewer.jupyter.org/gist/ruoyu0088/01ddf28ed041508304843f49a794d66a

from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CustomJS, CDSView, CustomJSFilter, HoverTool
from bokeh.models.widgets import CheckboxButtonGroup
from bokeh.io import show, output_notebook
from bokeh.palettes import Set1 
from bokeh.transform import factor_cmap
from bokeh.layouts import widgetbox, layout

import pandas as pd
import numpy as np
import datetime as dt
from itertools import cycle, islice

# Send Bokeh output to the notebook (this answer was written as a Jupyter notebook).
output_notebook()

# Synthetic dataset: N rows spaced `nbdays` apart after `start_date`, with a
# linearly increasing rating and categorical columns that cycle through a
# fixed pool of values.
N = 24
start_date = dt.date(2016, 1, 1)
nbdays = int(365 / N)  # whole number of days between consecutive rows

_row_numbers = range(1, N + 1)
_cyclic_values = {
    'Plot': range(1, 9),
    'Treatment': range(1, 7),
    'Trial': range(1, 4),
    'Name': "ABCDEF",
    'Target': "JKLMNOP",
    'Part': "WXYZ",
}

_data = {
    'Date': [start_date + dt.timedelta(days=row * nbdays) for row in _row_numbers],
    'Rating': [(100 / N) * row for row in _row_numbers],
}
# Each categorical column repeats its value pool until N rows are filled.
for _name, _pool in _cyclic_values.items():
    _data[_name] = list(islice(cycle(_pool), N))

df = pd.DataFrame(_data)

# Columns that get a button group, and the distinct values found in each.
columns = ('Plot', 'Trial', 'Name', 'Target', 'Part')
unique_items = [df[name].unique() for name in columns]

# Treatment values are turned into strings before the source is built.
df["Treatment"] = df["Treatment"].astype(str)

# `source` holds the real data; `dummy_source` is an empty x/y source.
source = ColumnDataSource(data=df)
dummy_source = ColumnDataSource(data=dict(x=[], y=[]))

# Hover tooltips: formatted date first, then one entry per filter column.
hover_tooltips = [('Date', '@Date{%d/%m/%Y}')]
hover_tooltips.extend((name, '@' + name) for name in columns)
hover = HoverTool(
    tooltips=hover_tooltips,
    formatters={'Date': 'datetime'},  # use the 'datetime' formatter for 'Date'
)

p = figure(x_axis_type="datetime", tools=[hover])
# Colour mapper keyed on the (string) Treatment column.
color = factor_cmap("Treatment", Set1[9], df.Treatment.unique())

# One circle renderer per treatment, drawn from the empty dummy source: these
# exist to give every treatment its own legend entry. The treatment label is
# stored in `tags` so the JavaScript filter can read it back.
for i, label in enumerate(df.Treatment.unique()):
    dummy_circle = p.circle(x="x", y="y", source=dummy_source, legend="Treatment {}".format(label), color=Set1[9][i])
    dummy_circle.tags = [label]

p.legend.location = "bottom_right"
# Clicking a legend entry toggles the `visible` flag of its renderer.
p.legend.click_policy = "hide"

# Callback body converted to JavaScript by CustomJS.from_py_func below.
# The `source=source` default is how the data source is handed to the callback;
# emitting its change signal is used here to make the plot redraw.
def source_change(source=source):
    source.change.emit()

callback_source_change = CustomJS.from_py_func(source_change)

# Fire the callback whenever a legend click changes a dummy renderer's
# `visible` flag.
for item in p.legend[0].items:
    item.renderers[0].js_on_change("visible", callback_source_change)

# One button group per filter column, labelled with that column's distinct
# values (as strings) and with every button initially active.
controls = [CheckboxButtonGroup(labels=items.astype(str).tolist(), active=list(range(len(items)))) for items in unique_items]
widgets = widgetbox(*controls)

# Record on each control which column it filters; func_filter reads this tag.
for name, control in zip(columns, controls):
    control.tags = [name]

# Row filter converted to JavaScript by CustomJSFilter.from_py_func, so the
# body is deliberately kept to simple constructs (index loops, dict([...])).
# It returns one boolean per row of `source`: True when the row's treatment is
# visible in the legend AND the row matches an active button in every group.
def func_filter(source=source, legend=p.legend[0], widgets=widgets):
    # NOTE(review): looks like a debugging aid that exposes the widget box on
    # the browser window object -- confirm before removing.
    window.widgets = widgets
    # Each dummy renderer stores its treatment label as the single element of
    # `tags`; take that element so the membership test below compares a
    # string with strings rather than with one-element lists.
    visible_treatments = [item.renderers[0].tags[0] for item in legend.items if item.renderers[0].visible]
    date = source.data['Date']
    treatments = source.data['Treatment']
    res = []

    # Column name -> mapping whose keys are the labels of the active buttons.
    selectors = {}
    for widget in widgets.children:
        col = widget.tags[0]
        selectors[col] = dict([(widget.labels[i], i) for i in widget.active])

    for i in range(len(date)):
        flag = treatments[i] in visible_treatments
        for key, val in selectors.items():
            # Drop the row as soon as one column's value is not selected.
            if source.data[key][i] not in val:
                flag = False
                break
        res.append(flag)
    return res

# Single real renderer: every point is drawn by one circle() call, with row
# visibility decided by the JavaScript filter and colour by `color`.
view = CDSView(source=source, filters=[CustomJSFilter.from_py_func(func_filter)])
p.circle(x='Date', y='Rating', source=source, view=view, line_color=color, fill_color=color)  

# Trigger the same change callback whenever a button group's selection changes.
for control in controls:
    control.js_on_change("active", callback_source_change)

# Plot on the left, button groups on the right.
show(layout([[p, widgets]]))

复杂 Bokeh 图中 CheckboxButtonGroup 和 Legend 之间的交互

Interaction between CheckboxButtonGroup and Legend in complex bokeh plot

python

pandas

bokeh