将 numpy 数组发送到 Bokeh 回调以作为音频播放

Sending numpy array to Bokeh callback to play as audio

我目前正在尝试编写一个脚本,在 Bokeh 中显示(多声道)音频的频谱图。由于我要对音频做一些处理,不方便把它们保存为磁盘上的文件,所以我希望尽量全部在 Python 中完成。

我们的想法是创建一个图表,其中每一列对应一个音频样本,每一行对应一个通道。

现在我希望在点击某个子图时能够听到相应的音频。我已经完成了显示频谱图的非交互部分,编写了一个用于播放音频的回调,并将其绑定到每个子图上。

这是代码的最小工作示例:

import numpy as np
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.palettes import Viridis256
from bokeh.layouts import gridplot


def bokeh_subplots(specs, wavs):
    """Show a grid of spectrogram images with a tap callback meant to play audio.

    Each (spectrogram, waveform) pair from ``specs``/``wavs`` becomes one row of
    the grid, and each channel of that pair becomes one plot in the row. Rows
    with fewer channels than the widest input are padded with ``None``.

    Args:
        specs: Sequence of arrays indexed as ``s[channel]``; every ``s[channel]``
            is drawn as a 2-D image (its ``shape[0]`` spans the y axis and its
            ``shape[1]`` spans the x axis).
        wavs: Sequence of arrays indexed as ``w[channel]``, paired one-to-one
            with ``specs``; ``w[channel]`` is handed to the JS callback as the
            ``raw`` column of a ``ColumnDataSource``.

    Returns:
        None. The grid is displayed through ``show``.
    """
    # Widest channel count among the inputs; fixes the length of every grid row.
    channels = max([s.shape[0] for s in specs])

    def inner(p, s, w):
        # p is the plot, s is the spectrogram, and w is the numpy array representing the sound
        source = ColumnDataSource(data=dict(raw=w))
        # NOTE(review): the JS template below contains no %-placeholders, so the
        # trailing `% w` looks like a leftover; the samples reach the browser
        # through `source`, not through string formatting.
        # NOTE(review): the Blob is built from the raw Float32 samples and only
        # labelled 'audio/wave'; presumably the browser cannot decode it without
        # a WAV container header, which would explain the silent playback -- confirm.
        callback = CustomJS(args=dict(source=source),
                            code =
                            """
                            function typedArrayToURL(typedArray, mimeType) {
                                return URL.createObjectURL(new Blob([typedArray.buffer], {type: mimeType}))
                            }

                            const bytes = new Float32Array(source.data['raw'].length);

                            for(let i = 0; i < source.data['raw'].length; i++) {
                                bytes[i] = source.data['raw'][i];
                            }

                            const url = typedArrayToURL(bytes, 'audio/wave');

                            var audio = new Audio(url);
                            audio.src = url;
                            audio.load();
                            audio.play();
                            """ % w)
        # we plot the actual spectrogram here, which works fine
        p.image([s], x=0, y=0, dw=s.shape[1], dh=s.shape[0], palette = Viridis256)
        # then we add the callback to be triggered on a click within the plot
        p.js_on_event('tap', callback)
        return p
    
    # children will contain the plots in the form of a list of list
    children = []
    for s, w in zip(specs, wavs):
        # initialise the plots for each channel of a spectrogram, missing channels will be left blank
        glyphs = [None] * channels
        for i in range(s.shape[0]):
            # apply the function above to create the plots
            # (axis ranges are taken from the channel's own 2-D shape)
            glyphs[i] = inner(figure(x_range=(0, s[i].shape[1]), y_range=(0, s[i].shape[0])),
                              s[i], w[i])
        children.append(glyphs)

    # finally, create the grid of plots and display
    grid = gridplot(children=children, plot_width=250, plot_height=250)
    show(grid)

# Random stand-in data for illustration: (channels, frames) for each recording.
# Spectrograms have 80 rows per channel; waveforms carry 96 samples per frame.
_demo_shapes = ((4, 800), (2, 750))
random_specs = [
    np.random.uniform(size=(n_channels, 80, n_frames))
    for n_channels, n_frames in _demo_shapes
]
random_wavs = [
    np.random.uniform(low=-1, high=1, size=(n_channels, 96 * n_frames))
    for n_channels, n_frames in _demo_shapes
]

# This creates a plot with 2 rows and 4 columns
bokeh_subplots(specs=random_specs, wavs=random_wavs)

我基本上照搬了 this page 来编写回调,但不幸的是它似乎不适合我的用例:当我运行脚本时,绘图可以正确生成,但音频无法播放。我还尝试过先将数组编码为 base64 再创建数据 URI(如 here 等处所述),结果相同。而使用一个只提供本地文件路径的更简单的回调做同样的事情时,则一切正常:

# Simplest variant: the browser loads an existing audio file by path and plays it.
play_file_js = """var audio = new Audio("path/to/my/file.wav");
                              audio.play();
                           """
callback = CustomJS(code=play_file_js)

这可行,但对于我的目的来说不够灵活(因为我要么需要为每个通道单独保存一个文件,要么必须完全放弃选择通道的功能)。

我对 JavaScript 和 Bokeh 都非常陌生,所以对这里的问题有点不知所措。根据上面的页面,我认为问题出在我向回调提供数组的方式上,但我不知道如何修复它。(另外,我也不确定逐元素填充 'bytes' 数组是否是一种高效的做法,但目前我只想先让脚本正常工作。)

有人能就这里出现的问题给我一些指点吗?

在进一步研究了 JavaScript 之后,我最终为回调选择了另一条路线(即 here 中的做法),只需极少的改动就能正常工作。这就是搜索的力量……

这不一定是最有效的方法,但它确实有效,这对我现在来说已经足够了。

我把完整的函数贴在这里,以防有人遇到同样的问题。代码应该可以直接运行,我还留下了一些注释来解释各部分的作用。

import itertools
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.palettes import Viridis256
from bokeh.layouts import gridplot

def bokeh_subplots(specs, wavs, sr=48000, hideaxes=True):
    """Show a grid of spectrograms; tapping one plays the matching audio.

    Every input recording becomes one column of the grid and every channel of
    it one row. Recordings with fewer channels than the widest one leave the
    remaining cells of their column blank.

    Args:
        specs: List of numpy arrays indexed as ``s[channel]``. Each
            ``s[channel]`` is drawn as a 2-D image with its first axis along y
            and its second axis along x. (The original author describes the
            layout as ``(channels, time, frequency)`` -- confirm against your
            data.)
        wavs: List of numpy arrays of shape ``(channels, samples)``, in
            one-to-one correspondence with ``specs`` (tested by the original
            author with float32 values).
        sr: Sampling rate in Hz used for playback; formatted into the
            JavaScript as an integer.
        hideaxes: If True, the axes of every subplot are hidden.

    Returns:
        None. The grid is displayed through ``show``.

    Raises:
        ValueError: If ``specs`` is empty (there is no channel count to take
            the maximum of).
    """
    # Not strictly required, but fixes the number of rows of the final plot.
    channels = max(s.shape[0] for s in specs)

    # The callback is identical for every subplot apart from its data source,
    # so the sampling rate is formatted into the template only once.
    play_js = """
        var AudioCtor = window.AudioContext || window.webkitAudioContext;
        var audioCtx = new AudioCtor();
        var samples = source.data['raw'];
        // Mono buffer holding the whole clip at the requested sampling rate.
        var audioBuffer = audioCtx.createBuffer(1, samples.length, %d);
        audioBuffer.getChannelData(0).set(samples);

        // Named `player` so it does not shadow the `source` argument that
        // Bokeh injects into this callback.
        var player = audioCtx.createBufferSource();
        player.buffer = audioBuffer;
        // Connect to the destination so the sound is actually audible.
        player.connect(audioCtx.destination);
        // Release the context when playback ends; browsers may limit how
        // many contexts can be open at once, and one is created per tap.
        player.onended = function () { audioCtx.close(); };
        player.start();
        """ % sr

    def inner(p, s, w):
        # Draw one channel's spectrogram on plot `p` and wire up the tap
        # callback that plays the waveform `w`.
        source = ColumnDataSource(data=dict(raw=w))
        callback = CustomJS(args=dict(source=source), code=play_js)
        p.image([s], x=0, y=0, dw=s.shape[1], dh=s.shape[0], palette=Viridis256)
        p.js_on_event('tap', callback)
        return p

    children = []
    for s, w in zip(specs, wavs):
        # One entry per grid row; channels this recording lacks stay None.
        glyphs = [None] * channels
        for i, spec_channel in enumerate(s):
            plot = figure(x_range=(0, spec_channel.shape[1]),
                          y_range=(0, spec_channel.shape[0]))
            if hideaxes:
                plot.axis.visible = False
            glyphs[i] = inner(plot, spec_channel, w[i])
        children.append(glyphs)

    # Transpose so that each column corresponds to one (multichannel)
    # spectrogram and each row corresponds to one of its channels.
    children = list(map(list, itertools.zip_longest(*children, fillvalue=None)))
    grid = gridplot(children=children, plot_width=100, plot_height=100)
    show(grid)