PyAudio 回调函数只调用一次

Question

我正在尝试用 PyAudio 播放一个 wav 文件，使用的是 PyAudio 文档中提供的非阻塞（回调模式）I/O 示例代码：PyAudio documentation.

与文档不同的是，我想以 numpy 数组而不是字节串作为输入数据，所以我用 librosa（而不是文档中使用的 wave 模块）来加载 .wav 文件。

我的代码如下，它是独立且可复现的，你只需要把 filename 改成你想播放的任意 wav 文件的路径：

import pyaudio
import wave
import time
import numpy as np
import scipy.io.wavfile as sw
import librosa
import sys
from scipy.io.wavfile import write


############ Global variables ###################
# Path of the wav file used for the playback test.
filename = '../wav/The_Weeknd.wav'
# Lookup table: numpy dtype -> matching PyAudio sample-format constant.
np_to_pa_format = dict(
    (np.dtype(type_name), pa_constant)
    for type_name, pa_constant in (
        ('float32', pyaudio.paFloat32),
        ('int32', pyaudio.paInt32),
        ('int16', pyaudio.paInt16),
        ('int8', pyaudio.paInt8),
        ('uint8', pyaudio.paUInt8),
    )
)
# Sample width in bytes for each supported numpy dtype. The width is read from
# the dtype's own itemsize so the table cannot drift from numpy (the previous
# hand-written table listed int16 as 3 bytes; it is 2).
np_type_to_sample_width = {
    np.dtype(type_name): np.dtype(type_name).itemsize
    for type_name in ('float32', 'int32', 'int16', 'int8', 'uint8')
}
STEREO = 2  # number of channels in an interleaved stereo stream
#################################################

# Simple class which reads an input test wav file and reproduce it in a real time fashion. Used to test real time functioning.
# NOTE(review): this is the listing that exhibits the reported problem (the
# callback runs only once); the code is intentionally left unchanged.
class Player:
    # Loads the input test file through librosa.load, requesting a 44100 Hz
    # sample rate and float32 samples, with offset=30 and duration=30.
    def __init__(self):
        self.input_array, self.sample_rate = librosa.load(filename, sr=44100, dtype=np.float32, offset = 30, duration=30)

        # Debug output: sample rate and array shape of the loaded audio.
        print(self.sample_rate)
        print(self.input_array.shape)
        # Number of callback invocations that have already produced data.
        self.cycle_count = 0


    # Stream callback handed to PyAudio (see start_non_blocking_processing).
    # Slices the next frame_count samples out of self.input_array and returns
    # them as bytes together with a PyAudio status flag.
    # in_data, time_info and status are accepted but never read here.
    def pyaudio_callback(self,in_data, frame_count, time_info, status):
        # Length of the first axis of the loaded audio array.
        audio_size = np.shape(self.input_array)[0]
        #print(audio_size)
        print(frame_count)
        # The read position is always frame_count*self.cycle_count.
        # NOTE(review): the comparison is strict, so when the position equals
        # audio_size exactly the second branch runs with frames_left == 0.
        if frame_count*self.cycle_count > audio_size:
            # Processing is complete.
            print('processing complete')
            return (None, pyaudio.paComplete)
        elif frame_count*(self.cycle_count+1) > audio_size:
            # Last frame to process.
            print('1 left frame')
            frames_left = audio_size - frame_count*self.cycle_count
        else:
            # Every other frame.
            print('everyotherframe')
            frames_left = frame_count

        data = self.input_array[frame_count*self.cycle_count:frame_count*self.cycle_count+frames_left]
        # Debug check: test.wav is rewritten with the current slice on every
        # callback invocation.
        write('test.wav', 44100, data) #Saves correctly the file!

        print(data.shape)
        # Raw bytes of the slice, in the array's own dtype.
        out_data = data.tobytes()
        print('printing length: ',len(out_data))
        self.cycle_count+=1
        print(self.cycle_count)
        print(pyaudio.paContinue)
        # paContinue is returned even for the last (shorter) slice.
        return (out_data, pyaudio.paContinue)





    def start_non_blocking_processing(self, save_output=True, frame_count=2**20, listen_output=True):
        '''
        Opens a PyAudio stream that uses self.pyaudio_callback and starts it.

        save_output and frame_count are stored on the instance; frame_count is
        also passed to the stream as frames_per_buffer. listen_output selects
        an output stream when True and an input stream when False.

        Non blocking mode works on a different thread, therefore, the main thread must be kept active with, for example:
            while processing():
                time.sleep(1)
        '''
        self.save_output = save_output
        self.frame_count = frame_count

        # Initiate PyAudio
        self.pa = pyaudio.PyAudio()
        # Open stream using callback
        # The sample format is looked up from the dtype of the loaded array.
        # NOTE(review): channels=STEREO here, while the accompanying answer
        # reports that the loaded data is mono; the answer identifies this
        # mismatch as the cause of the problem described in the question.
        self.stream = self.pa.open(format=np_to_pa_format[self.input_array.dtype],
                        channels=STEREO,
                        rate=self.sample_rate,
                        output=listen_output,
                        input=not listen_output,
                        stream_callback=self.pyaudio_callback,
                        frames_per_buffer=frame_count)

        # Start the stream
        self.stream.start_stream()


    def processing(self):
        '''
        Returns true if the PyAudio stream is still active in non blocking mode.
        MUST be called AFTER self.start_non_blocking_processing.
        '''
        return self.stream.is_active()

    def terminate_processing(self):
        '''
        Terminates stream opened by self.start_non_blocking_processing.
        MUST be called AFTER self.processing returns False.
        '''
        # Stop stream.
        self.stream.stop_stream()
        self.stream.close()

        # Close PyAudio.
        self.pa.terminate()

        # Resets count.
        self.cycle_count = 0
        # Resets output.
        # Empty array of shape (0, 2) in the input dtype; nothing else in this
        # class reads output_array.
        self.output_array = np.array([[], []], dtype=self.input_array.dtype).T



# Script entry point: play the test file and block until playback ends.
if __name__ == "__main__":
    print('RUNNING MAIN')
    player = Player()
    player.start_non_blocking_processing()
    try:
        # The callback runs on PyAudio's own thread; keep the main thread
        # alive by polling until the stream reports it is no longer active.
        while player.processing():
            time.sleep(0.1)
    finally:
        # Release the stream and PyAudio even if the wait is interrupted
        # (for example by Ctrl+C).
        player.terminate_processing()

基本上我遵循了文档教程，但我以更面向对象的方式重新编写了代码（因为我需要它来完成更大的项目）。

我能够重现音频，但我注意到：

它的音调比应有的高
它只播放了一个缓冲区：回调函数只被调用一次（我打印出的 pyaudio.paContinue 总是 0），因此只播放了一个“窗口”的音频。

我一直在四处寻找解决方案，但只有类似问题的答案（此处：callback called only once），我一直无法解决我的问题。

问题回顾：我的回调函数只被调用一次（因为 pyaudio.paContinue 总是 0），我不知道如何解决这个问题.

注意：代码的灵感来自https://github.com/grupo-1-ASSD-E2/ASSD-TP4

编辑：我添加了一个测试写入来检查包含音频的 numpy 数组（代码中的 data 变量）是否正确，确实如此。 write 函数正确生成具有预期音频的 .wav 文件。

编辑 2：pyaudio.paContinue 的值为 0 似乎很正常，这是“继续处理”的预期行为，如此处所述：pyAudio Documentation.所以我真的不知道为什么我的音频在回调函数迭代 1 次后停止

Answer 1

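我解决了这个问题。我的音频数据是单声道（MONO）的，但我在 pa.open 函数中把 channels 声明成了 STEREO。librosa.load 默认会把输入的 wav 转换为单声道，即使原文件是立体声。所以我的 stream 对象期待 2 个通道（交错）的数据，却只得到了 1 个通道。这样每次回调返回的字节数只有流所需的一半，PyAudio 会把不足一个缓冲区的数据当作最后一块并结束流，所以回调只被调用一次；同时单声道样本被当作左右声道交错的样本播放，播放速度加倍，音调也就变高了。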

完整的工作代码如下：

#https://realpython.com/playing-and-recording-sound-python/#pyaudio
import pyaudio
import wave
import time
import numpy as np
import scipy.io.wavfile as sw
import librosa
import sys
from scipy.io.wavfile import write


############ Global variables ###################
# Path of the wav file used for the playback test.
filename = '../wav/The_Weeknd.wav'
# Frame size.
chunk = 512
# Lookup table: numpy dtype -> matching PyAudio sample-format constant.
np_to_pa_format = dict(
    (np.dtype(type_name), pa_constant)
    for type_name, pa_constant in (
        ('float32', pyaudio.paFloat32),
        ('int32', pyaudio.paInt32),
        ('int16', pyaudio.paInt16),
        ('int8', pyaudio.paInt8),
        ('uint8', pyaudio.paUInt8),
    )
)
# Sample width in bytes for each supported numpy dtype. The width is read from
# the dtype's own itemsize so the table cannot drift from numpy (the previous
# hand-written table listed int16 as 3 bytes; it is 2).
np_type_to_sample_width = {
    np.dtype(type_name): np.dtype(type_name).itemsize
    for type_name in ('float32', 'int32', 'int16', 'int8', 'uint8')
}
STEREO = 2  # number of channels in an interleaved stereo stream
#################################################

class Player:
    """Play a wav file through a PyAudio callback (non-blocking) stream.

    The file is decoded once with ``librosa.load`` into a one-dimensional
    (mono) float32 array, which the stream callback then hands to PyAudio one
    buffer at a time. Used to test real-time operation.
    """

    def __init__(self):
        """Load at most the first 60 seconds of ``filename`` at 44100 Hz."""
        self.input_array, self.sample_rate = librosa.load(filename, sr=44100, dtype=np.float32, duration=60)
        # Number of buffers already handed to PyAudio.
        self.cycle_count = 0

    def _next_chunk(self, frame_count):
        """Return ``(chunk, finished)`` for the next buffer and advance the count.

        ``chunk`` always holds exactly ``frame_count`` samples; the final chunk
        is zero-padded. It is ``None`` when no samples are left. ``finished``
        is true when this chunk contains the last samples of the audio.

        The read position is ``frame_count * self.cycle_count``, so
        ``frame_count`` is expected to be the same on every call.
        """
        start = frame_count * self.cycle_count
        remaining = self.input_array.shape[0] - start
        if remaining <= 0:
            # Nothing left: also covers audio whose length is an exact
            # multiple of frame_count, which must not yield an empty buffer.
            return None, True

        chunk = self.input_array[start:start + frame_count]
        self.cycle_count += 1

        missing = frame_count - chunk.shape[0]
        if missing > 0:
            # Pad the tail with silence so a full buffer is always delivered.
            chunk = np.concatenate((chunk, np.zeros(missing, dtype=chunk.dtype)))
        return chunk, remaining <= frame_count

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        """Stream callback: supply the next ``frame_count`` samples to PyAudio.

        ``in_data``, ``time_info`` and ``status`` are part of the callback
        signature but are not used.

        Returns:
            ``(bytes, pyaudio.paContinue)`` while more audio follows,
            ``(bytes, pyaudio.paComplete)`` for the final buffer, and
            ``(None, pyaudio.paComplete)`` if called after the audio ended.
        """
        # No printing here: this runs on the audio thread for every buffer.
        chunk, finished = self._next_chunk(frame_count)
        if chunk is None:
            return (None, pyaudio.paComplete)
        # The stream format is derived from input_array.dtype, so the samples
        # are sent as-is without any dtype conversion.
        flag = pyaudio.paComplete if finished else pyaudio.paContinue
        return (chunk.tobytes(), flag)

    def start_non_blocking_processing(self, save_output=True, frame_count=2**10, listen_output=True):
        '''
        Opens a PyAudio stream driven by self.pyaudio_callback and starts it.

        save_output and frame_count are stored on the instance; frame_count is
        also used as the stream's frames_per_buffer. listen_output selects an
        output stream when True and an input stream when False.

        Non blocking mode works on a different thread, therefore, the main thread must be kept active with, for example:
            while processing():
                time.sleep(1)
        '''
        self.save_output = save_output
        self.frame_count = frame_count

        # Initiate PyAudio
        self.pa = pyaudio.PyAudio()
        # Open stream using callback. The sample format follows the dtype of
        # the loaded array. channels is 1 because the loaded data is mono;
        # declaring two channels would make every buffer half the size the
        # stream expects.
        self.stream = self.pa.open(format=np_to_pa_format[self.input_array.dtype],
                        channels=1,
                        rate=self.sample_rate,
                        output=listen_output,
                        input=not listen_output,
                        stream_callback=self.pyaudio_callback,
                        frames_per_buffer=frame_count)

        # Start the stream
        self.stream.start_stream()

    def processing(self):
        '''
        Returns true if the PyAudio stream is still active in non blocking mode.
        MUST be called AFTER self.start_non_blocking_processing.
        '''
        return self.stream.is_active()

    def terminate_processing(self):
        '''
        Terminates stream opened by self.start_non_blocking_processing.
        MUST be called AFTER self.start_non_blocking_processing.
        '''
        try:
            # Stop stream.
            self.stream.stop_stream()
            self.stream.close()
        finally:
            # Close PyAudio even if stopping or closing the stream failed.
            self.pa.terminate()

        # Resets count.
        self.cycle_count = 0
        # Resets output: empty array of shape (0, 2) in the input dtype.
        self.output_array = np.array([[], []], dtype=self.input_array.dtype).T



# Script entry point: play the test file and block until playback ends.
if __name__ == "__main__":
    print('RUNNING MAIN')
    player = Player()
    player.start_non_blocking_processing()
    try:
        # The callback runs on PyAudio's own thread; keep the main thread
        # alive by polling until the stream reports it is no longer active.
        while player.processing():
            time.sleep(0.1)
    finally:
        # Release the stream and PyAudio even if the wait is interrupted
        # (for example by Ctrl+C).
        player.terminate_processing()

PyAudio 回调函数只调用一次

PyAudio callback function called only once

python

audio

signal-processing

pyaudio