如何将 prism-media(@discordjs/opus) opus 流转换为适合 picovoice/porcupine 的格式?
How to convert from prism-media(@discordjs/opus) opus stream to format suitable for picovoice/porcupine?
我使用 discord.js 创建了一个 discord 机器人,并尝试使用 porcupine 实现基本的语音功能。
我有每个用户的音频流,我正尝试在 porcupine 中对每个数据块使用 process(frame) 方法。
为了获得单通道数据和 16k 采样率,我使用 prism-media opus 解码器手动解码流,然后尝试传入块:
execute(connection, user, args) {
userHandlers[user] = new Porcupine([GRASSHOPPER, BUMBLEBEE], [0.5, 0.65]);
if (!receiver) {
receiver = connection.receiver;
}
userStreams[user] = receiver.createStream(user, {mode: 'opus', end: 'manual'});
const decoder = new prism.opus.Decoder({frameSize: 640, channels: 1, rate: 16000});
userStreams[user]
.pipe(decoder);
listeningToUsers[user] = true;
try {
console.log("Start utterance");
decoder.on('data', (chunk) => {//Need to make stream single channel, frame size 512
let keywordIndex = userHandlers[user].process(chunk);
if (keywordIndex != -1) {
meme.execute(connection, null, args);
}
});
} catch (error) {
console.error(error);
}
},
但是,我的问题是块的大小为 640,而该方法需要块的大小为 512 才能工作。
由于这个答案(here)中解释的原因,更改传递给解码器的 frameSize 不起作用。
如果有人知道将我的数据转换为正确块大小的最佳方法,或者只是更好的方法,我将不胜感激。
我最终通过使用此 demo file 中包含的一些代码来完成这项工作。
我们包含一个 chunkArray 函数:
/**
 * Splits `array` into consecutive slices of at most `size` elements.
 * The final slice holds whatever is left over and may be shorter.
 *
 * @param {Array} array  source sequence (anything with `length` and `slice`)
 * @param {number} size  maximum number of elements per slice
 * @returns {Array<Array>} the slices, in their original order
 */
function chunkArray(array, size) {
  const chunkCount = Math.ceil(array.length / size);
  const sliceAt = (_unused, chunkIndex) => {
    const start = chunkIndex * size;
    return array.slice(start, start + size);
  };
  return Array.from({ length: chunkCount }, sliceAt);
}
并将之前发布的代码更改为如下所示:
execute(connection, user, args) {
userHandlers[user] = new Porcupine([GRASSHOPPER, BLUEBERRY], [0.7, 0.85]);
const frameLength = userHandlers[user].frameLength;
if (!receiver) {
receiver = connection.receiver;
}
userStreams[user] = receiver.createStream(user, {mode: 'opus', end: 'manual'});
userDecoders[user] = new prism.opus.Decoder({frameSize: 640, channels: 1, rate: 16000});
userStreams[user]
.pipe(userDecoders[user]);
listeningToUsers[user] = true;
userFrameAccumulators[user] = [];
try {
userDecoders[user].on('data', (data) => {
// Two bytes per Int16 from the data buffer
let newFrames16 = new Array(data.length / 2);
for (let i = 0; i < data.length; i += 2) {
newFrames16[i / 2] = data.readInt16LE(i);
}
// Split the incoming PCM integer data into arrays of size Porcupine.frameLength. If there's insufficient frames, or a remainder,
// store it in 'frameAccumulator' for the next iteration, so that we don't miss any audio data
userFrameAccumulators[user] = userFrameAccumulators[user].concat(newFrames16);
let frames = chunkArray(userFrameAccumulators[user], frameLength);
if (frames[frames.length - 1].length !== frameLength) {
// store remainder from divisions of frameLength
userFrameAccumulators[user] = frames.pop();
} else {
userFrameAccumulators[user] = [];
}
for (let frame of frames) {
let index = userHandlers[user].process(frame);
if (index !== -1) {
if (index == 0) {//GRASSHOPPER
play.execute(connection, null, args);
} else if (index == 1) {//BLUEBERRY
play.skip();
}
}
}
});
} catch (error) {
console.error(error);
}
}
这将获取我们的解码流,然后对其进行转换并适当地分块。
我想这里存在效率低下的地方,有待改进,但它在 discord 服务器中运行良好。我想发布一个答案,以防将来有人尝试将 discord.js 与 porcupine 集成。
我使用 discord.js 创建了一个 discord 机器人,并尝试使用 porcupine 实现基本的语音功能。
我有每个用户的音频流,我正尝试在 porcupine 中对每个数据块使用 process(frame) 方法。
为了获得单通道数据和 16k 采样率,我使用 prism-media opus 解码器手动解码流,然后尝试传入块:
execute(connection, user, args) {
userHandlers[user] = new Porcupine([GRASSHOPPER, BUMBLEBEE], [0.5, 0.65]);
if (!receiver) {
receiver = connection.receiver;
}
userStreams[user] = receiver.createStream(user, {mode: 'opus', end: 'manual'});
const decoder = new prism.opus.Decoder({frameSize: 640, channels: 1, rate: 16000});
userStreams[user]
.pipe(decoder);
listeningToUsers[user] = true;
try {
console.log("Start utterance");
decoder.on('data', (chunk) => {//Need to make stream single channel, frame size 512
let keywordIndex = userHandlers[user].process(chunk);
if (keywordIndex != -1) {
meme.execute(connection, null, args);
}
});
} catch (error) {
console.error(error);
}
},
但是,我的问题是块的大小为 640,而该方法需要块的大小为 512 才能工作。
由于这个答案(here)中解释的原因,更改传递给解码器的 frameSize 不起作用。如果有人知道将我的数据转换为正确块大小的最佳方法,或者有其他更好的方法,我将不胜感激。
我最终通过使用此 demo file 中包含的一些代码来完成这项工作。
我们包含一个 chunkArray 函数:
/**
 * Splits `array` into consecutive slices of at most `size` elements.
 * The final slice holds whatever is left over and may be shorter.
 *
 * @param {Array} array  source sequence (anything with `length` and `slice`)
 * @param {number} size  maximum number of elements per slice
 * @returns {Array<Array>} the slices, in their original order
 */
function chunkArray(array, size) {
  const chunkCount = Math.ceil(array.length / size);
  const sliceAt = (_unused, chunkIndex) => {
    const start = chunkIndex * size;
    return array.slice(start, start + size);
  };
  return Array.from({ length: chunkCount }, sliceAt);
}
并将之前发布的代码更改为如下所示:
execute(connection, user, args) {
userHandlers[user] = new Porcupine([GRASSHOPPER, BLUEBERRY], [0.7, 0.85]);
const frameLength = userHandlers[user].frameLength;
if (!receiver) {
receiver = connection.receiver;
}
userStreams[user] = receiver.createStream(user, {mode: 'opus', end: 'manual'});
userDecoders[user] = new prism.opus.Decoder({frameSize: 640, channels: 1, rate: 16000});
userStreams[user]
.pipe(userDecoders[user]);
listeningToUsers[user] = true;
userFrameAccumulators[user] = [];
try {
userDecoders[user].on('data', (data) => {
// Two bytes per Int16 from the data buffer
let newFrames16 = new Array(data.length / 2);
for (let i = 0; i < data.length; i += 2) {
newFrames16[i / 2] = data.readInt16LE(i);
}
// Split the incoming PCM integer data into arrays of size Porcupine.frameLength. If there's insufficient frames, or a remainder,
// store it in 'frameAccumulator' for the next iteration, so that we don't miss any audio data
userFrameAccumulators[user] = userFrameAccumulators[user].concat(newFrames16);
let frames = chunkArray(userFrameAccumulators[user], frameLength);
if (frames[frames.length - 1].length !== frameLength) {
// store remainder from divisions of frameLength
userFrameAccumulators[user] = frames.pop();
} else {
userFrameAccumulators[user] = [];
}
for (let frame of frames) {
let index = userHandlers[user].process(frame);
if (index !== -1) {
if (index == 0) {//GRASSHOPPER
play.execute(connection, null, args);
} else if (index == 1) {//BLUEBERRY
play.skip();
}
}
}
});
} catch (error) {
console.error(error);
}
}
这将获取我们的解码流,然后对其进行转换并适当地分块。
我想这里存在效率低下的地方,有待改进,但它在 discord 服务器中运行良好。我想发布一个答案,以防将来有人尝试将 discord.js 与 porcupine 集成。