AudioContext, getUserMedia, and websockets audio streaming
I am trying to make the simplest possible JavaScript frontend that will allow me to receive audio from a user's microphone in the web browser on a mouse click using getUserMedia, modify it to a custom sample rate and a single channel, and stream it over a websocket to my server, where it will be relayed to the Watson Speech API.
I have already built the websocket server using autobahn. I have been trying to make an updated client library drawing on whisper and ws-audio-api, but both libraries seem outdated and include a lot of functionality I don't need, which I am trying to filter out. I am using XAudioJS to resample the audio.
My current progress is in a Codepen. I am stuck and can't find any clearer examples.
- whisper and ws-audio-api initialize the AudioContext on page load, which now causes an error in at least Chrome and on iOS, since an audio context must be initialized in response to a user interaction. I tried moving the AudioContext into the onClick event, but that left me having to click twice before streaming started. I currently call audio_context.resume() inside the onClick event, but this feels like a roundabout solution, and it leaves the page indicating that it is always recording even when it is not, which may unsettle my users. How do I properly start recording on click and stop recording on click?
- I have updated from the deprecated Navigator.getUserMedia() to MediaDevices.getUserMedia(), but I am not sure whether I also need to change the legacy vendor prefixes on lines 83-86 to match the new function? (The promise-based call I am using now is sketched just after this list.)
- Most importantly, once I get the stream from getUserMedia, how do I properly resample it and forward it to the open websocket? I am a little confused by the structure of bouncing the audio from node to node, and I need help with lines 93-108.
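For context, here is a minimal sketch of the prefix-free, promise-based call as I am using it now; it is only the relevant fragment, not the full Codepen, and the constraints object is just the simplest case:
// Minimal sketch of the modern, promise-based microphone request.
// No navigator.webkitGetUserMedia / mozGetUserMedia fallbacks are needed
// in current Chrome, Firefox, or Safari.
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
    .then(function (stream) {
        // 'stream' is a MediaStream that can be fed into an AudioContext
        console.log('microphone stream acquired', stream);
    })
    .catch(function (err) {
        console.error('getUserMedia failed:', err);
    });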
I found help and was able to build a more modern JavaScript frontend based on the code from vin-ni's Google-Cloud-Speech-Node-Socket-Playground, which I tweaked slightly. Many of the existing audio streaming demos in 2021 are either outdated or come with a ton of "extra" functionality that raises the barrier to getting started with websockets and audio streaming. I created this "basic" script, which reduces audio streaming down to just four key functions:
- Open the websocket
- Start streaming
- Resample the audio
- Stop streaming
Hopefully this KISS (Keep It Simple, Stupid) demo helps someone else get started with streaming audio a bit faster than I did.
Here is my JavaScript frontend:
//================= CONFIG =================
// Global Variables
let websocket_uri = 'ws://127.0.0.1:9001';
let bufferSize = 4096,
    AudioContext,
    context,
    processor,
    input,
    globalStream,
    streamStreaming,
    websocket;
// Initialize WebSocket
initWebSocket();
//================= RECORDING =================
function startRecording() {
    streamStreaming = true;
    AudioContext = window.AudioContext || window.webkitAudioContext;
    context = new AudioContext({
        // if non-interactive, use 'playback' or 'balanced' // https://developer.mozilla.org/en-US/docs/Web/API/AudioContextLatencyCategory
        latencyHint: 'interactive',
    });
    processor = context.createScriptProcessor(bufferSize, 1, 1);
    processor.connect(context.destination);
    context.resume();
    var handleSuccess = function (stream) {
        globalStream = stream;
        input = context.createMediaStreamSource(stream);
        input.connect(processor);
        processor.onaudioprocess = function (e) {
            var left = e.inputBuffer.getChannelData(0);
            // Resample from the context's actual rate (often 44100 or 48000 Hz) down to 16 kHz
            var left16 = downsampleBuffer(left, context.sampleRate, 16000);
            websocket.send(left16);
        };
    };
    navigator.mediaDevices.getUserMedia({audio: true, video: false}).then(handleSuccess);
} // closes function startRecording()
function stopRecording() {
    streamStreaming = false;
    let track = globalStream.getTracks()[0];
    track.stop();
    input.disconnect(processor);
    processor.disconnect(context.destination);
    context.close().then(function () {
        input = null;
        processor = null;
        context = null;
        AudioContext = null;
    });
} // closes function stopRecording()
function initWebSocket() {
    // Create WebSocket
    websocket = new WebSocket(websocket_uri);
    //console.log("Websocket created...");
    // WebSocket Definitions: executed when triggered webSocketStatus
    websocket.onopen = function () {
        console.log("connected to server");
        //websocket.send("CONNECTED TO YOU");
        document.getElementById("webSocketStatus").innerHTML = 'Connected';
    };
    websocket.onclose = function (e) {
        console.log("connection closed (" + e.code + ")");
        document.getElementById("webSocketStatus").innerHTML = 'Not Connected';
    };
    websocket.onmessage = function (e) {
        //console.log("message received: " + e.data);
        console.log(e.data);
        let result;
        try {
            result = JSON.parse(e.data);
        } catch (err) {
            $('.message').html('Error retrieving data: ' + err);
        }
        if (typeof result !== 'undefined' && typeof result.error !== 'undefined') {
            $('.message').html('Error: ' + result.error);
        } else {
            $('.message').html('Welcome!');
        }
    };
} // closes function initWebSocket()
// Downsample a Float32 buffer from sampleRate to outSampleRate and return 16-bit PCM
function downsampleBuffer(buffer, sampleRate, outSampleRate) {
    if (outSampleRate == sampleRate) {
        return buffer;
    }
    if (outSampleRate > sampleRate) {
        throw 'downsampling rate should be smaller than original sample rate';
    }
    var sampleRateRatio = sampleRate / outSampleRate;
    var newLength = Math.round(buffer.length / sampleRateRatio);
    var result = new Int16Array(newLength);
    var offsetResult = 0;
    var offsetBuffer = 0;
    while (offsetResult < result.length) {
        var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
        // Average all source samples that fall into this output slot
        var accum = 0,
            count = 0;
        for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
            accum += buffer[i];
            count++;
        }
        result[offsetResult] = Math.min(1, accum / count) * 0x7fff;
        offsetResult++;
        offsetBuffer = nextOffsetBuffer;
    }
    return result.buffer;
} // closes function downsampleBuffer()
And my index.html file:
<!DOCTYPE html>
<html>
<head>
    <script src='jquery-1.8.3.js'></script>
    <script src='client.js'></script>
</head>
<body>
    <div class='message'>Welcome!</div>
    <button onclick='startRecording()'>Start recording</button>
    <button onclick='stopRecording()'>Stop recording</button>
    <br/>
    <div>WebSocket: <span id="webSocketStatus">Not Connected</span></div>
</body>
</html>
You can test it with most of the autobahn Python echo servers found on Crossbario's GitHub. The startRecording() and stopRecording() functions can also be called from variables in Storyline or H5P, in case anyone (like me) wants to use this for speech recognition in ed tech.
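If you would rather not run the Python servers just to test the client, a roughly equivalent local echo server can be sketched in Node. This is only an illustrative sketch, not part of the setup above; it assumes the ws package (npm install ws) and reuses the ws://127.0.0.1:9001 address from the config:
// Minimal Node.js echo server sketch using the 'ws' package (an assumption,
// not part of the original autobahn setup). It sends every received audio
// chunk straight back to the connected client.
const WebSocket = require('ws');
const wss = new WebSocket.Server({ host: '127.0.0.1', port: 9001 });
wss.on('connection', function (socket) {
    console.log('client connected');
    socket.on('message', function (data) {
        socket.send(data); // echo binary audio frames (or text) back unchanged
    });
    socket.on('close', function () {
        console.log('client disconnected');
    });
});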