How to save a text to speech audio file client side?

Desired behaviour

Allow the user to download a text-to-speech audio file at the click of a button, like in this official demo:

https://text-to-speech-starter-kit.ng.bluemix.net

What I've tried

I am using:

https://github.com/watson-developer-cloud/node-sdk

I can generate an audio file server side, but I don't know how to send that file back to the client for them to save - so I tried to generate it client side instead.

Attempt 01: Generate the audio file server side

server.js (works)

const fs = require('fs');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

const textToSpeech = new TextToSpeechV1({
    iam_apikey: '{apikey}',
});

const synthesizeParams = {
    text: 'Hello world',
    accept: 'audio/wav',
    voice: 'en-US_AllisonVoice',
};

textToSpeech.synthesize(synthesizeParams)
    .then(audio => {
        audio.pipe(fs.createWriteStream('hello_world.wav'));
    })
    .catch(err => {
        console.log('error:', err);
    });

For reference, per the docs, the .synthesize() method's response type is:

NodeJS.ReadableStream|FileObject|Buffer   
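In the SDK version used above, the promise resolves to that readable stream directly. As a minimal sketch (reusing textToSpeech, synthesizeParams and fs from the server.js above), the stream can be collected into a Buffer in memory, which could then be written to disk or sent back to the browser in an HTTP response instead of keeping a file on the server:

const streamToBuffer = (stream) =>
    new Promise((resolve, reject) => {
        const chunks = [];
        stream.on('data', (chunk) => chunks.push(chunk));
        stream.on('end', () => resolve(Buffer.concat(chunks)));
        stream.on('error', reject);
    });

textToSpeech.synthesize(synthesizeParams)
    .then(streamToBuffer)
    .then((buffer) => {
        // the whole WAV file is now in memory
        fs.writeFileSync('hello_world.wav', buffer);
    })
    .catch((err) => {
        console.log('error:', err);
    });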

Attempt 02: Generate the audio file client side

server.js - needed to get a token (works)

var AuthorizationV1 = require('ibm-watson/authorization/v1');

var iam_apikey = local_settings.TEXT_TO_SPEECH_IAM_APIKEY;
var url = local_settings.TEXT_TO_SPEECH_URL;

var authorization = new AuthorizationV1({
    iam_apikey: iam_apikey,
    url: url
});

const api_tts_token_get = async (req, res) => {

    authorization.getToken(function(err, token) {
        if (!token) {
            console.log('error:', err);
            // respond with a 500 so the client's error handler fires
            // instead of leaving the request hanging
            res.status(500).json({ error: 'could not get token' });
        } else {
            res.json({ token: token, url: url });
        }
    });

}

app.route("/api/:api_version/text-to-speech/token")
    .get(api_tts_token_get); 

client.js (doesn't work)

var TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

const get_token = (parameters) => {

    $.ajax({
        url: "/api/v1/text-to-speech/token",
        data: parameters,
        dataType: 'json',
        cache: false,
        headers: headers,
        success: function(results) {

            var token = results.token;
            var url = results.url;

            var textToSpeech = new TextToSpeechV1({ token: token, url: url });

            var synthesizeParams = {
                text: 'hello world!',
                accept: 'audio/wav',
                voice: 'en-US_AllisonV3Voice'
            };

            textToSpeech.synthesize(synthesizeParams, function(err, result) {
                if (err) {
                    return console.log(err);
                }
                console.log(result);
            });

        },
        statusCode: {
            500: function() {

                console.log("that didn't work");

            }
        }
    });

}

webpack.config.js

Added per the instructions at:

https://github.com/watson-developer-cloud/node-sdk/tree/master/examples/webpack#important-notes

node: {
    // see http://webpack.github.io/docs/configuration.html#node
    // and https://webpack.js.org/configuration/node/
    fs: 'empty',
    net: 'empty',
    tls: 'empty'
},

Chrome Dev Tools errors:

xhr.js:108 Refused to set unsafe header "User-Agent"

The provided value 'stream' is not a valid enum value of type XMLHttpRequestResponseType.

Access to XMLHttpRequest at 'https://***.watsonplatform.net/text-to-speech/api/v1/synthesize?voice=en-US_AllisonV3Voice'
from origin 'http://localhost:3000' has been blocked by CORS policy: 
Request header field x-ibmcloud-sdk-analytics is not allowed by 
Access-Control-Allow-Headers in preflight response.

Error: Response not received. Body of error is HTTP ClientRequest object
at RequestWrapper.formatError (requestwrapper.js:218)
at eval (requestwrapper.js:206)

Here is one solution I came up with.

It generates the audio file server side and sends it back with res.download().

The only caveat is that you can't use $.ajax(); instead, use something like:

window.open("/api/v1/audio?file_id=12345");
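If you would rather trigger the download from script instead of window.open(), here is a minimal sketch (the download_audio name is just illustrative) using fetch() and a temporary object URL against the same /api/v1/audio route shown below:

const download_audio = (file_id) => {
    fetch("/api/v1/audio?file_id=" + encodeURIComponent(file_id))
        .then((response) => {
            if (!response.ok) {
                throw new Error("request failed: " + response.status);
            }
            return response.blob();
        })
        .then((blob) => {
            // point a temporary link at the blob and click it to prompt a save
            const url = URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.href = url;
            a.download = file_id + ".wav";
            document.body.appendChild(a);
            a.click();
            a.remove();
            URL.revokeObjectURL(url);
        })
        .catch((err) => console.log(err));
};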

server.js

var fs = require('fs');
var path = require('path');
var TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');

const api_audio_get = async (req, res) => {

    var query_parameters = req.query;

    var file_id = query_parameters.file_id;

    var textToSpeech = new TextToSpeechV1({
        iam_apikey: local_settings.TEXT_TO_SPEECH_IAM_APIKEY,
        url: local_settings.TEXT_TO_SPEECH_URL
    });

    const synthesizeParams = {
        text: 'here is test voice',
        accept: 'audio/wav',
        voice: 'en-US_AllisonV3Voice',
    };

    textToSpeech.synthesize(
        synthesizeParams,
        function(err, audio) {
            if (err) {
                console.log(err);
                return;
            }

            // write the audio to a temp file on the server, send it, then delete it
            var filename = file_id + ".wav";
            var absPath = path.join(__dirname, "/my_files/", filename);
            var relPath = path.join("./my_files", filename); // path relative to server root

            // see: https://nodejs.org/en/knowledge/advanced/streams/how-to-use-fs-create-write-stream/
            var write_stream = fs.createWriteStream(relPath);
            // audio is written to the writestream
            audio.pipe(write_stream);

            // wait until the file has finished writing before sending it
            write_stream.on('finish', function() {

                res.download(absPath, (err) => {
                    if (err) {
                        console.log(err);
                    }
                    fs.unlink(relPath, (err) => {
                        if (err) {
                            console.log(err);
                        }
                        console.log("FILE [" + filename + "] REMOVED!");
                    });
                });

            });

        }
    );

}

// route handler
app.route("/api/:api_version/audio")
    .get(api_audio_get);

client.js

$(document).on("click", ".download_audio", function() {

    window.open("/api/v1/audio?file_id=12345");

});
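
As a variation on the server.js above (an untested sketch, reusing TextToSpeechV1, local_settings and app; the /audio-stream route and handler names are just illustrative), the temp file could be skipped entirely by piping the audio stream straight into the Express response with a Content-Disposition header:

const api_audio_stream_get = (req, res) => {

    var textToSpeech = new TextToSpeechV1({
        iam_apikey: local_settings.TEXT_TO_SPEECH_IAM_APIKEY,
        url: local_settings.TEXT_TO_SPEECH_URL
    });

    textToSpeech.synthesize({
        text: 'here is test voice',
        accept: 'audio/wav',
        voice: 'en-US_AllisonV3Voice'
    }, function(err, audio) {
        if (err) {
            console.log(err);
            return res.sendStatus(500);
        }
        // tell the browser to save the stream rather than play it
        res.set('Content-Type', 'audio/wav');
        res.set('Content-Disposition', 'attachment; filename="' + req.query.file_id + '.wav"');
        audio.pipe(res);
    });

};

app.route("/api/:api_version/audio-stream")
    .get(api_audio_stream_get);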