I've been trying to get the speech to text to work via node.js and having no luck. I am literally using the example from Github (https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/quickstart/javascript/node/from-file/index.js), pasted below. I filled in subscriptionKey, region and pointed to a local WAV file. I specified a different language (at line 39). This is the code, with the subscription key suppressed.
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
(function() {
// <code>
"use strict";
// pull in the required packages.
var sdk = require("microsoft-cognitiveservices-speech-sdk");
var fs = require("fs");
// replace with your own subscription key,
// service region (e.g., "westus"), and
// the name of the file you want to run
// through the speech recognizer.
var subscriptionKey = "f7c2434d************************";
var serviceRegion = "centralus"; // e.g., "westus"
var filename = "/Users/john/data/Espinhas072_6832572-2104061302.wav";
// create the push stream we need for the speech sdk.
var pushStream = sdk.AudioInputStream.createPushStream();
// open the file and push it to the push stream.
fs.createReadStream(filename).on('data', function(arrayBuffer) {
pushStream.write(arrayBuffer.slice());
}).on('end', function() {
pushStream.close();
});
// we are done with the setup
console.log("Now recognizing from: " + filename);
// now create the audio-config pointing to our stream and
// the speech config specifying the language.
var audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
var speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
// setting the recognition language to Portuguese.
speechConfig.speechRecognitionLanguage = "pt-PT";
// create the speech recognizer.
var recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
// start the recognizer and wait for a result.
recognizer.recognizeOnceAsync(
function (result) {
console.log(result);
recognizer.close();
recognizer = undefined;
},
function (err) {
console.trace("err - " + err);
recognizer.close();
recognizer = undefined;
});
// </code>
}());
The result I get back is this:
SpeechRecognitionResult {
privResultId: 'FFCB985CC6674620879C6D765EA5F93B',
privReason: 0,
privText: undefined,
privDuration: 56000000,
privOffset: 0,
privLanguage: undefined,
privLanguageDetectionConfidence: undefined,
privErrorDetails: undefined,
privJson: '{"Id":"9e07fa894d77473b982e5f2efa897e5c","RecognitionStatus":"InitialSilenceTimeout","Offset":0,"Duration":56000000}',
privProperties: PropertyCollection {
privKeys: [ 'SpeechServiceResponse_JsonResult' ],
privValues: [
'{"Id":"9e07fa894d77473b982e5f2efa897e5c","RecognitionStatus":"InitialSilenceTimeout","Offset":0,"Duration":56000000}'
]
},
privSpeakerId: undefined
}
This is Day 1 on Azure for me, so it's entirely possible I'm doing something incredibly dumb and missing it. I've tried a couple of other files to no avail. The file is 4 minutes long and I've tried it with subscription keys for both a free level subscription as well as paid. I noticed the "InitialSilenceTimeout" and can't figure out what that might be about: the file has music as a lead-in, so I tried a version that had that edited out. Running out of ideas.