A simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.
SSML is not fully supported: only the speak, voice, and prosody element types are supported. The following is the default SSML object:
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts"
xml:lang="${this._voiceLang}">
<voice name="${voiceName}">
<prosody rate="${rate}" pitch="${pitch}" volume="${volume}">
${input}
</prosody>
</voice>
</speak>
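Documentation on the SSML format can be found in Microsoft's Speech Synthesis Markup Language (SSML) documentation for the Azure Speech service. The supported audio formats are available as members of the exported OUTPUT_FORMAT object (for example, OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS).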
Make sure to escape/sanitize your users' input before passing it to the library, because the input is placed inside the SSML document shown above. Use a library like xml-escape.
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
const tts = new MsEdgeTTS();
await tts.setMetadata("en-IE-ConnorNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const {audioStream} = tts.toStream("Hi, how are you?");
audioStream.on("data", (data) => {
console.log("DATA RECEIVED", data);
// raw audio file data
});
audioStream.on("close", () => {
console.log("STREAM CLOSED");
});
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const {audioFilePath} = await tts.toFile("./tmpfolder", "Hi, how are you?");
})();
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const {audioStream} = await tts.toStream("Hi, how are you?", {rate: 0.5, pitch: "+200Hz"});
})();
To route requests through a proxy, pass a custom http.Agent implementation, such as https-proxy-agent or socks-proxy-agent, to the MsEdgeTTS constructor:
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
import {SocksProxyAgent} from 'socks-proxy-agent';
(async () => {
const agent = new SocksProxyAgent("socks://your-name%40gmail.com:your-password@proxy.example.com");
const tts = new MsEdgeTTS(agent);
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS);
const {audioStream} = await tts.toStream("Hi, how are you?");
})();
import {MsEdgeTTS, OUTPUT_FORMAT} from "msedge-tts";
(async () => {
const tts = new MsEdgeTTS();
await tts.setMetadata("en-US-AriaNeural", OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS, {
wordBoundaryEnabled: true,
sentenceBoundaryEnabled: true
});
// as stream
const {metadataStream} = await tts.toStream("Hi, how are you doing today hello hello hello?");
/* ->
{
"Metadata": [
{
"Type": "SentenceBoundary",
"Data": {
"Offset": 1000000,
"Duration": 35875000,
"text": {
"Text": "Hi, how are you doing today hello hello hello?",
"Length": 46,
"BoundaryType": "SentenceBoundary"
}
}
}
]
}
*/
// or as file
const {metadataFilePath} = await tts.toFile("./tmpfolder", "Hi, how are you?");
/* ->
{
"Metadata": [
<all metadata combined>
]
}
*/
})();
For the full documentation, see the API Documentation.
This library only supports promises.