-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrenderer.js
165 lines (146 loc) · 4.82 KB
/
renderer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
const path = require('path');
const snowboy = require('snowboy');
const {Detector, Models} = snowboy;
const speech = require('@google-cloud/speech');
// TODO: make configurable via UI
// Google Cloud Speech client. The service-account key file is taken from the
// standard GOOGLE_APPLICATION_CREDENTIALS environment variable when it is set,
// so the app can run on machines other than the original developer's. The
// hard-coded path is kept only as a backward-compatible fallback.
const client = new speech.SpeechClient({
  keyFilename:
    process.env.GOOGLE_APPLICATION_CREDENTIALS ||
    '/home/matt/dev/Speech-7a0f6aa1b14a.json',
});
// Web Audio context used to build the microphone processing graph.
const ctx = new window.AudioContext();

// TODO: make configurable via the UI
// Recognition language: the browser locale when available, otherwise US English.
const languageCode = navigator.language || 'en-US';
// TODO: make configurable via the UI
// Set this to true to receive interim results as well.
const interimResults = false;
// Request object passed to client.streamingRecognize() for every utterance.
const request = {
  config: {
    encoding: 'LINEAR16',
    sampleRateHertz: 16000,
    languageCode,
  },
  interimResults,
};

// Number of silence frames snowboy must report before the google stream is closed.
const silenceThreshold = 25;
// Maximum duration, in milliseconds, of audio sent to google per hotword.
// The google API has a hard limit of 60 secs; this is a safety net in case
// silence detection fails to terminate the stream.
const googleMaxDuration = 10000;

// Mutable streaming state shared by the event handlers below.
// True while microphone audio is being forwarded to google.
let googleListen = false;
// The currently open google streaming-recognize stream, if any.
let recognizeStream;
// Silence frames counted during the current google session.
let silenceIndex = 0;
// Handle of the timer that enforces googleMaxDuration.
let googleKiller;
// init snowboy
// Directory that holds the bundled snowboy model and resource files.
const snowboyResourceDir = path.join(__dirname, 'snowboy');

const models = new Models();
// TODO: make this configurable via the UI
// Register the universal "alexa" hotword model. To listen for a different
// hotword, point `file` at another model file and change `hotwords` to match.
models.add({
  file: path.join(snowboyResourceDir, 'alexa.umdl'),
  hotwords: 'alexa',
  sensitivity: '0.5',
});

// Hotword detector fed with 16 kHz audio from the microphone.
const detector = new Detector({
  resource: path.join(snowboyResourceDir, 'common.res'),
  models,
  audioGain: 2.0,
  sampleRate: 16000,
});
// when snowboy reports an error
detector.on('error', console.error);
// When snowboy detects silence: while audio is being streamed to google, count
// the silence frames and close the stream once more than silenceThreshold of
// them have been seen in a row.
detector.on('silence', () => {
  if (!googleListen) {
    return;
  }
  silenceIndex += 1;
  if (silenceIndex > silenceThreshold) {
    stopGoogle();
  }
});
// When snowboy detects sound the speaker is still talking, so restart the
// silence count. Without this reset, short pauses scattered through one
// utterance add up and cut the google stream off mid-sentence.
detector.on('sound', () => {
  silenceIndex = 0;
});
/**
 * Stop streaming to google: cancel the max-duration timer, reset the
 * listening flag and silence counter, and end the current recognize stream.
 */
function stopGoogle() {
  // Cancel the safety timer so it cannot fire after the session is over.
  clearTimeout(googleKiller);
  // Reset the per-session state.
  silenceIndex = 0;
  googleListen = false;
  // Signal end-of-audio on the open stream.
  recognizeStream.end();
}
// When snowboy detects a hotword: open a streaming-recognize session with
// google, send it the audio snowboy buffered, and start forwarding microphone
// audio until silence or the max-duration timer ends the session.
detector.on('hotword', (index, hotword, buffer) => {
  console.log('hotword', index, hotword);
  // The hotword can fire again while a previous utterance is still streaming.
  // Close that session first; otherwise its stream is never ended and its
  // max-duration timer would later terminate the new stream prematurely.
  if (googleListen) {
    stopGoogle();
  }
  recognizeStream = client.streamingRecognize(request)
    .on('error', console.error)
    .on('data', data => {
      // Log the top transcript; skip responses that carry no result or no
      // alternative instead of throwing inside the stream callback.
      const result = data.results && data.results[0];
      const alternative = result && result.alternatives && result.alternatives[0];
      if (alternative) {
        console.log(alternative.transcript);
      }
    });
  // write the buffered audio immediately after the hotword to google
  recognizeStream.write(buffer);
  // start streaming audio to google
  googleListen = true;
  // make sure recording stops at max limit
  googleKiller = setTimeout(() => {
    if (googleListen) {
      stopGoogle();
    }
  }, googleMaxDuration);
});
// Capture the microphone, convert every audio chunk to 16 kHz 16-bit PCM and
// feed it to snowboy (always) and to google (only while a session is open).
navigator.mediaDevices.getUserMedia({audio: true})
  .then(mediaStream => {
    // audioIn is a MediaStreamAudioSourceNode which is child of AudioNode
    const vol = ctx.createGain();
    const audioIn = ctx.createMediaStreamSource(mediaStream);
    audioIn.connect(vol);
    const recorder = ctx.createScriptProcessor(0, 2, 2);
    recorder.onaudioprocess = audio => {
      const {inputBuffer} = audio;
      // dealing with mono mic, so only need one channel
      const left = inputBuffer.getChannelData(0);
      // Use the buffer's real sample rate: the audio context frequently runs
      // at 48000 Hz rather than 44100 Hz, and assuming the wrong input rate
      // distorts the audio handed to snowboy and google.
      const downsampled = downsample(left, inputBuffer.sampleRate, 16000);
      // continuously send audio to snowboy for hotword detection
      detector.write(downsampled);
      // only send audio to google after hotword detection
      if (googleListen) {
        recognizeStream.write(downsampled);
      }
    };
    vol.connect(recorder);
    recorder.connect(ctx.destination);
  })
  .catch(err => {
    console.error('getUserMedia failed.', err);
  });
/**
 * Convert floating-point PCM samples to 16-bit signed PCM at a lower (or
 * equal) sample rate.
 *
 * Each output sample is the average of the input samples that fall into its
 * time slot, clamped to [-1, 1] and scaled to the int16 range.
 *
 * @param {Float32Array|number[]} floatsArray - Source samples.
 * @param {number} sampleRate - Sample rate of floatsArray, in Hz.
 * @param {number} outSampleRate - Target sample rate in Hz; must not exceed sampleRate.
 * @returns {Buffer} Buffer over the int16 samples, in platform byte order.
 * @throws {Error} If outSampleRate is greater than sampleRate.
 */
function downsample(floatsArray, sampleRate, outSampleRate) {
  if (outSampleRate > sampleRate) {
    throw new Error('downsampling rate should not be larger than original sample rate');
  }
  // Equal rates fall through with a ratio of 1, so the samples are still
  // converted to int16 instead of being dropped.
  const sampleRateRatio = sampleRate / outSampleRate;
  const sourceSampleCount = floatsArray.length;
  const newSampleCount = Math.round(sourceSampleCount / sampleRateRatio);
  // n samples at 2 bytes each
  const view = new Int16Array(newSampleCount);
  let offsetBuffer = 0;
  for (let offsetResult = 0; offsetResult < newSampleCount; offsetResult++) {
    const nextOffsetBuffer = Math.min(
      sourceSampleCount,
      Math.round((offsetResult + 1) * sampleRateRatio)
    );
    // average the source samples of this slot without allocating a slice
    let sum = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer; i++) {
      sum += floatsArray[i];
    }
    const slotSize = nextOffsetBuffer - offsetBuffer;
    const s = slotSize > 0 ? Math.max(-1, Math.min(1, sum / slotSize)) : 0;
    // convert float32 to int16 (the typed array truncates toward zero)
    view[offsetResult] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    offsetBuffer = nextOffsetBuffer;
  }
  // Buffer.from(arrayBuffer) shares the memory with `view`; nothing is copied.
  return Buffer.from(view.buffer);
}