@@ -1,258 +0,0 @@
-// @flow
-
-import { createRnnoiseProcessorPromise, getSampleLength } from '../../../../rnnoise';
-import EventEmitter from 'events';
-import JitsiMeetJS from '../../../lib-jitsi-meet';
-import logger from '../../logger';
-import { VAD_SCORE_PUBLISHED } from './Events';
-
-/**
- * The structure used by TrackVADEmitter to relay a score.
- */
-export type VADScore = {
-
-    /**
-     * Device ID associated with the VAD score.
-     */
-    deviceId: string,
-
-    /**
-     * The VAD score from 0 to 1, e.g. 0.60.
-     */
-    score: number,
-
-    /**
-     * Epoch time at which the PCM was recorded.
-     */
-    timestamp: number
-
-};
-
-/**
- * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using a WebAudio ScriptProcessorNode.
- * Once an object is created, audio from the local track flows through the ScriptProcessorNode as raw PCM.
- * The PCM is processed by the rnnoise module to obtain a VAD (voice activity detection) score, which is
- * published to consumers via an EventEmitter.
- * After work is done with this service the destroy method needs to be called for a proper cleanup.
- */
-export default class TrackVADEmitter extends EventEmitter {
-    /**
-     * The AudioContext instance.
-     */
-    _audioContext: AudioContext;
-
-    /**
-     * The MediaStreamAudioSourceNode instance.
-     */
-    _audioSource: MediaStreamAudioSourceNode;
-
-    /**
-     * The ScriptProcessorNode instance.
-     */
-    _audioProcessingNode: ScriptProcessorNode;
-
-    /**
-     * Buffer to hold the residue PCM remaining after a ScriptProcessorNode callback.
-     */
-    _bufferResidue: Float32Array;
-
-    /**
-     * State flag that indicates whether the instance was destroyed.
-     */
-    _destroyed: boolean = false;
-
-    /**
-     * The JitsiLocalTrack instance.
-     */
-    _localTrack: Object;
-
-    /**
-     * Device ID of the target microphone.
-     */
-    _micDeviceId: string;
-
-    /**
-     * Callback function that will be called by the ScriptProcessorNode with raw PCM data, depending on the set
-     * sample rate.
-     */
-    _onAudioProcess: (audioEvent: Object) => void;
-
-    /**
-     * Sample rate of the ScriptProcessorNode.
-     */
-    _procNodeSampleRate: number;
-
-    /**
-     * Rnnoise adapter that allows us to calculate a VAD score for PCM samples.
-     */
-    _rnnoiseProcessor: Object;
-
-    /**
-     * PCM sample size expected by the RnnoiseProcessor instance.
-     */
-    _rnnoiseSampleSize: number;
-
-    /**
-     * Constructor.
-     *
-     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode (used as its buffer size). Possible
-     * values: 256, 512, 1024, 2048, 4096, 8192, 16384. Passing other values will default to the closest neighbor.
-     * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate a VAD score
-     * for PCM samples.
-     * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
-     */
-    constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
-        super();
-        this._procNodeSampleRate = procNodeSampleRate;
-        this._rnnoiseProcessor = rnnoiseProcessor;
-        this._localTrack = jitsiLocalTrack;
-        this._micDeviceId = jitsiLocalTrack.getDeviceId();
-        this._bufferResidue = new Float32Array([]);
-        this._audioContext = new AudioContext();
-        this._rnnoiseSampleSize = getSampleLength();
-        this._onAudioProcess = this._onAudioProcess.bind(this);
-
-        this._initializeAudioContext();
-        this._connectAudioGraph();
-
-        logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
-    }
-
-    /**
-     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
-     *
-     * @param {string} micDeviceId - Target microphone device id.
-     * @param {number} procNodeSampleRate - Sample rate of the proc node.
-     * @returns {Promise<TrackVADEmitter>} - Promise resolving to a new instance of TrackVADEmitter.
-     */
-    static async create(micDeviceId: string, procNodeSampleRate: number) {
-        let rnnoiseProcessor = null;
-        let localTrack = null;
-
-        try {
-            logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);
-
-            rnnoiseProcessor = await createRnnoiseProcessorPromise();
-            localTrack = await JitsiMeetJS.createLocalTracks({
-                devices: [ 'audio' ],
-                micDeviceId
-            });
-
-            // We only expect one audio track when specifying a device id.
-            if (!localTrack[0]) {
-                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
-            }
-
-            return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
-        } catch (error) {
-            logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);
-
-            if (rnnoiseProcessor) {
-                rnnoiseProcessor.destroy();
-            }
-
-            // createLocalTracks resolves to an array of tracks, so stop the created track rather than the array.
-            if (localTrack && localTrack[0]) {
-                localTrack[0].stopStream();
-            }
-
-            throw error;
-        }
-    }
-
-    /**
-     * Sets up the audio graph in the AudioContext.
-     *
-     * @returns {void}
-     */
-    _initializeAudioContext() {
-        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);
-
-        // TODO: ScriptProcessorNode is deprecated, check and replace with an alternative.
-        // We don't need stereo for determining the VAD score so we create a single channel processing node.
-        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
-        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
-    }
-
-    /**
-     * ScriptProcessorNode callback. The input parameter contains the PCM audio that is then sent to rnnoise.
-     * Rnnoise only accepts PCM frames of 480 samples, whereas the WebAudio processor node can't buffer at a multiple
-     * of 480, thus after each _onAudioProcess callback the leftover samples that do not make up a complete frame are
-     * kept as residue and prepended to the next sample buffer, and so on. For example, with a 4096 sample buffer the
-     * first callback processes 8 full frames (3840 samples) and carries the remaining 256 samples over to the next
-     * callback.
-     *
-     * @param {AudioProcessingEvent} audioEvent - Audio event.
-     * @returns {void}
-     */
-    _onAudioProcess(audioEvent: Object) {
-        // Prepend the residue PCM buffer from the previous process callback.
-        const inData = audioEvent.inputBuffer.getChannelData(0);
-        const completeInData = [ ...this._bufferResidue, ...inData ];
-        const sampleTimestamp = Date.now();
-
-        let i = 0;
-
-        for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
-            const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
-            const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);
-
-            this.emit(VAD_SCORE_PUBLISHED, {
-                timestamp: sampleTimestamp,
-                score: vadScore,
-                deviceId: this._micDeviceId
-            });
-        }
-
-        this._bufferResidue = completeInData.slice(i, completeInData.length);
-    }
-
-    /**
-     * Connects the nodes in the AudioContext to start the flow of audio data.
-     *
-     * @returns {void}
-     */
-    _connectAudioGraph() {
-        this._audioSource.connect(this._audioProcessingNode);
-        this._audioProcessingNode.connect(this._audioContext.destination);
-    }
-
-    /**
-     * Disconnects the nodes in the AudioContext.
-     *
-     * @returns {void}
-     */
-    _disconnectAudioGraph() {
-        // Even though we disconnect the processing node, it seems that some callbacks remain queued,
-        // resulting in calls with an uninitialized context.
-        // eslint-disable-next-line no-empty-function
-        this._audioProcessingNode.onaudioprocess = () => {};
-        this._audioProcessingNode.disconnect();
-        this._audioSource.disconnect();
-    }
-
-    /**
-     * Cleanup potentially acquired resources.
-     *
-     * @returns {void}
-     */
-    _cleanupResources() {
-        logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);
-
-        this._disconnectAudioGraph();
-        this._localTrack.stopStream();
-        this._rnnoiseProcessor.destroy();
-    }
-
-    /**
-     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
-     *
-     * @returns {void}
-     */
-    destroy() {
-        if (this._destroyed) {
-            return;
-        }
-
-        logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
-        this._cleanupResources();
-        this._destroyed = true;
-    }
-}
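
For reference, a minimal usage sketch of the class removed above. The import paths, the 'default' device id, the 4096 buffer size and the async wrapper are illustrative placeholders; TrackVADEmitter.create(), the VAD_SCORE_PUBLISHED event and destroy() are the ones defined in the file.

    import TrackVADEmitter from './TrackVADEmitter';
    import { VAD_SCORE_PUBLISHED } from './Events';

    async function monitorMicVAD() {
        // 'default' stands in for a real microphone device id; 4096 is one of the supported buffer sizes.
        const emitter = await TrackVADEmitter.create('default', 4096);

        emitter.on(VAD_SCORE_PUBLISHED, ({ deviceId, score, timestamp }) => {
            // The payload matches the VADScore type declared at the top of the file.
            console.log(`VAD score ${score} for device ${deviceId} at ${timestamp}`);
        });

        // Release the audio graph, the local track and the rnnoise processor when done.
        setTimeout(() => emitter.destroy(), 10000);
    }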