
feat: integrate rnnoise-based service for voice activity detection (VAD)

Andrei Gavrilescu committed 5 years ago
Commit 761ac6a730

Makefile  (+10 -2)

@@ -3,6 +3,7 @@ CLEANCSS = ./node_modules/.bin/cleancss
 DEPLOY_DIR = libs
 LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
 LIBFLAC_DIR = node_modules/libflacjs/dist/min/
+RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
 NODE_SASS = ./node_modules/.bin/node-sass
 NPM = npm
 OUTPUT_DIR = .
@@ -20,7 +21,7 @@ compile:
 clean:
 	rm -fr $(BUILD_DIR)
 
-deploy: deploy-init deploy-appbundle deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local
+deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-css deploy-local
 
 deploy-init:
 	rm -fr $(DEPLOY_DIR)
@@ -47,6 +48,8 @@ deploy-appbundle:
 		$(BUILD_DIR)/analytics-ga.min.map \
 		$(BUILD_DIR)/video-blur-effect.min.js \
 		$(BUILD_DIR)/video-blur-effect.min.map \
+		$(BUILD_DIR)/rnnoise-processor.min.js \
+		$(BUILD_DIR)/rnnoise-processor.min.map \
 		$(DEPLOY_DIR)
 
 deploy-lib-jitsi-meet:
@@ -63,6 +66,11 @@ deploy-libflac:
 		$(LIBFLAC_DIR)/libflac4-1.3.2.min.js.mem \
 		$(DEPLOY_DIR)
 
+deploy-rnnoise-binary:
+	cp \
+		$(RNNOISE_WASM_DIR)/rnnoise.wasm \
+		$(DEPLOY_DIR)
+
 deploy-css:
 	$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
 	$(CLEANCSS) $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@@ -71,7 +79,7 @@ deploy-css:
 deploy-local:
 	([ ! -x deploy-local.sh ] || ./deploy-local.sh)
 
-dev: deploy-init deploy-css deploy-lib-jitsi-meet deploy-libflac
+dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac
 	$(WEBPACK_DEV_SERVER)
 
 source-package:

package-lock.json  (+33 -0)

@@ -16413,6 +16413,39 @@
         "inherits": "^2.0.1"
       }
     },
+    "rnnoise-wasm": {
+      "version": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615",
+      "from": "github:jitsi/rnnoise-wasm#db96d11f175a22ef56c7db1ba9550835b716e615"
+    },
+    "rollup-plugin-visualizer": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/rollup-plugin-visualizer/-/rollup-plugin-visualizer-1.1.1.tgz",
+      "integrity": "sha512-7xkSKp+dyJmSC7jg2LXqViaHuOnF1VvIFCnsZEKjrgT5ZVyiLLSbeszxFcQSfNJILphqgAEmWAUz0Z4xYScrRw==",
+      "optional": true,
+      "requires": {
+        "mkdirp": "^0.5.1",
+        "opn": "^5.4.0",
+        "source-map": "^0.7.3",
+        "typeface-oswald": "0.0.54"
+      },
+      "dependencies": {
+        "opn": {
+          "version": "5.5.0",
+          "resolved": "https://registry.npmjs.org/opn/-/opn-5.5.0.tgz",
+          "integrity": "sha512-PqHpggC9bLV0VeWcdKhkpxY+3JTzetLSqTCWL/z/tFIbI6G8JCjondXklT1JinczLz2Xib62sSp0T/gKT4KksA==",
+          "optional": true,
+          "requires": {
+            "is-wsl": "^1.1.0"
+          }
+        },
+        "source-map": {
+          "version": "0.7.3",
+          "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz",
+          "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==",
+          "optional": true
+        }
+      }
+    },
     "rsvp": {
       "version": "4.8.5",
       "resolved": "https://registry.npmjs.org/rsvp/-/rsvp-4.8.5.tgz",

package.json  (+1 -0)

@@ -87,6 +87,7 @@
     "react-transition-group": "2.4.0",
     "redux": "4.0.4",
     "redux-thunk": "2.2.0",
+    "rnnoise-wasm": "github:jitsi/rnnoise-wasm.git#db96d11f175a22ef56c7db1ba9550835b716e615",
     "styled-components": "3.4.9",
     "util": "0.12.1",
     "uuid": "3.1.0",

react/features/rnnoise/functions.js  (+44 -0)

@@ -0,0 +1,44 @@
+// @flow
+
+import { getJitsiMeetGlobalNS, loadScript } from '../base/util';
+
+let loadRnnoisePromise;
+
+/**
+ * Returns a promise that resolves with a RnnoiseProcessor instance.
+ *
+ * @returns {Promise<RnnoiseProcessor>} - Resolves with the RnnoiseProcessor instance.
+ */
+export function createRnnoiseProcessorPromise() {
+    // Subsequent calls should not attempt to load the script multiple times.
+    if (!loadRnnoisePromise) {
+        loadRnnoisePromise = loadScript('libs/rnnoise-processor.min.js');
+    }
+
+    return loadRnnoisePromise.then(() => {
+        const ns = getJitsiMeetGlobalNS();
+
+        if (ns?.effects?.rnnoise?.createRnnoiseProcessor) {
+            return ns.effects.rnnoise.createRnnoiseProcessor();
+        }
+
+        throw new Error('Rnnoise module binding createRnnoiseProcessor not found!');
+    });
+}
+
+/**
+ * Gets the sample length accepted by the rnnoise library. We might want to expose it with flow libdefs.
+ *
+ * @returns {number}
+ */
+export function getSampleLength() {
+    const ns = getJitsiMeetGlobalNS();
+
+    const rnnoiseSample = ns?.effects?.rnnoise?.RNNOISE_SAMPLE_LENGTH;
+
+    if (!rnnoiseSample) {
+        throw new Error('Please call createRnnoiseProcessorPromise first or wait for the promise to resolve!');
+    }
+
+    return rnnoiseSample;
+}

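A minimal call-site sketch of how these two helpers are meant to be chained (hypothetical consumer code, not part of this commit; it assumes the rnnoise-processor bundle has been deployed to libs/ as the Makefile change above does):

    import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise';

    async function scoreSingleFrame(pcmFrame) {
        // Lazily loads libs/rnnoise-processor.min.js on first use; subsequent calls reuse the same promise.
        const processor = await createRnnoiseProcessorPromise();

        // Only safe to call after the promise above has resolved, per the guard in getSampleLength().
        const sampleLength = getSampleLength();

        if (pcmFrame.length !== sampleLength) {
            throw new Error(`Expected a frame of ${sampleLength} samples`);
        }

        return processor.calculateAudioFrameVAD(pcmFrame);
    }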
react/features/rnnoise/index.js  (+2 -0)

@@ -0,0 +1,2 @@
+
+export * from './functions';

react/features/stream-effects/rnnoise/RnnoiseProcessor.js  (+174 -0)

@@ -0,0 +1,174 @@
+// @flow
+
+/**
+ * Constant. Rnnoise default sample size; samples of a different size won't work.
+ */
+export const RNNOISE_SAMPLE_LENGTH: number = 480;
+
+/**
+ * Constant. Rnnoise only takes inputs of 480 float32 PCM samples, thus 480 * 4 bytes.
+ */
+const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;
+
+/**
+ * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
+ * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
+ * detection) scores.
+ */
+export default class RnnoiseProcessor {
+    /**
+     * Rnnoise context object needed to perform the audio processing.
+     */
+    _context: ?Object;
+
+    /**
+     * State flag, check if the instance was destroyed.
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * WASM interface through which calls to rnnoise are made.
+     */
+    _wasmInterface: Object;
+
+    /**
+     * WASM dynamic memory buffer used as input for the rnnoise processing method.
+     */
+    _wasmPcmInput: Object;
+
+    /**
+     * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
+     */
+    _wasmPcmInputF32Index: number;
+
+    /**
+     * WASM dynamic memory buffer used as output for the rnnoise processing method.
+     */
+    _wasmPcmOutput: Object;
+
+    /**
+     * Constructor.
+     *
+     * @class
+     * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
+     */
+    constructor(wasmInterface: Object) {
+        // Considering that we deal with dynamically allocated memory, employ the exception safety strong guarantee,
+        // i.e. in case of an exception there are no side effects.
+        try {
+            this._wasmInterface = wasmInterface;
+
+            // For VAD score purposes only allocate the buffers once and reuse them.
+            this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
+
+            if (!this._wasmPcmInput) {
+                throw Error('Failed to create wasm input memory buffer!');
+            }
+
+            this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
+
+            if (!this._wasmPcmOutput) {
+                wasmInterface._free(this._wasmPcmInput);
+                throw Error('Failed to create wasm output memory buffer!');
+            }
+
+            // The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model,
+            // which is an array of bytes. This means we have to divide it by the size of a float to get the index
+            // relative to a Float32 Array.
+            this._wasmPcmInputF32Index = this._wasmPcmInput / 4;
+
+            this._context = this._wasmInterface._rnnoise_create();
+        } catch (error) {
+            // Release can be called even if not all the components were initialized.
+            this._releaseWasmResources();
+            throw error;
+        }
+    }
+
+    /**
+     * Copy the input PCM audio sample to the wasm input buffer.
+     *
+     * @param {Float32Array} pcmSample - Array containing a 16-bit format PCM sample stored in 32-bit floats.
+     * @returns {void}
+     */
+    _copyPCMSampleToWasmBuffer(pcmSample: Float32Array) {
+        this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index);
+    }
+
+    /**
+     * Convert 32-bit float PCM samples to 16-bit range PCM values, still stored in 32-bit floats.
+     *
+     * @param {Float32Array} f32Array - Array containing 32 bit PCM samples.
+     * @returns {void}
+     */
+    _convertTo16BitPCM(f32Array: Float32Array) {
+        for (const [ index, value ] of f32Array.entries()) {
+            f32Array[index] = value * 0x7fff;
+        }
+    }
+
+    /**
+     * Release resources associated with the wasm context. If something goes wrong here,
+     * i.e. an exception is thrown, there is not much we can do.
+     *
+     * @returns {void}
+     */
+    _releaseWasmResources() {
+        // Free the input/output buffers and the rnnoise context if they were allocated.
+        if (this._wasmPcmInput) {
+            this._wasmInterface._free(this._wasmPcmInput);
+            this._wasmPcmInput = null;
+        }
+
+        if (this._wasmPcmOutput) {
+            this._wasmInterface._free(this._wasmPcmOutput);
+            this._wasmPcmOutput = null;
+        }
+
+        if (this._context) {
+            this._wasmInterface._rnnoise_destroy(this._context);
+            this._context = null;
+        }
+    }
+
+    /**
+     * Release any resources required by the rnnoise context. This needs to be called
+     * before destroying any context that uses the processor.
+     *
+     * @returns {void}
+     */
+    destroy() {
+        // The instance was already destroyed, do nothing.
+        if (this._destroyed) {
+            return;
+        }
+
+        this._releaseWasmResources();
+
+        this._destroyed = true;
+    }
+
+    /**
+     * Calculate the Voice Activity Detection score for a raw Float32 PCM sample array.
+     * The array must contain exactly 480 samples; this constraint comes from the rnnoise library.
+     *
+     * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
+     * @returns {number} - VAD score in the interval 0 - 1, e.g. 0.90.
+     */
+    calculateAudioFrameVAD(pcmFrame: Float32Array) {
+        if (this._destroyed) {
+            throw new Error('RnnoiseProcessor instance is destroyed, please create another one!');
+        }
+
+        const pcmFrameLength = pcmFrame.length;
+
+        if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) {
+            throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was: ${pcmFrameLength}`);
+        }
+
+        this._convertTo16BitPCM(pcmFrame);
+        this._copyPCMSampleToWasmBuffer(pcmFrame);
+
+        return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput);
+    }
+}

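A short sketch of driving the adapter directly once the wasm module is up (hypothetical caller, not part of this commit; createRnnoiseProcessor from the index.js diff below is what actually constructs instances). Note that calculateAudioFrameVAD scales the input to 16-bit range in place, so callers should pass a copy if they still need the original samples:

    import { createRnnoiseProcessor } from './index';
    import { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';

    async function vadScoreForFrame(pcmFrame /*: Float32Array */) {
        const processor = await createRnnoiseProcessor();

        try {
            // The frame must hold exactly RNNOISE_SAMPLE_LENGTH (480) float samples.
            return processor.calculateAudioFrameVAD(pcmFrame.slice(0, RNNOISE_SAMPLE_LENGTH));
        } finally {
            // Free the wasm heap buffers and the rnnoise context.
            processor.destroy();
        }
    }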
react/features/stream-effects/rnnoise/index.js  (+36 -0)

@@ -0,0 +1,36 @@
+// @flow
+
+// The script expects to find the rnnoise webassembly binary in the same public path root, otherwise it won't load.
+// During the build phase this needs to be taken care of manually.
+import rnnoiseWasmInit from 'rnnoise-wasm';
+import RnnoiseProcessor from './RnnoiseProcessor';
+
+export { RNNOISE_SAMPLE_LENGTH } from './RnnoiseProcessor';
+export type { RnnoiseProcessor };
+
+let rnnoiseWasmInterface;
+let initializePromise;
+
+/**
+ * Creates a new instance of RnnoiseProcessor.
+ *
+ * @returns {Promise<RnnoiseProcessor>}
+ */
+export function createRnnoiseProcessor() {
+    if (!initializePromise) {
+        initializePromise = new Promise((resolve, reject) => {
+            rnnoiseWasmInterface = rnnoiseWasmInit({
+                onRuntimeInitialized() {
+                    resolve();
+                },
+                onAbort(reason) {
+                    reject(reason);
+                }
+            });
+        });
+    }
+
+    return initializePromise.then(
+        () => new RnnoiseProcessor(rnnoiseWasmInterface)
+    );
+}

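The factory above memoizes the Emscripten initialization promise, so concurrent callers share a single wasm runtime while each resolved call still builds its own RnnoiseProcessor (and therefore its own rnnoise context and heap buffers). A rough sketch of that behaviour from a hypothetical caller:

    async function createTwoProcessors() {
        // The wasm module is initialized once; both calls wait on the same promise.
        const [ processorA, processorB ] = await Promise.all([
            createRnnoiseProcessor(),
            createRnnoiseProcessor()
        ]);

        // The two instances own independent rnnoise contexts and can be destroyed separately.
        processorA.destroy();

        return processorB;
    }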
react/features/vad-reporter/TrackVADEmitter.js  (+258 -0)

@@ -0,0 +1,258 @@
+// @flow
+
+import { createRnnoiseProcessorPromise, getSampleLength } from '../rnnoise/';
+import EventEmitter from 'events';
+import JitsiMeetJS from '../base/lib-jitsi-meet';
+import logger from './logger';
+import { VAD_SCORE_PUBLISHED } from './VADEvents';
+
+/**
+ * The structure used by TrackVADEmitter to relay a score.
+ */
+export type VADScore = {
+
+    /**
+     * Device ID associated with the VAD score.
+     */
+    deviceId: string,
+
+    /**
+     * The VAD score from 0 - 1, e.g. 0.60.
+     */
+    score: number,
+
+    /**
+     * Epoch time at which the PCM sample was recorded.
+     */
+    timestamp: number
+
+};
+
+/**
+ * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using a WebAudio ScriptProcessorNode.
+ * Once an object is created, audio from the local track flows through the ScriptProcessorNode as raw PCM.
+ * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained; the
+ * score is published to consumers via an EventEmitter.
+ * After work is done with this service the destroy method needs to be called for a proper cleanup.
+ */
+export default class TrackVADEmitter extends EventEmitter {
+    /**
+     * The AudioContext instance.
+     */
+    _audioContext: AudioContext;
+
+    /**
+     * The MediaStreamAudioSourceNode instance.
+     */
+    _audioSource: MediaStreamAudioSourceNode;
+
+    /**
+     * The ScriptProcessorNode instance.
+     */
+    _audioProcessingNode: ScriptProcessorNode;
+
+    /**
+     * Buffer to hold the residue PCM resulting after a ScriptProcessorNode callback.
+     */
+    _bufferResidue: Float32Array;
+
+    /**
+     * State flag, check if the instance was destroyed.
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * The JitsiLocalTrack instance.
+     */
+    _localTrack: Object;
+
+    /**
+     * Device ID of the target microphone.
+     */
+    _micDeviceId: string;
+
+    /**
+     * Callback function that will be called by the ScriptProcessorNode with raw PCM data, depending on the set sample
+     * rate.
+     */
+    _onAudioProcess: (audioEvent: Object) => void;
+
+    /**
+     * Sample rate of the ScriptProcessorNode.
+     */
+    _procNodeSampleRate: number;
+
+    /**
+     * Rnnoise adapter that allows us to calculate the VAD score for PCM samples.
+     */
+    _rnnoiseProcessor: Object;
+
+    /**
+     * PCM sample size expected by the RnnoiseProcessor instance.
+     */
+    _rnnoiseSampleSize: number;
+
+    /**
+     * Constructor.
+     *
+     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values: 256, 512, 1024,
+     *  2048, 4096, 8192, 16384. Passing other values will default to the closest neighbor.
+     * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate the VAD score
+     * for PCM samples.
+     * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
+     */
+    constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
+        super();
+        this._procNodeSampleRate = procNodeSampleRate;
+        this._rnnoiseProcessor = rnnoiseProcessor;
+        this._localTrack = jitsiLocalTrack;
+        this._micDeviceId = jitsiLocalTrack.getDeviceId();
+        this._bufferResidue = new Float32Array([]);
+        this._audioContext = new AudioContext();
+        this._rnnoiseSampleSize = getSampleLength();
+        this._onAudioProcess = this._onAudioProcess.bind(this);
+
+        this._initializeAudioContext();
+        this._connectAudioGraph();
+
+        logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
+    }
+
+    /**
+     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
+     *
+     * @param {string} micDeviceId - Target microphone device id.
+     * @param {number} procNodeSampleRate - Sample rate of the proc node.
+     * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.
+     */
+    static async create(micDeviceId: string, procNodeSampleRate: number) {
+        let rnnoiseProcessor = null;
+        let localTrack = null;
+
+        try {
+            logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);
+
+            rnnoiseProcessor = await createRnnoiseProcessorPromise();
+            localTrack = await JitsiMeetJS.createLocalTracks({
+                devices: [ 'audio' ],
+                micDeviceId
+            });
+
+            // We only expect one audio track when specifying a device id.
+            if (!localTrack[0]) {
+                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
+            }
+
+            return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
+        } catch (error) {
+            logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);
+
+            if (rnnoiseProcessor) {
+                rnnoiseProcessor.destroy();
+            }
+
+            if (localTrack && localTrack[0]) {
+                localTrack[0].stopStream();
+            }
+
+            throw error;
+        }
+    }
+
+    /**
+     * Sets up the audio graph in the AudioContext.
+     *
+     * @returns {void}
+     */
+    _initializeAudioContext() {
+        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);
+
+        // TODO: ScriptProcessorNode is deprecated, check and replace with an alternative.
+        // We don't need stereo for determining the VAD score so we create a single channel processing node.
+        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
+        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
+    }
+
+    /**
+     * ScriptProcessorNode callback; the input parameter contains the PCM audio that is then sent to rnnoise.
+     * Rnnoise only accepts PCM frames of 480 samples, whereas the webaudio processor node can't sample at a multiple
+     * of 480, thus after each _onAudioProcess callback there will remain a PCM buffer residue equal
+     * to _procNodeSampleRate % 480 which will be prepended to the next sample buffer and so on.
+     *
+     * @param {AudioProcessingEvent} audioEvent - Audio event.
+     * @returns {void}
+     */
+    _onAudioProcess(audioEvent: Object) {
+        // Prepend the residue PCM buffer from the previous process callback.
+        const inData = audioEvent.inputBuffer.getChannelData(0);
+        const completeInData = [ ...this._bufferResidue, ...inData ];
+        const sampleTimestamp = Date.now();
+
+        let i = 0;
+
+        for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
+            const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
+            const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);
+
+            this.emit(VAD_SCORE_PUBLISHED, {
+                timestamp: sampleTimestamp,
+                score: vadScore,
+                deviceId: this._micDeviceId
+            });
+        }
+
+        this._bufferResidue = completeInData.slice(i, completeInData.length);
+    }
+
+    /**
+     * Connects the nodes in the AudioContext to start the flow of audio data.
+     *
+     * @returns {void}
+     */
+    _connectAudioGraph() {
+        this._audioSource.connect(this._audioProcessingNode);
+        this._audioProcessingNode.connect(this._audioContext.destination);
+    }
+
+    /**
+     * Disconnects the nodes in the AudioContext.
+     *
+     * @returns {void}
+     */
+    _disconnectAudioGraph() {
+        // Even though we disconnect the processing node, it seems that some callbacks remain queued,
+        // resulting in calls with an uninitialized context.
+        // eslint-disable-next-line no-empty-function
+        this._audioProcessingNode.onaudioprocess = () => {};
+        this._audioProcessingNode.disconnect();
+        this._audioSource.disconnect();
+    }
+
+    /**
+     * Cleanup potentially acquired resources.
+     *
+     * @returns {void}
+     */
+    _cleanupResources() {
+        logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);
+
+        this._disconnectAudioGraph();
+        this._localTrack.stopStream();
+        this._rnnoiseProcessor.destroy();
+    }
+
+    /**
+     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
+     *
+     * @returns {void}
+     */
+    destroy() {
+        if (this._destroyed) {
+            return;
+        }
+
+        logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
+        this._cleanupResources();
+        this._destroyed = true;
+    }
+}

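A minimal consumer sketch for the emitter (hypothetical call site, not part of this commit), showing the intended lifecycle: create it for a microphone, subscribe to scores, destroy it when done. The 4096 buffer size mirrors SCRIPT_NODE_SAMPLE_RATE used by the reporting service below:

    import TrackVADEmitter from './TrackVADEmitter';
    import { VAD_SCORE_PUBLISHED } from './VADEvents';

    async function monitorMic(micDeviceId) {
        // Sets up the rnnoise processor, the JitsiLocalTrack and the WebAudio graph.
        const emitter = await TrackVADEmitter.create(micDeviceId, 4096);

        emitter.on(VAD_SCORE_PUBLISHED, ({ deviceId, score, timestamp }) => {
            console.log(`VAD ${score.toFixed(2)} for ${deviceId} at ${timestamp}`);
        });

        // Tear down the audio graph, the local track and the rnnoise context when finished.
        setTimeout(() => emitter.destroy(), 10000);
    }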
react/features/vad-reporter/VADEvents.js  (+7 -0)

@@ -0,0 +1,7 @@
+// Event generated by a TrackVADEmitter when it emits a VAD score from rnnoise.
+// The generated objects are of type VADScore.
+export const VAD_SCORE_PUBLISHED = 'vad-score-published';
+
+// Event generated by VADReportingService when it finishes creating a VAD report for the monitored devices.
+// The generated objects are of type Array<VADReportScore>, one score for each monitored device.
+export const VAD_REPORT_PUBLISHED = 'vad-report-published';

react/features/vad-reporter/VADReportingService.js  (+284 -0)

@@ -0,0 +1,284 @@
+// @flow
+
+import EventEmitter from 'events';
+import logger from './logger';
+import TrackVADEmitter from './TrackVADEmitter';
+import { VAD_SCORE_PUBLISHED, VAD_REPORT_PUBLISHED } from './VADEvents';
+import type { VADScore } from './TrackVADEmitter';
+export type { VADScore };
+
+/**
+ * Sample rate used by TrackVADEmitter; this value determines how often the ScriptProcessorNode is going to call the
+ * process audio function and with what sample size.
+ * Basically, lower values mean more callbacks with lower processing times, while bigger values mean fewer callbacks
+ * with longer processing times. This value is somewhere in the middle, so we strike a balance between flooding with
+ * callbacks and processing time. Possible values: 256, 512, 1024, 2048, 4096, 8192, 16384. Passing other values
+ * will default to the closest neighbor.
+ */
+const SCRIPT_NODE_SAMPLE_RATE = 4096;
+
+/**
+ * Context that contains the emitter and additional information about the device.
+ */
+type VADDeviceContext = {
+
+    /**
+     * MediaDeviceInfo for the associated context.
+     */
+    deviceInfo: MediaDeviceInfo,
+
+    /**
+     * Array with VAD scores published by the emitter.
+     */
+    scoreArray: Array<VADScore>,
+
+    /**
+     * TrackVADEmitter associated with the media device.
+     */
+    vadEmitter: TrackVADEmitter
+};
+
+/**
+ * The structure used by VADReportingService to relay a score report.
+ */
+export type VADReportScore = {
+
+    /**
+     * Device ID associated with the VAD score.
+     */
+    deviceId: string,
+
+    /**
+     * The VAD score from 0 - 1, e.g. 0.60.
+     */
+    score: number,
+
+    /**
+     * Epoch time at which the PCM was recorded.
+     */
+    timestamp: number
+};
+
+
+/**
+ * Voice activity detection reporting service. The service creates TrackVADEmitters for the provided devices and
+ * publishes an average of their VAD scores over the specified interval via EventEmitter.
+ * The service is not reusable: if destroyed, a new one needs to be created, i.e. when a new device is added to the
+ * system a new service needs to be created and the old one discarded.
+ */
+export default class VADReportingService extends EventEmitter {
+    /**
+     * Map containing context for devices currently being monitored by the reporting service.
+     */
+    _contextMap: Map<string, VADDeviceContext>;
+
+    /**
+     * State flag, check if the instance was destroyed.
+     */
+    _destroyed: boolean = false;
+
+    /**
+     * Delay at which to publish VAD scores for monitored devices.
+     */
+    _intervalDelay: number;
+
+    /**
+     * Identifier for the interval publishing stats on the set interval.
+     */
+    _intervalId: ?IntervalID;
+
+    /**
+     * Constructor.
+     *
+     * @param {number} intervalDelay - Delay at which to publish VAD scores for monitored devices.
+     * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score.
+     */
+    constructor(intervalDelay: number) {
+        super();
+        this._contextMap = new Map();
+        this._intervalDelay = intervalDelay;
+
+        logger.log(`Constructed VADReportingService with publish interval of: ${intervalDelay}`);
+    }
+
+    /**
+     * Factory method that creates the TrackVADEmitters for the associated array of devices and instantiates
+     * a VADReportingService.
+     *
+     * @param {Array<MediaDeviceInfo>} micDeviceList - Device list that is monitored inside the service.
+     * @param {number} intervalDelay - Delay at which to publish VAD scores for monitored devices.
+     * @param {Function} publishScoreCallBack - Function called on the specific interval with the calculated VAD score.
+     *
+     * @returns {Promise<VADReportingService>}
+     */
+    static create(micDeviceList: Array<MediaDeviceInfo>, intervalDelay: number) {
+        const vadReportingService = new VADReportingService(intervalDelay);
+        const emitterPromiseArray = [];
+
+        // Create a TrackVADEmitter for each provided audioinput device.
+        for (const micDevice of micDeviceList) {
+            if (micDevice.kind !== 'audioinput') {
+                logger.warn(`Provided device ${micDevice.label} -> ${micDevice.deviceId}, is not audioinput, ignoring!`);
+
+                continue;
+            }
+
+            logger.log(`Initializing VAD context for mic: ${micDevice.label} -> ${micDevice.deviceId}`);
+
+            const emitterPromise = TrackVADEmitter.create(micDevice.deviceId, SCRIPT_NODE_SAMPLE_RATE).then(emitter => {
+                emitter.on(VAD_SCORE_PUBLISHED, vadReportingService._devicePublishVADScore.bind(vadReportingService));
+
+                return {
+                    vadEmitter: emitter,
+                    deviceInfo: micDevice,
+                    scoreArray: []
+                };
+            });
+
+            emitterPromiseArray.push(emitterPromise);
+        }
+
+        // Once all the TrackVADEmitter promises are settled, check if all of them resolved properly; if not, reject
+        // the promise and clear the already created emitters.
+        // $FlowFixMe - allSettled is not part of flow prototype even though it's a valid Promise function
+        return Promise.allSettled(emitterPromiseArray).then(outcomeArray => {
+            const vadContextArray = [];
+            const rejectedEmitterPromiseArray = [];
+
+            for (const outcome of outcomeArray) {
+                if (outcome.status === 'fulfilled') {
+                    vadContextArray.push(outcome.value);
+                } else {
+                    // Promise was rejected.
+                    logger.error(`Create TrackVADEmitter promise failed with ${outcome.reason}`);
+
+                    rejectedEmitterPromiseArray.push(outcome);
+                }
+            }
+
+            // Check if there were any rejected promises and clear the already created ones list.
+            if (rejectedEmitterPromiseArray.length > 0) {
+                logger.error('Cleaning up remaining VADDeviceContext, due to create fail!');
+
+                for (const context of vadContextArray) {
+                    context.vadEmitter.destroy();
+                }
+
+                // Reject the create promise if one emitter failed to instantiate; we might want to just ignore it,
+                // leaving it like this for now.
+                throw new Error('Create VADReportingService failed due to TrackVADEmitter creation issues!');
+            }
+
+            vadReportingService._setVADContextArray(vadContextArray);
+            vadReportingService._startPublish();
+
+            return vadReportingService;
+        });
+    }
+
+    /**
+     * Destroy TrackVADEmitters and clear the context map.
+     *
+     * @returns {void}
+     */
+    _clearContextMap() {
+        for (const vadContext of this._contextMap.values()) {
+            vadContext.vadEmitter.destroy();
+        }
+        this._contextMap.clear();
+    }
+
+    /**
+     * Set the watched device contexts.
+     *
+     * @param {Array<VADDeviceContext>} vadContextArray - List of mics.
+     * @returns {void}
+     */
+    _setVADContextArray(vadContextArray: Array<VADDeviceContext>): void {
+        for (const vadContext of vadContextArray) {
+            this._contextMap.set(vadContext.deviceInfo.deviceId, vadContext);
+        }
+    }
+
+    /**
+     * Start the setInterval reporting process.
+     *
+     * @returns {void}.
+     */
+    _startPublish() {
+        logger.log('VADReportingService started publishing.');
+        this._intervalId = setInterval(() => {
+            this._reportVadScore();
+        }, this._intervalDelay);
+    }
+
+    /**
+     * Function called at the set interval to compute the report. The result is published via the
+     * VAD_REPORT_PUBLISHED event.
+     *
+     * @returns {void}
+     */
+    _reportVadScore() {
+        const vadComputeScoreArray = [];
+        const computeTimestamp = Date.now();
+
+        // Go through each device and compute the cumulative VAD score.
+
+        for (const [ deviceId, vadContext ] of this._contextMap) {
+            const nrOfVADScores = vadContext.scoreArray.length;
+            let vadSum = 0;
+
+            vadContext.scoreArray.forEach(vadScore => {
+                vadSum += vadScore.score;
+            });
+
+            // TODO: For now we just calculate the average score for each device, more compute algorithms will be added.
+            const avgVAD = vadSum / nrOfVADScores;
+
+            vadContext.scoreArray = [];
+
+            vadComputeScoreArray.push({
+                timestamp: computeTimestamp,
+                score: avgVAD,
+                deviceId
+            });
+        }
+
+        this.emit(VAD_REPORT_PUBLISHED, vadComputeScoreArray);
+    }
+
+    /**
+     * Callback method passed to the VAD emitters in order to publish their score.
+     *
+     * @param {VADScore} vadScore - Score emitted by one of the monitored mics.
+     * @returns {void}
+     */
+    _devicePublishVADScore(vadScore: VADScore) {
+        const context = this._contextMap.get(vadScore.deviceId);
+
+        if (context) {
+            context.scoreArray.push(vadScore);
+        }
+    }
+
+    /**
+     * Destroy the VADReportingService: stop the setInterval reporting, destroy the emitters and clear the map.
+     * After this call the instance is no longer usable.
+     *
+     * @returns {void}.
+     */
+    destroy() {
+        if (this._destroyed) {
+            return;
+        }
+
+        logger.log('Destroying VADReportingService.');
+
+        if (this._intervalId) {
+            clearInterval(this._intervalId);
+            this._intervalId = null;
+        }
+        this._clearContextMap();
+        this._destroyed = true;
+    }
+
+}

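A minimal consumer sketch for the reporting service (hypothetical call site, not part of this commit): enumerate the audio input devices, create a service that averages scores on an interval, and destroy it whenever the device set changes:

    import VADReportingService from './VADReportingService';
    import { VAD_REPORT_PUBLISHED } from './VADEvents';

    async function startVadReporting() {
        const devices = await navigator.mediaDevices.enumerateDevices();
        const audioInputs = devices.filter(device => device.kind === 'audioinput');

        // Publish one averaged VAD score per monitored device every 5 seconds.
        const reportingService = await VADReportingService.create(audioInputs, 5000);

        reportingService.on(VAD_REPORT_PUBLISHED, reportArray => {
            for (const { deviceId, score, timestamp } of reportArray) {
                console.log(`Average VAD ${score.toFixed(2)} for ${deviceId} at ${timestamp}`);
            }
        });

        // Call reportingService.destroy() when the device list changes.
        return reportingService;
    }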
react/features/vad-reporter/logger.js  (+5 -0)

@@ -0,0 +1,5 @@
+// @flow
+
+import { getLogger } from '../base/logging/functions';
+
+export default getLogger('features/vad-reporter');

webpack.config.js  (+34 -3)

@@ -214,16 +214,46 @@ module.exports = [
         },
         performance: getPerformanceHints(5 * 1024)
     }),
+
+    // Because both the video-blur-effect and rnnoise-processor modules are loaded
+    // in a lazy manner using the loadScript function with a hard coded name,
+    // i.e. loadScript('libs/rnnoise-processor.min.js'), webpack dev server
+    // won't know how to properly load them using the default config filename
+    // and sourceMapFilename parameters which target libs without .min in dev
+    // mode. Thus we change these modules to have the same filename in both
+    // prod and dev mode.
     Object.assign({}, config, {
         entry: {
             'video-blur-effect': './react/features/stream-effects/blur/index.js'
         },
         output: Object.assign({}, config.output, {
             library: [ 'JitsiMeetJS', 'app', 'effects' ],
-            libraryTarget: 'window'
+            libraryTarget: 'window',
+            filename: '[name].min.js',
+            sourceMapFilename: '[name].min.map'
        }),
         performance: getPerformanceHints(1 * 1024 * 1024)
     }),
+
+    Object.assign({}, config, {
+        entry: {
+            'rnnoise-processor': './react/features/stream-effects/rnnoise/index.js'
+        },
+        node: {
+            // The Emscripten generated glue code "rnnoise.js" expects the node fs module;
+            // we need to specify this parameter so webpack knows how to properly
+            // interpret it when encountered.
+            fs: 'empty'
+        },
+        output: Object.assign({}, config.output, {
+            library: [ 'JitsiMeetJS', 'app', 'effects', 'rnnoise' ],
+            libraryTarget: 'window',
+            filename: '[name].min.js',
+            sourceMapFilename: '[name].min.map'
+        }),
+        performance: getPerformanceHints(30 * 1024)
+    }),
+
     Object.assign({}, config, {
         entry: {
             'external_api': './modules/API/external/index.js'
@@ -249,7 +279,8 @@ function devServerProxyBypass({ path }) {
     if (path.startsWith('/css/') || path.startsWith('/doc/')
             || path.startsWith('/fonts/') || path.startsWith('/images/')
            || path.startsWith('/sounds/')
-            || path.startsWith('/static/')) {
+            || path.startsWith('/static/')
+            || path.endsWith('.wasm')) {
         return path;
     }
 
@@ -258,7 +289,7 @@ function devServerProxyBypass({ path }) {
     /* eslint-disable array-callback-return, indent */
 
     if ((Array.isArray(configs) ? configs : Array(configs)).some(c => {
-                if (path.startsWith(c.output.publicPath)) {
+            if (path.startsWith(c.output.publicPath)) {
                     if (!minimize) {
                         // Since webpack-dev-server is serving non-minimized
                         // artifacts, serve them even if the minimized ones are

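For context on why the bundle keeps the .min.js name even in dev mode: the app loads it lazily by a hard-coded path and then reads the exports off the global namespace created by the library/libraryTarget output options above. A rough sketch of that consumption pattern (hypothetical, mirroring react/features/rnnoise/functions.js from this commit):

    // After libs/rnnoise-processor.min.js has been loaded via loadScript(), the
    // output options library: [ 'JitsiMeetJS', 'app', 'effects', 'rnnoise' ] and
    // libraryTarget: 'window' expose the bundle's exports on the window object.
    const rnnoise = window.JitsiMeetJS.app.effects.rnnoise;

    rnnoise.createRnnoiseProcessor().then(processor => {
        console.log(`rnnoise ready, frame size: ${rnnoise.RNNOISE_SAMPLE_LENGTH} samples`);
        processor.destroy();
    });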