feat(blur) replace BodyPix with TFLite

Use the Google Meet model and SIMD optimized WASM.
Branch: master
Tudor D. Pop · 4 years ago
Commit 946339a52e

.eslintignore (+2 -0)

@@ -6,6 +6,8 @@ build/*
 flow-typed/*
 libs/*
 
+react/features/stream-effects/blur/vendor/*
+
 # ESLint will by default ignore its own configuration file. However, there does
 # not seem to be a reason why we will want to risk being inconsistent with our
 # remaining JavaScript source code.

Makefile (+14 -2)

@@ -5,6 +5,8 @@ LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
 LIBFLAC_DIR = node_modules/libflacjs/dist/min/
 OLM_DIR = node_modules/olm
 RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
+TFLITE_WASM = react/features/stream-effects/blur/vendor/tflite
+MEET_MODELS_DIR  = react/features/stream-effects/blur/vendor/models/
 NODE_SASS = ./node_modules/.bin/sass
 NPM = npm
 OUTPUT_DIR = .
@@ -26,7 +28,7 @@ clean:
 	rm -fr $(BUILD_DIR)
 
 .NOTPARALLEL:
-deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
+deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
 
 deploy-init:
 	rm -fr $(DEPLOY_DIR)
@@ -82,6 +84,16 @@ deploy-rnnoise-binary:
 		$(RNNOISE_WASM_DIR)/rnnoise.wasm \
 		$(DEPLOY_DIR)
 
+deploy-tflite:
+	cp \
+		$(TFLITE_WASM)/*.wasm \
+		$(DEPLOY_DIR)
+
+deploy-meet-models:
+	cp \
+		$(MEET_MODELS_DIR)/*.tflite \
+		$(DEPLOY_DIR)
+
 deploy-css:
 	$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
 	$(CLEANCSS) --skip-rebase $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@@ -91,7 +103,7 @@ deploy-local:
 	([ ! -x deploy-local.sh ] || ./deploy-local.sh)
 
 .NOTPARALLEL:
-dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm
+dev: deploy-init deploy-css deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm
 	$(WEBPACK_DEV_SERVER) --detect-circular-deps
 
 source-package:

package-lock.json (+5 -5)

@@ -15491,11 +15491,6 @@
       "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.2.tgz",
       "integrity": "sha512-MTX+MeG5U994cazkjd/9KNAapsHnibjMLnfXodlkXw76JEea0UiNzrqidzo1emMwk7w5Qhc9jd4Bn9TBb1MFwA=="
     },
-    "stackblur-canvas": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/stackblur-canvas/-/stackblur-canvas-2.3.0.tgz",
-      "integrity": "sha512-3ZHJv+43D8YttgumssIxkfs3hBXW7XaMS5Ux65fOBhKDYMjbG5hF8Ey8a90RiiJ58aQnAhWbGilPzZ9rkIlWgQ=="
-    },
     "stacktrace-parser": {
       "version": "0.1.8",
       "resolved": "https://registry.npmjs.org/stacktrace-parser/-/stacktrace-parser-0.1.8.tgz",
@@ -17071,6 +17066,11 @@
         "loose-envify": "^1.0.0"
       }
     },
+    "wasm-check": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/wasm-check/-/wasm-check-2.0.1.tgz",
+      "integrity": "sha512-5otny2JrfRNKIc+zi1YSOrNxXe47trEQbpY6g/MtHrFwLumKSJyAIobGXH1tlEBezE95eIsmDokBbUZtIZTvvA=="
+    },
     "watchpack": {
       "version": "1.7.2",
       "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.7.2.tgz",

package.json (+1 -1)

@@ -95,10 +95,10 @@
     "redux-thunk": "2.2.0",
     "rnnoise-wasm": "github:jitsi/rnnoise-wasm#566a16885897704d6e6d67a1d5ac5d39781db2af",
     "rtcstats": "github:jitsi/rtcstats#v6.2.0",
-    "stackblur-canvas": "2.3.0",
     "styled-components": "3.4.9",
     "util": "0.12.1",
     "uuid": "3.1.0",
+    "wasm-check": "2.0.1",
     "windows-iana": "^3.1.0",
     "xmldom": "0.1.27",
     "zxcvbn": "4.4.2"

+ 102
- 50
react/features/stream-effects/blur/JitsiStreamBlurEffect.js (+102 -50)

@@ -1,7 +1,4 @@
 // @flow
-
-import * as StackBlur from 'stackblur-canvas';
-
 import {
     CLEAR_TIMEOUT,
     TIMEOUT_TICK,
@@ -9,21 +6,27 @@ import {
     timerWorkerScript
 } from './TimerWorker';
 
+const segmentationWidth = 256;
+const segmentationHeight = 144;
+const segmentationPixelCount = segmentationWidth * segmentationHeight;
+const blurValue = '25px';
+
 /**
  * Represents a modified MediaStream that adds blur to video background.
  * <tt>JitsiStreamBlurEffect</tt> does the processing of the original
  * video stream.
  */
 export default class JitsiStreamBlurEffect {
-    _bpModel: Object;
+    _model: Object;
     _inputVideoElement: HTMLVideoElement;
-    _inputVideoCanvasElement: HTMLCanvasElement;
     _onMaskFrameTimer: Function;
     _maskFrameTimerWorker: Worker;
-    _maskInProgress: boolean;
     _outputCanvasElement: HTMLCanvasElement;
+    _outputCanvasCtx: Object;
+    _segmentationMaskCtx: Object;
+    _segmentationMask: Object;
+    _segmentationMaskCanvas: Object;
     _renderMask: Function;
-    _segmentationData: Object;
     isEnabled: Function;
     startEffect: Function;
     stopEffect: Function;
@@ -35,7 +38,7 @@ export default class JitsiStreamBlurEffect {
      * @param {BodyPix} bpModel - BodyPix model.
      */
    constructor(bpModel: Object) {
-        this._bpModel = bpModel;
+        this._model = bpModel;
 
         // Bind event handler so it is only bound once for every instance.
         this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
@@ -44,7 +47,6 @@ export default class JitsiStreamBlurEffect {
         this._outputCanvasElement = document.createElement('canvas');
         this._outputCanvasElement.getContext('2d');
         this._inputVideoElement = document.createElement('video');
-        this._inputVideoCanvasElement = document.createElement('canvas');
     }
 
     /**
@@ -61,61 +63,108 @@ export default class JitsiStreamBlurEffect {
     }
 
     /**
-     * Loop function to render the background mask.
+     * Runs the post-processing steps.
      *
-     * @private
      * @returns {void}
     */
-    async _renderMask() {
-        if (!this._maskInProgress) {
-            this._maskInProgress = true;
-            this._bpModel.segmentPerson(this._inputVideoElement, {
-                internalResolution: 'low', // resized to 0.5 times of the original resolution before inference
-                maxDetections: 1, // max. number of person poses to detect per image
-                segmentationThreshold: 0.7, // represents probability that a pixel belongs to a person
-                flipHorizontal: false,
-                scoreThreshold: 0.2
-            }).then(data => {
-                this._segmentationData = data;
-                this._maskInProgress = false;
-            });
-        }
-        const inputCanvasCtx = this._inputVideoCanvasElement.getContext('2d');
-
-        inputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
+    runPostProcessing() {
+        this._outputCanvasCtx.globalCompositeOperation = 'copy';
 
-        const currentFrame = inputCanvasCtx.getImageData(
+        // Draw segmentation mask.
+        this._outputCanvasCtx.filter = `blur(${blurValue})`;
+        this._outputCanvasCtx.drawImage(
+            this._segmentationMaskCanvas,
+            0,
             0,
+            segmentationWidth,
+            segmentationHeight,
             0,
-            this._inputVideoCanvasElement.width,
-            this._inputVideoCanvasElement.height
+            0,
+            this._inputVideoElement.width,
+            this._inputVideoElement.height
         );
 
-        if (this._segmentationData) {
-            const blurData = new ImageData(currentFrame.data.slice(), currentFrame.width, currentFrame.height);
-
-            StackBlur.imageDataRGB(blurData, 0, 0, currentFrame.width, currentFrame.height, 12);
+        this._outputCanvasCtx.globalCompositeOperation = 'source-in';
+        this._outputCanvasCtx.filter = 'none';
+        this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
 
-            for (let x = 0; x < this._outputCanvasElement.width; x++) {
-                for (let y = 0; y < this._outputCanvasElement.height; y++) {
-                    const n = (y * this._outputCanvasElement.width) + x;
+        this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
+        this._outputCanvasCtx.filter = `blur(${blurValue})`; // FIXME Does not work on Safari.
+        this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
+    }
 
-                    if (this._segmentationData.data[n] === 0) {
-                        currentFrame.data[n * 4] = blurData.data[n * 4];
-                        currentFrame.data[(n * 4) + 1] = blurData.data[(n * 4) + 1];
-                        currentFrame.data[(n * 4) + 2] = blurData.data[(n * 4) + 2];
-                        currentFrame.data[(n * 4) + 3] = blurData.data[(n * 4) + 3];
-                    }
-                }
-            }
+    /**
+     * Runs the TensorFlow Lite inference.
+     *
+     * @returns {void}
+     */
+    runInference() {
+        this._model._runInference();
+        const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;
+
+        for (let i = 0; i < segmentationPixelCount; i++) {
+            const background = this._model.HEAPF32[outputMemoryOffset + (i * 2)];
+            const person = this._model.HEAPF32[outputMemoryOffset + (i * 2) + 1];
+            const shift = Math.max(background, person);
+            const backgroundExp = Math.exp(background - shift);
+            const personExp = Math.exp(person - shift);
+
+            // Sets only the alpha component of each pixel.
+            this._segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
         }
-        this._outputCanvasElement.getContext('2d').putImageData(currentFrame, 0, 0);
+        this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0);
+    }
+
+    /**
+     * Loop function to render the background mask.
+     *
+     * @private
+     * @returns {void}
+     */
+    _renderMask() {
+        this.resizeSource();
+        this.runInference();
+        this.runPostProcessing();
+
         this._maskFrameTimerWorker.postMessage({
             id: SET_TIMEOUT,
             timeMs: 1000 / 30
        });
     }
 
+    /**
+     * Resizes the source frame down to the segmentation input dimensions.
+     *
+     * @returns {void}
+     */
+    resizeSource() {
+        this._segmentationMaskCtx.drawImage(
+            this._inputVideoElement,
+            0,
+            0,
+            this._inputVideoElement.width,
+            this._inputVideoElement.height,
+            0,
+            0,
+            segmentationWidth,
+            segmentationHeight
+        );
+
+        const imageData = this._segmentationMaskCtx.getImageData(
+            0,
+            0,
+            segmentationWidth,
+            segmentationHeight
+        );
+        const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;
+
+        for (let i = 0; i < segmentationPixelCount; i++) {
+            this._model.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255;
+            this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
+            this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
+        }
+    }
+
     /**
      * Checks if the local track supports this effect.
     *
@@ -136,15 +185,18 @@ export default class JitsiStreamBlurEffect {
     startEffect(stream: MediaStream) {
         this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
         this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;
-
         const firstVideoTrack = stream.getVideoTracks()[0];
         const { height, frameRate, width }
             = firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();
 
+        this._segmentationMask = new ImageData(segmentationWidth, segmentationHeight);
+        this._segmentationMaskCanvas = document.createElement('canvas');
+        this._segmentationMaskCanvas.width = segmentationWidth;
+        this._segmentationMaskCanvas.height = segmentationHeight;
+        this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
         this._outputCanvasElement.width = parseInt(width, 10);
         this._outputCanvasElement.height = parseInt(height, 10);
-        this._inputVideoCanvasElement.width = parseInt(width, 10);
-        this._inputVideoCanvasElement.height = parseInt(height, 10);
+        this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
         this._inputVideoElement.width = parseInt(width, 10);
         this._inputVideoElement.height = parseInt(height, 10);
         this._inputVideoElement.autoplay = true;
+ 26
- 10
react/features/stream-effects/blur/index.js (+26 -10)

@@ -1,8 +1,15 @@
 // @flow
 
-import * as bodyPix from '@tensorflow-models/body-pix';
+import * as wasmCheck from 'wasm-check';
 
 import JitsiStreamBlurEffect from './JitsiStreamBlurEffect';
+import createTFLiteModule from './vendor/tflite/tflite';
+import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';
+
+const models = {
+    '96': '/libs/segm_lite_v681.tflite',
+    '144': '/libs/segm_full_v679.tflite'
+};
 
 /**
  * Creates a new instance of JitsiStreamBlurEffect. This loads the bodyPix model that is used to
@@ -14,15 +21,24 @@ export async function createBlurEffect() {
     if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
         throw new Error('JitsiStreamBlurEffect not supported!');
     }
+    let tflite;
+
+    if (wasmCheck.feature.simd) {
+        tflite = await createTFLiteSIMDModule();
+    } else {
+        tflite = await createTFLiteModule();
+    }
+
+    const modelBufferOffset = tflite._getModelBufferMemoryOffset();
+    const modelResponse = await fetch(
+        models['144']
+    );
+
+    const model = await modelResponse.arrayBuffer();
+
+    tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset);
 
-    // An output stride of 16 and a multiplier of 0.5 are used for improved
-    // performance on a larger range of CPUs.
-    const bpModel = await bodyPix.load({
-        architecture: 'MobileNetV1',
-        outputStride: 16,
-        multiplier: 0.50,
-        quantBytes: 2
-    });
+    tflite._loadModel(model.byteLength);
 
-    return new JitsiStreamBlurEffect(bpModel);
+    return new JitsiStreamBlurEffect(tflite);
 }
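
Editor's note: two details in `createBlurEffect` are easy to miss. `wasm-check`'s `feature.simd` probe selects between the two Emscripten builds at runtime, and the offsets returned by the module's `_get*MemoryOffset` helpers are byte addresses, which is why the effect divides them by 4 before indexing the 4-byte-element HEAPF32 view while HEAPU8 takes them unscaled. A condensed sketch of the same loading flow, wrapped in a hypothetical helper:

```js
import * as wasmCheck from 'wasm-check';

import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

/**
 * Hypothetical wrapper around the loading steps inlined in createBlurEffect.
 */
async function loadSegmentationModel(modelUrl) {
    // Prefer the SIMD build when the runtime can validate SIMD bytecode.
    const tflite = wasmCheck.feature.simd
        ? await createTFLiteSIMDModule()
        : await createTFLiteModule();

    const response = await fetch(modelUrl);
    const model = await response.arrayBuffer();

    // The returned offset is in bytes, matching the byte-wide HEAPU8 view.
    tflite.HEAPU8.set(new Uint8Array(model), tflite._getModelBufferMemoryOffset());
    tflite._loadModel(model.byteLength);

    return tflite;
}
```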

+ 24
- 0
react/features/stream-effects/blur/vendor/README.md (+24 -0)

@@ -0,0 +1,24 @@
+# Virtual Background on stream effects
+
+> Inspired by https://ai.googleblog.com/2020/10/background-features-in-google-meet.html and https://github.com/Volcomix/virtual-background.git
+
+#### Canvas 2D + CPU
+
+This rendering pipeline is pretty much the same as for BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
+
+Interactions with the TFLite inference tool are executed on the CPU, converting from UInt8 to Float32 for the model input and applying a softmax on the model output.
+
+The frame rate is higher and the quality looks better than with BodyPix.
+
+#### SIMD and non-SIMD
+
+How to test with SIMD:
+1. Go to chrome://flags/
+2. Search for the SIMD flag
+3. Enable WebAssembly SIMD support (enables support for the WebAssembly SIMD proposal)
+4. Restart Google Chrome
+
+More details:
+- [WebAssembly](https://webassembly.org/)
+- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
+- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)
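
Editor's note: the softmax mentioned in the README is just the two-class case. The model emits a (background, person) score pair for each pixel of the 256×144 segmentation map, and `runInference` converts that pair into the mask's alpha channel. A sketch of the per-pixel conversion, assuming `out` is the Float32Array view over the model output:

```js
// Two-class softmax over one (background, person) score pair, as applied to
// the model output before it is written into the mask's alpha channel.
function personProbability(out, i) {
    const background = out[i * 2];
    const person = out[(i * 2) + 1];

    // Subtracting the max before exp() is the standard trick to avoid overflow.
    const shift = Math.max(background, person);
    const expBackground = Math.exp(background - shift);
    const expPerson = Math.exp(person - shift);

    return expPerson / (expBackground + expPerson);
}
```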

react/features/stream-effects/blur/vendor/models/segm_full_v679.tflite (BIN)

react/features/stream-effects/blur/vendor/models/segm_lite_v681.tflite (BIN)

react/features/stream-effects/blur/vendor/tflite/tflite-simd.js (+21 -0)
Diff not shown: the file is too large.

react/features/stream-effects/blur/vendor/tflite/tflite-simd.wasm (BIN)

react/features/stream-effects/blur/vendor/tflite/tflite.js (+21 -0)
Diff not shown: the file is too large.

react/features/stream-effects/blur/vendor/tflite/tflite.wasm (BIN)

+ 2
- 1
react/features/toolbox/components/web/Toolbox.js (+2 -1)

@@ -1,6 +1,7 @@
 // @flow
 
 import React, { Component } from 'react';
+import * as wasmCheck from 'wasm-check';
 
 import {
     ACTION_SHORTCUT_TRIGGERED,
@@ -1069,7 +1070,7 @@ class Toolbox extends Component<Props, State> {
                 && <VideoBlurButton
                     key = 'videobackgroundblur'
                     showLabel = { true }
-                    visible = { !_screensharing } />,
+                    visible = { !_screensharing && wasmCheck.feature.simd } />,
             this._shouldShowButton('settings')
                 && <SettingsButton
                     key = 'settings'
