|
@@ -1,378 +1,384 @@
|
1
|
|
-// Worker for E2EE/Insertable streams. Currently served as an inline blob.
|
2
|
|
-const code = `
|
3
|
|
- // Polyfill RTCEncoded(Audio|Video)Frame.getMetadata() (not available in M83, available M84+).
|
4
|
|
- // The polyfill can not be done on the prototype since its not exposed in workers. Instead,
|
5
|
|
- // it is done as another transformation to keep it separate.
|
6
|
|
- function polyFillEncodedFrameMetadata(encodedFrame, controller) {
|
7
|
|
- if (!encodedFrame.getMetadata) {
|
|
1
|
+/* global TransformStream */
|
|
2
|
+
|
|
3
|
+// Worker for E2EE/Insertable streams.
|
|
4
|
+//
|
|
5
|
+
|
|
6
|
/**
 * Polyfill RTCEncoded(Audio|Video)Frame.getMetadata() (not available in M83, available M84+).
 * The polyfill cannot be done on the prototype since it is not exposed in workers. Instead,
 * it is done as another transformation to keep it separate.
 *
 * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Frame to patch. It is modified
 * in place when it has no getMetadata member and is forwarded in every case.
 * @param {TransformStreamDefaultController} controller - Controller the frame is enqueued on.
 * @returns {void}
 */
function polyFillEncodedFrameMetadata(encodedFrame, controller) {
    if (!encodedFrame.getMetadata) {
        // A regular function (not an arrow function) so that `this` is the frame
        // the method is called on.
        encodedFrame.getMetadata = function() {
            return {
                // TODO: provide a more complete polyfill based on additionalData for video.
                synchronizationSource: this.synchronizationSource,
                contributingSources: this.contributingSources
            };
        };
    }
    controller.enqueue(encodedFrame);
}
|
|
23
|
+
|
|
24
|
// We use a ringbuffer of keys so we can change them and still decode packets that were
// encrypted with an old key.
// In the future, when we don't rely on a globally shared key, we will actually use it. For
// now the size is set to 1, which means there is only a single key. This causes some
// glitches when changing the key, but that is acceptable.
const keyRingSize = 1;

// We use a 96 bit IV for AES GCM. This is signalled in plain together with the
// packet. See https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
const ivLength = 12;

// We use a 128 bit key for AES GCM.
const keyGenParameters = {
    name: 'AES-GCM',
    length: 128
};

// We copy the first bytes of the VP8 payload unencrypted.
// For keyframes this is 10 bytes, for non-keyframes (delta) 3. See
//   https://tools.ietf.org/html/rfc6386#section-9.1
// This allows the bridge to continue detecting keyframes (only one byte needed in the JVB)
// and is also a bit easier for the VP8 decoder (i.e. it generates funny garbage pictures
// instead of being unable to decode).
// This is a bit for show and we might want to reduce to 1 unconditionally in the final version.
//
// For audio (where frame.type is not set) we do not encrypt the opus TOC byte:
//   https://tools.ietf.org/html/rfc6716#section-3.1
//
// The table is indexed with frame.type, hence the literal 'undefined' key for audio.
const unencryptedBytes = {
    key: 10,
    delta: 3,
    undefined: 1 // frame.type is not set on audio
};

// Salt used in key derivation.
// FIXME: We currently use the MUC room name for this, which has the same lifetime
// as this worker. While not (pseudo)random as recommended in
//   https://developer.mozilla.org/en-US/docs/Web/API/Pbkdf2Params
// this is easily available and the same for all participants.
// We currently do not enforce a minimum length of 16 bytes either.
let _keySalt;

// Raw keyBytes used to derive the key.
let _keyBytes;
|
|
67
|
+
|
|
68
|
/**
 * Derives an AES-GCM key from the input using PBKDF2 (SHA-256, 100000 iterations).
 * The key algorithm and length are taken from keyGenParameters; the length should be
 * either 128 or 256 bits.
 *
 * @param {Uint8Array} keyBytes - Value to derive key from
 * @param {Uint8Array} salt - Salt used in key derivation
 * @returns {Promise<CryptoKey>} A non-extractable key usable for 'encrypt' and 'decrypt'.
 */
async function deriveKey(keyBytes, salt) {
    // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/importKey
    const material = await crypto.subtle.importKey('raw', keyBytes,
        'PBKDF2', false, [ 'deriveBits', 'deriveKey' ]);

    // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/deriveKey#PBKDF2
    return crypto.subtle.deriveKey({
        name: 'PBKDF2',
        salt,
        iterations: 100000,
        hash: 'SHA-256'
    }, material, keyGenParameters, false, [ 'encrypt', 'decrypt' ]);
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+/** Per-participant context holding the cryptographic keys and
|
|
90
|
+ * encode/decode functions
|
|
91
|
+ */
|
|
92
|
+class Context {
|
|
93
|
+ /**
|
|
94
|
+ * @param {string} id - local muc resourcepart
|
|
95
|
+ */
|
|
96
|
+ constructor(id) {
|
|
97
|
+ // An array (ring) of keys that we use for sending and receiving.
|
|
98
|
+ this._cryptoKeyRing = new Array(keyRingSize);
|
|
99
|
+
|
|
100
|
+ // A pointer to the currently used key.
|
|
101
|
+ this._currentKeyIndex = -1;
|
|
102
|
+
|
|
103
|
+ // We keep track of how many frames we have sent per ssrc.
|
|
104
|
+ // Starts with a random offset similar to the RTP sequence number.
|
|
105
|
+ this._sendCounts = new Map();
|
18
|
106
|
|
19
|
|
- // We use a ringbuffer of keys so we can change them and still decode packets that were
|
20
|
|
- // encrypted with an old key.
|
21
|
|
- // In the future when we dont rely on a globally shared key we will actually use it. For
|
22
|
|
- // now set the size to 1 which means there is only a single key. This causes some
|
23
|
|
- // glitches when changing the key but its ok.
|
24
|
|
- const keyRingSize = 1;
|
25
|
|
-
|
26
|
|
- // We use a 96 bit IV for AES GCM. This is signalled in plain together with the
|
27
|
|
- // packet. See https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
|
28
|
|
- const ivLength = 12;
|
29
|
|
-
|
30
|
|
- // We use a 128 bit key for AES GCM.
|
31
|
|
- const keyGenParameters = {
|
32
|
|
- name: 'AES-GCM',
|
33
|
|
- length: 128
|
34
|
|
- };
|
35
|
|
-
|
36
|
|
- // We copy the first bytes of the VP8 payload unencrypted.
|
37
|
|
- // For keyframes this is 10 bytes, for non-keyframes (delta) 3. See
|
38
|
|
- // https://tools.ietf.org/html/rfc6386#section-9.1
|
39
|
|
- // This allows the bridge to continue detecting keyframes (only one byte needed in the JVB)
|
40
|
|
- // and is also a bit easier for the VP8 decoder (i.e. it generates funny garbage pictures
|
41
|
|
- // instead of being unable to decode).
|
42
|
|
- // This is a bit for show and we might want to reduce to 1 unconditionally in the final version.
|
43
|
|
- //
|
44
|
|
- // For audio (where frame.type is not set) we do not encrypt the opus TOC byte:
|
45
|
|
- // https://tools.ietf.org/html/rfc6716#section-3.1
|
46
|
|
- const unencryptedBytes = {
|
47
|
|
- key: 10,
|
48
|
|
- delta: 3,
|
49
|
|
- undefined: 1 // frame.type is not set on audio
|
50
|
|
- };
|
51
|
|
-
|
52
|
|
- // Salt used in key derivation
|
53
|
|
- // FIXME: We currently use the MUC room name for this which has the same lifetime
|
54
|
|
- // as this worker. While not (pseudo)random as recommended in
|
55
|
|
- // https://developer.mozilla.org/en-US/docs/Web/API/Pbkdf2Params
|
56
|
|
- // this is easily available and the same for all participants.
|
57
|
|
- // We currently do not enforce a minimum length of 16 bytes either.
|
58
|
|
- let keySalt;
|
59
|
|
-
|
60
|
|
- // Raw keyBytes used to derive the key.
|
61
|
|
- let keyBytes;
|
|
107
|
+ this._id = id;
|
|
108
|
+ }
|
62
|
109
|
|
63
|
110
|
/**
|
64
|
|
- * Derives a AES-GCM key from the input using PBKDF2
|
65
|
|
- * The key length can be configured above and should be either 128 or 256 bits.
|
|
111
|
+ * Derives a per-participant key.
|
66
|
112
|
* @param {Uint8Array} keyBytes - Value to derive key from
|
67
|
113
|
* @param {Uint8Array} salt - Salt used in key derivation
|
68
|
114
|
*/
|
69
|
|
- async function deriveKey(keyBytes, salt) {
|
70
|
|
- // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/importKey
|
71
|
|
- const material = await crypto.subtle.importKey('raw', keyBytes,
|
72
|
|
- 'PBKDF2', false, [ 'deriveBits', 'deriveKey' ]);
|
73
|
|
-
|
74
|
|
- // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/deriveKey#PBKDF2
|
75
|
|
- return crypto.subtle.deriveKey({
|
76
|
|
- name: 'PBKDF2',
|
77
|
|
- salt,
|
78
|
|
- iterations: 100000,
|
79
|
|
- hash: 'SHA-256'
|
80
|
|
- }, material, keyGenParameters, false, [ 'encrypt', 'decrypt' ]);
|
81
|
|
- }
|
|
115
|
+ async deriveKey(keyBytes, salt) {
|
|
116
|
+ const encoder = new TextEncoder();
|
|
117
|
+ const idBytes = encoder.encode(this._id);
|
82
|
118
|
|
|
119
|
+ // Separate both parts by a null byte to avoid ambiguity attacks.
|
|
120
|
+ const participantSalt = new Uint8Array(salt.byteLength + idBytes.byteLength + 1);
|
83
|
121
|
|
84
|
|
- /** Per-participant context holding the cryptographic keys and
|
85
|
|
- * encode/decode functions
|
86
|
|
- */
|
87
|
|
- class Context {
|
88
|
|
- /**
|
89
|
|
- * @param {string} id - local muc resourcepart
|
90
|
|
- */
|
91
|
|
- constructor(id) {
|
92
|
|
- // An array (ring) of keys that we use for sending and receiving.
|
93
|
|
- this._cryptoKeyRing = new Array(keyRingSize);
|
|
122
|
+ participantSalt.set(salt);
|
|
123
|
+ participantSalt.set(idBytes, salt.byteLength + 1);
|
94
|
124
|
|
95
|
|
- // A pointer to the currently used key.
|
96
|
|
- this._currentKeyIndex = -1;
|
97
|
|
-
|
98
|
|
- // We keep track of how many frames we have sent per ssrc.
|
99
|
|
- // Starts with a random offset similar to the RTP sequence number.
|
100
|
|
- this._sendCounts = new Map();
|
|
125
|
+ return deriveKey(keyBytes, participantSalt);
|
|
126
|
+ }
|
101
|
127
|
|
102
|
|
- this._id = id;
|
103
|
|
- }
|
|
128
|
+ /**
|
|
129
|
+ * Sets a key and starts using it for encrypting.
|
|
130
|
+ * @param {CryptoKey} key
|
|
131
|
+ */
|
|
132
|
+ setKey(key) {
|
|
133
|
+ this._currentKeyIndex++;
|
|
134
|
+ this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length] = key;
|
|
135
|
+ }
|
104
|
136
|
|
105
|
|
- /**
|
106
|
|
- * Derives a per-participant key.
|
107
|
|
- * @param {Uint8Array} keyBytes - Value to derive key from
|
108
|
|
- * @param {Uint8Array} salt - Salt used in key derivation
|
109
|
|
- */
|
110
|
|
- async deriveKey(keyBytes, salt) {
|
111
|
|
- const encoder = new TextEncoder();
|
112
|
|
- const idBytes = encoder.encode(this._id);
|
113
|
|
- // Separate both parts by a null byte to avoid ambiguity attacks.
|
114
|
|
- const participantSalt = new Uint8Array(salt.byteLength + idBytes.byteLength + 1);
|
115
|
|
- participantSalt.set(salt);
|
116
|
|
- participantSalt.set(idBytes, salt.byteLength + 1);
|
117
|
|
-
|
118
|
|
- return deriveKey(keyBytes, participantSalt);
|
119
|
|
- }
|
120
|
|
- /**
|
121
|
|
- * Sets a key and starts using it for encrypting.
|
122
|
|
- * @param {CryptoKey} key
|
123
|
|
- */
|
124
|
|
- setKey(key) {
|
125
|
|
- this._currentKeyIndex++;
|
126
|
|
- this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length] = key;
|
|
137
|
+ /**
|
|
138
|
+ * Construct the IV used for AES-GCM and sent (in plain) with the packet similar to
|
|
139
|
+ * https://tools.ietf.org/html/rfc7714#section-8.1
|
|
140
|
+ * It concatenates
|
|
141
|
+ * - the 32 bit synchronization source (SSRC) given on the encoded frame,
|
|
142
|
+ * - the 32 bit rtp timestamp given on the encoded frame,
|
|
143
|
+ * - a send counter that is specific to the SSRC. Starts at a random number.
|
|
144
|
+ * The send counter is essentially the pictureId but we currently have to implement this ourselves.
|
|
145
|
+ * There is no XOR with a salt. Note that this IV leaks the SSRC to the receiver but since this is
|
|
146
|
+ * randomly generated and SFUs may not rewrite this is considered acceptable.
|
|
147
|
+ * The SSRC is used to allow demultiplexing multiple streams with the same key, as described in
|
|
148
|
+ * https://tools.ietf.org/html/rfc3711#section-4.1.1
|
|
149
|
+ * The RTP timestamp is 32 bits and advances by the codec clock rate (90khz for video, 48khz for
|
|
150
|
+ * opus audio) every second. For video it rolls over roughly every 13 hours.
|
|
151
|
+ * The send counter will advance at the frame rate (30fps for video, 50fps for 20ms opus audio)
|
|
152
|
+ * every second. It will take a long time to roll over.
|
|
153
|
+ *
|
|
154
|
+ * See also https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
|
|
155
|
+ */
|
|
156
|
+ makeIV(synchronizationSource, timestamp) {
|
|
157
|
+ const iv = new ArrayBuffer(ivLength);
|
|
158
|
+ const ivView = new DataView(iv);
|
|
159
|
+
|
|
160
|
+ // having to keep our own send count (similar to a picture id) is not ideal.
|
|
161
|
+ if (!this._sendCounts.has(synchronizationSource)) {
|
|
162
|
+ // Initialize with a random offset, similar to the RTP sequence number.
|
|
163
|
+ this._sendCounts.set(synchronizationSource, Math.floor(Math.random() * 0xFFFF));
|
127
|
164
|
}
|
|
165
|
+ const sendCount = this._sendCounts.get(synchronizationSource);
|
128
|
166
|
|
129
|
|
- /**
|
130
|
|
- * Construct the IV used for AES-GCM and sent (in plain) with the packet similar to
|
131
|
|
- * https://tools.ietf.org/html/rfc7714#section-8.1
|
132
|
|
- * It concatenates
|
133
|
|
- * - the 32 bit synchronization source (SSRC) given on the encoded frame,
|
134
|
|
- * - the 32 bit rtp timestamp given on the encoded frame,
|
135
|
|
- * - a send counter that is specific to the SSRC. Starts at a random number.
|
136
|
|
- * The send counter is essentially the pictureId but we currently have to implement this ourselves.
|
137
|
|
- * There is no XOR with a salt. Note that this IV leaks the SSRC to the receiver but since this is
|
138
|
|
- * randomly generated and SFUs may not rewrite this is considered acceptable.
|
139
|
|
- * The SSRC is used to allow demultiplexing multiple streams with the same key, as described in
|
140
|
|
- * https://tools.ietf.org/html/rfc3711#section-4.1.1
|
141
|
|
- * The RTP timestamp is 32 bits and advances by the codec clock rate (90khz for video, 48khz for
|
142
|
|
- * opus audio) every second. For video it rolls over roughly every 13 hours.
|
143
|
|
- * The send counter will advance at the frame rate (30fps for video, 50fps for 20ms opus audio)
|
144
|
|
- * every second. It will take a long time to roll over.
|
145
|
|
- *
|
146
|
|
- * See also https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
|
147
|
|
- */
|
148
|
|
- makeIV(synchronizationSource, timestamp) {
|
149
|
|
- const iv = new ArrayBuffer(ivLength);
|
150
|
|
- const ivView = new DataView(iv);
|
151
|
|
-
|
152
|
|
- // having to keep our own send count (similar to a picture id) is not ideal.
|
153
|
|
- if (!this._sendCounts.has(synchronizationSource)) {
|
154
|
|
- // Initialize with a random offset, similar to the RTP sequence number.
|
155
|
|
- this._sendCounts.set(synchronizationSource, Math.floor(Math.random() * 0xFFFF));
|
156
|
|
- }
|
157
|
|
- const sendCount = this._sendCounts.get(synchronizationSource);
|
|
167
|
+ ivView.setUint32(0, synchronizationSource);
|
|
168
|
+ ivView.setUint32(4, timestamp);
|
|
169
|
+ ivView.setUint32(8, sendCount % 0xFFFF);
|
158
|
170
|
|
159
|
|
- ivView.setUint32(0, synchronizationSource);
|
160
|
|
- ivView.setUint32(4, timestamp);
|
161
|
|
- ivView.setUint32(8, sendCount % 0xFFFF);
|
|
171
|
+ this._sendCounts.set(synchronizationSource, sendCount + 1);
|
162
|
172
|
|
163
|
|
- this._sendCounts.set(synchronizationSource, sendCount + 1);
|
|
173
|
+ return iv;
|
|
174
|
+ }
|
164
|
175
|
|
165
|
|
- return iv;
|
|
176
|
+ /**
|
|
177
|
+ * Function that will be injected in a stream and will encrypt the given encoded frames.
|
|
178
|
+ *
|
|
179
|
+ * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
|
|
180
|
+ * @param {TransformStreamDefaultController} controller - TransportStreamController.
|
|
181
|
+ *
|
|
182
|
+ * The packet format is described below. One of the design goals was to not require
|
|
183
|
+ * changes to the SFU which for video requires not encrypting the keyframe bit of VP8
|
|
184
|
+ * as SFUs need to detect a keyframe (framemarking or the generic frame descriptor will
|
|
185
|
+ * solve this eventually). This also "hides" that a client is using E2EE a bit.
|
|
186
|
+ *
|
|
187
|
+ * Note that this operates on the full frame, i.e. for VP8 the data described in
|
|
188
|
+ * https://tools.ietf.org/html/rfc6386#section-9.1
|
|
189
|
+ *
|
|
190
|
+ * The VP8 payload descriptor described in
|
|
191
|
+ * https://tools.ietf.org/html/rfc7741#section-4.2
|
|
192
|
+ * is part of the RTP packet and not part of the frame and is not controllable by us.
|
|
193
|
+ * This is fine as the SFU keeps having access to it for routing.
|
|
194
|
+ *
|
|
195
|
+ * The encrypted frame is formed as follows:
|
|
196
|
+ * 1) Leave the first (10, 3, 1) bytes unencrypted, depending on the frame type and kind.
|
|
197
|
+ * 2) Form the GCM IV for the frame as described above.
|
|
198
|
+ * 3) Encrypt the rest of the frame using AES-GCM.
|
|
199
|
+ * 4) Allocate space for the encrypted frame.
|
|
200
|
+ * 5) Copy the unencrypted bytes to the start of the encrypted frame.
|
|
201
|
+ * 6) Append the ciphertext to the encrypted frame.
|
|
202
|
+ * 7) Append the IV.
|
|
203
|
+ * 8) Append a single byte for the key identifier. TODO: we don't need all the bits.
|
|
204
|
+ * 9) Enqueue the encrypted frame for sending.
|
|
205
|
+ */
|
|
206
|
+ encodeFunction(encodedFrame, controller) {
|
|
207
|
+ const keyIndex = this._currentKeyIndex % this._cryptoKeyRing.length;
|
|
208
|
+
|
|
209
|
+ if (this._cryptoKeyRing[keyIndex]) {
|
|
210
|
+ const iv = this.makeIV(encodedFrame.getMetadata().synchronizationSource, encodedFrame.timestamp);
|
|
211
|
+
|
|
212
|
+ return crypto.subtle.encrypt({
|
|
213
|
+ name: 'AES-GCM',
|
|
214
|
+ iv,
|
|
215
|
+ additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
|
|
216
|
+ }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data,
|
|
217
|
+ unencryptedBytes[encodedFrame.type]))
|
|
218
|
+ .then(cipherText => {
|
|
219
|
+ const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + cipherText.byteLength
|
|
220
|
+ + iv.byteLength + 1);
|
|
221
|
+ const newUint8 = new Uint8Array(newData);
|
|
222
|
+
|
|
223
|
+ newUint8.set(
|
|
224
|
+ new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])); // copy first bytes.
|
|
225
|
+ newUint8.set(
|
|
226
|
+ new Uint8Array(cipherText), unencryptedBytes[encodedFrame.type]); // add ciphertext.
|
|
227
|
+ newUint8.set(
|
|
228
|
+ new Uint8Array(iv), unencryptedBytes[encodedFrame.type] + cipherText.byteLength); // append IV.
|
|
229
|
+ newUint8[unencryptedBytes[encodedFrame.type] + cipherText.byteLength + ivLength]
|
|
230
|
+ = keyIndex; // set key index.
|
|
231
|
+
|
|
232
|
+ encodedFrame.data = newData;
|
|
233
|
+
|
|
234
|
+ return controller.enqueue(encodedFrame);
|
|
235
|
+ }, e => {
|
|
236
|
+ console.error(e);
|
|
237
|
+
|
|
238
|
+ // We are not enqueuing the frame here on purpose.
|
|
239
|
+ });
|
166
|
240
|
}
|
167
|
241
|
|
168
|
|
- /**
|
169
|
|
- * Function that will be injected in a stream and will encrypt the given encoded frames.
|
170
|
|
- *
|
171
|
|
- * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
|
172
|
|
- * @param {TransformStreamDefaultController} controller - TransportStreamController.
|
173
|
|
- *
|
174
|
|
- * The packet format is described below. One of the design goals was to not require
|
175
|
|
- * changes to the SFU which for video requires not encrypting the keyframe bit of VP8
|
176
|
|
- * as SFUs need to detect a keyframe (framemarking or the generic frame descriptor will
|
177
|
|
- * solve this eventually). This also "hides" that a client is using E2EE a bit.
|
178
|
|
- *
|
179
|
|
- * Note that this operates on the full frame, i.e. for VP8 the data described in
|
180
|
|
- * https://tools.ietf.org/html/rfc6386#section-9.1
|
181
|
|
- *
|
182
|
|
- * The VP8 payload descriptor described in
|
183
|
|
- * https://tools.ietf.org/html/rfc7741#section-4.2
|
184
|
|
- * is part of the RTP packet and not part of the frame and is not controllable by us.
|
185
|
|
- * This is fine as the SFU keeps having access to it for routing.
|
186
|
|
- *
|
187
|
|
- * The encrypted frame is formed as follows:
|
188
|
|
- * 1) Leave the first (10, 3, 1) bytes unencrypted, depending on the frame type and kind.
|
189
|
|
- * 2) Form the GCM IV for the frame as described above.
|
190
|
|
- * 3) Encrypt the rest of the frame using AES-GCM.
|
191
|
|
- * 4) Allocate space for the encrypted frame.
|
192
|
|
- * 5) Copy the unencrypted bytes to the start of the encrypted frame.
|
193
|
|
- * 6) Append the ciphertext to the encrypted frame.
|
194
|
|
- * 7) Append the IV.
|
195
|
|
- * 8) Append a single byte for the key identifier. TODO: we don't need all the bits.
|
196
|
|
- * 9) Enqueue the encrypted frame for sending.
|
|
242
|
+ /* NOTE WELL:
|
|
243
|
+ * This will send unencrypted data (only protected by DTLS transport encryption) when no key is configured.
|
|
244
|
+ * This is ok for demo purposes but should not be done once this becomes more relied upon.
|
197
|
245
|
*/
|
198
|
|
- encodeFunction(encodedFrame, controller) {
|
199
|
|
- const keyIndex = this._currentKeyIndex % this._cryptoKeyRing.length;
|
200
|
|
-
|
201
|
|
- if (this._cryptoKeyRing[keyIndex]) {
|
202
|
|
- const iv = this.makeIV(encodedFrame.getMetadata().synchronizationSource, encodedFrame.timestamp);
|
203
|
|
-
|
204
|
|
- return crypto.subtle.encrypt({
|
205
|
|
- name: 'AES-GCM',
|
206
|
|
- iv,
|
207
|
|
- additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
|
208
|
|
- }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data,
|
209
|
|
- unencryptedBytes[encodedFrame.type]))
|
210
|
|
- .then(cipherText => {
|
211
|
|
- const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + cipherText.byteLength
|
212
|
|
- + iv.byteLength + 1);
|
213
|
|
- const newUint8 = new Uint8Array(newData);
|
214
|
|
-
|
215
|
|
- newUint8.set(
|
216
|
|
- new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])); // copy first bytes.
|
217
|
|
- newUint8.set(
|
218
|
|
- new Uint8Array(cipherText), unencryptedBytes[encodedFrame.type]); // add ciphertext.
|
219
|
|
- newUint8.set(
|
220
|
|
- new Uint8Array(iv), unencryptedBytes[encodedFrame.type] + cipherText.byteLength); // append IV.
|
221
|
|
- newUint8[unencryptedBytes[encodedFrame.type] + cipherText.byteLength + ivLength]
|
222
|
|
- = keyIndex; // set key index.
|
223
|
|
-
|
224
|
|
- encodedFrame.data = newData;
|
225
|
|
-
|
226
|
|
- return controller.enqueue(encodedFrame);
|
227
|
|
- }, e => {
|
228
|
|
- console.error(e);
|
229
|
|
-
|
230
|
|
- // We are not enqueuing the frame here on purpose.
|
231
|
|
- });
|
232
|
|
- }
|
233
|
|
-
|
234
|
|
- /* NOTE WELL:
|
235
|
|
- * This will send unencrypted data (only protected by DTLS transport encryption) when no key is configured.
|
236
|
|
- * This is ok for demo purposes but should not be done once this becomes more relied upon.
|
237
|
|
- */
|
238
|
|
- controller.enqueue(encodedFrame);
|
239
|
|
- }
|
|
246
|
+ controller.enqueue(encodedFrame);
|
|
247
|
+ }
|
240
|
248
|
|
241
|
|
- /**
|
242
|
|
- * Function that will be injected in a stream and will decrypt the given encoded frames.
|
243
|
|
- *
|
244
|
|
- * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
|
245
|
|
- * @param {TransformStreamDefaultController} controller - TransportStreamController.
|
246
|
|
- *
|
247
|
|
- * The decrypted frame is formed as follows:
|
248
|
|
- * 1) Extract the key index from the last byte of the encrypted frame.
|
249
|
|
- * If there is no key associated with the key index, the frame is enqueued for decoding
|
250
|
|
- * and these steps terminate.
|
251
|
|
- * 2) Determine the frame type in order to look up the number of unencrypted header bytes.
|
252
|
|
- * 2) Extract the 12-byte IV from its position near the end of the packet.
|
253
|
|
- * Note: the IV is treated as opaque and not reconstructed from the input.
|
254
|
|
- * 3) Decrypt the encrypted frame content after the unencrypted bytes using AES-GCM.
|
255
|
|
- * 4) Allocate space for the decrypted frame.
|
256
|
|
- * 5) Copy the unencrypted bytes from the start of the encrypted frame.
|
257
|
|
- * 6) Append the plaintext to the decrypted frame.
|
258
|
|
- * 7) Enqueue the decrypted frame for decoding.
|
259
|
|
- */
|
260
|
|
- decodeFunction(encodedFrame, controller) {
|
261
|
|
- const data = new Uint8Array(encodedFrame.data);
|
262
|
|
- const keyIndex = data[encodedFrame.data.byteLength - 1];
|
263
|
|
-
|
264
|
|
- if (this._cryptoKeyRing[keyIndex]) {
|
265
|
|
- const iv = new Uint8Array(encodedFrame.data, encodedFrame.data.byteLength - ivLength - 1, ivLength);
|
266
|
|
- const cipherTextStart = unencryptedBytes[encodedFrame.type];
|
267
|
|
- const cipherTextLength = encodedFrame.data.byteLength - (unencryptedBytes[encodedFrame.type]
|
268
|
|
- + ivLength + 1);
|
269
|
|
-
|
270
|
|
- return crypto.subtle.decrypt({
|
271
|
|
- name: 'AES-GCM',
|
272
|
|
- iv,
|
273
|
|
- additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
|
274
|
|
- }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data, cipherTextStart, cipherTextLength))
|
275
|
|
- .then(plainText => {
|
276
|
|
- const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + plainText.byteLength);
|
|
249
|
+ /**
|
|
250
|
+ * Function that will be injected in a stream and will decrypt the given encoded frames.
|
|
251
|
+ *
|
|
252
|
+ * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
|
|
253
|
+ * @param {TransformStreamDefaultController} controller - TransportStreamController.
|
|
254
|
+ *
|
|
255
|
+ * The decrypted frame is formed as follows:
|
|
256
|
+ * 1) Extract the key index from the last byte of the encrypted frame.
|
|
257
|
+ * If there is no key associated with the key index, the frame is enqueued for decoding
|
|
258
|
+ * and these steps terminate.
|
|
259
|
+ * 2) Determine the frame type in order to look up the number of unencrypted header bytes.
|
|
260
|
+ * 2) Extract the 12-byte IV from its position near the end of the packet.
|
|
261
|
+ * Note: the IV is treated as opaque and not reconstructed from the input.
|
|
262
|
+ * 3) Decrypt the encrypted frame content after the unencrypted bytes using AES-GCM.
|
|
263
|
+ * 4) Allocate space for the decrypted frame.
|
|
264
|
+ * 5) Copy the unencrypted bytes from the start of the encrypted frame.
|
|
265
|
+ * 6) Append the plaintext to the decrypted frame.
|
|
266
|
+ * 7) Enqueue the decrypted frame for decoding.
|
|
267
|
+ */
|
|
268
|
+ decodeFunction(encodedFrame, controller) {
|
|
269
|
+ const data = new Uint8Array(encodedFrame.data);
|
|
270
|
+ const keyIndex = data[encodedFrame.data.byteLength - 1];
|
|
271
|
+
|
|
272
|
+ if (this._cryptoKeyRing[keyIndex]) {
|
|
273
|
+ const iv = new Uint8Array(encodedFrame.data, encodedFrame.data.byteLength - ivLength - 1, ivLength);
|
|
274
|
+ const cipherTextStart = unencryptedBytes[encodedFrame.type];
|
|
275
|
+ const cipherTextLength = encodedFrame.data.byteLength - (unencryptedBytes[encodedFrame.type]
|
|
276
|
+ + ivLength + 1);
|
|
277
|
+
|
|
278
|
+ return crypto.subtle.decrypt({
|
|
279
|
+ name: 'AES-GCM',
|
|
280
|
+ iv,
|
|
281
|
+ additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
|
|
282
|
+ }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data, cipherTextStart, cipherTextLength))
|
|
283
|
+ .then(plainText => {
|
|
284
|
+ const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + plainText.byteLength);
|
|
285
|
+ const newUint8 = new Uint8Array(newData);
|
|
286
|
+
|
|
287
|
+ newUint8.set(new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type]));
|
|
288
|
+ newUint8.set(new Uint8Array(plainText), unencryptedBytes[encodedFrame.type]);
|
|
289
|
+
|
|
290
|
+ encodedFrame.data = newData;
|
|
291
|
+
|
|
292
|
+ return controller.enqueue(encodedFrame);
|
|
293
|
+ }, e => {
|
|
294
|
+ console.error(e);
|
|
295
|
+
|
|
296
|
+ // TODO: notify the application about error status.
|
|
297
|
+
|
|
298
|
+ // TODO: For video we need a better strategy since we do not want to base any
|
|
299
|
+ // non-error frames on a garbage keyframe.
|
|
300
|
+ if (encodedFrame.type === undefined) { // audio, replace with silence.
|
|
301
|
+ // audio, replace with silence.
|
|
302
|
+ const newData = new ArrayBuffer(3);
|
277
|
303
|
const newUint8 = new Uint8Array(newData);
|
278
|
304
|
|
279
|
|
- newUint8.set(new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type]));
|
280
|
|
- newUint8.set(new Uint8Array(plainText), unencryptedBytes[encodedFrame.type]);
|
281
|
|
-
|
|
305
|
+ newUint8.set([ 0xd8, 0xff, 0xfe ]); // opus silence frame.
|
282
|
306
|
encodedFrame.data = newData;
|
283
|
|
-
|
284
|
|
- return controller.enqueue(encodedFrame);
|
285
|
|
- }, e => {
|
286
|
|
- console.error(e);
|
287
|
|
-
|
288
|
|
- // TODO: notify the application about error status.
|
289
|
|
-
|
290
|
|
- // TODO: For video we need a better strategy since we do not want to based any
|
291
|
|
- // non-error frames on a garbage keyframe.
|
292
|
|
- if (encodedFrame.type === undefined) { // audio, replace with silence.
|
293
|
|
- // audio, replace with silence.
|
294
|
|
- const newData = new ArrayBuffer(3);
|
295
|
|
- const newUint8 = new Uint8Array(newData);
|
296
|
|
-
|
297
|
|
- newUint8.set([ 0xd8, 0xff, 0xfe ]); // opus silence frame.
|
298
|
|
- encodedFrame.data = newData;
|
299
|
|
- controller.enqueue(encodedFrame);
|
300
|
|
- }
|
301
|
|
- });
|
302
|
|
- } else if (keyIndex >= this._cryptoKeyRing.length
|
303
|
|
- && this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length]) {
|
304
|
|
- // If we are encrypting but don't have a key for the remote drop the frame.
|
305
|
|
- // This is a heuristic since we don't know whether a packet is encrypted,
|
306
|
|
- // do not have a checksum and do not have signaling for whether a remote participant does
|
307
|
|
- // encrypt or not.
|
308
|
|
- return;
|
309
|
|
- }
|
310
|
|
-
|
311
|
|
- // TODO: this just passes through to the decoder. Is that ok? If we don't know the key yet
|
312
|
|
- // we might want to buffer a bit but it is still unclear how to do that (and for how long etc).
|
313
|
|
- controller.enqueue(encodedFrame);
|
|
307
|
+ controller.enqueue(encodedFrame);
|
|
308
|
+ }
|
|
309
|
+ });
|
|
310
|
+ } else if (keyIndex >= this._cryptoKeyRing.length
|
|
311
|
+ && this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length]) {
|
|
312
|
+ // If we are encrypting but don't have a key for the remote drop the frame.
|
|
313
|
+ // This is a heuristic since we don't know whether a packet is encrypted,
|
|
314
|
+ // do not have a checksum and do not have signaling for whether a remote participant does
|
|
315
|
+ // encrypt or not.
|
|
316
|
+ return;
|
314
|
317
|
}
|
315
|
|
- }
|
316
|
|
- const contexts = new Map(); // Map participant id => context
|
317
|
318
|
|
318
|
|
- onmessage = async event => {
|
319
|
|
- const { operation } = event.data;
|
320
|
|
-
|
321
|
|
- if (operation === 'initialize') {
|
322
|
|
- keySalt = event.data.salt;
|
323
|
|
- } else if (operation === 'encode') {
|
324
|
|
- const { readableStream, writableStream, participantId } = event.data;
|
|
319
|
+ // TODO: this just passes through to the decoder. Is that ok? If we don't know the key yet
|
|
320
|
+ // we might want to buffer a bit but it is still unclear how to do that (and for how long etc).
|
|
321
|
+ controller.enqueue(encodedFrame);
|
|
322
|
+ }
|
|
323
|
+}
|
325
|
324
|
|
326
|
|
- if (!contexts.has(participantId)) {
|
327
|
|
- contexts.set(participantId, new Context(participantId));
|
328
|
|
- }
|
329
|
|
- const context = contexts.get(participantId);
|
330
|
|
- const transformStream = new TransformStream({
|
331
|
|
- transform: context.encodeFunction.bind(context)
|
332
|
|
- });
|
|
325
|
+const contexts = new Map(); // Map participant id => context
|
333
|
326
|
|
334
|
|
- readableStream
|
335
|
|
- .pipeThrough(new TransformStream({
|
336
|
|
- transform: polyFillEncodedFrameMetadata, // M83 polyfill.
|
337
|
|
- }))
|
338
|
|
- .pipeThrough(transformStream)
|
339
|
|
- .pipeTo(writableStream);
|
340
|
|
- if (keyBytes) {
|
341
|
|
- context.setKey(await context.deriveKey(keyBytes, keySalt));
|
342
|
|
- }
|
343
|
|
- } else if (operation === 'decode') {
|
344
|
|
- const { readableStream, writableStream, participantId } = event.data;
|
|
327
|
+onmessage = async event => {
|
|
328
|
+ const { operation } = event.data;
|
345
|
329
|
|
346
|
|
- if (!contexts.has(participantId)) {
|
347
|
|
- contexts.set(participantId, new Context(participantId));
|
348
|
|
- }
|
349
|
|
- const context = contexts.get(participantId);
|
350
|
|
- const transformStream = new TransformStream({
|
351
|
|
- transform: context.decodeFunction.bind(context)
|
352
|
|
- });
|
|
330
|
+ if (operation === 'initialize') {
|
|
331
|
+ _keySalt = event.data.salt;
|
|
332
|
+ } else if (operation === 'encode') {
|
|
333
|
+ const { readableStream, writableStream, participantId } = event.data;
|
353
|
334
|
|
354
|
|
- readableStream
|
355
|
|
- .pipeThrough(new TransformStream({
|
356
|
|
- transform: polyFillEncodedFrameMetadata, // M83 polyfill.
|
357
|
|
- }))
|
358
|
|
- .pipeThrough(transformStream)
|
359
|
|
- .pipeTo(writableStream);
|
360
|
|
- if (keyBytes) {
|
361
|
|
- context.setKey(await context.deriveKey(keyBytes, keySalt));
|
362
|
|
- }
|
363
|
|
- } else if (operation === 'setKey') {
|
364
|
|
- keyBytes = event.data.key;
|
365
|
|
- contexts.forEach(async context => {
|
366
|
|
- if (keyBytes) {
|
367
|
|
- context.setKey(await context.deriveKey(keyBytes, keySalt));
|
368
|
|
- } else {
|
369
|
|
- context.setKey(false);
|
370
|
|
- }
|
371
|
|
- });
|
372
|
|
- } else {
|
373
|
|
- console.error('e2ee worker', operation);
|
|
335
|
+ if (!contexts.has(participantId)) {
|
|
336
|
+ contexts.set(participantId, new Context(participantId));
|
374
|
337
|
}
|
375
|
|
- };
|
376
|
|
-`;
|
|
338
|
+ const context = contexts.get(participantId);
|
|
339
|
+ const transformStream = new TransformStream({
|
|
340
|
+ transform: context.encodeFunction.bind(context)
|
|
341
|
+ });
|
|
342
|
+
|
|
343
|
+ readableStream
|
|
344
|
+ .pipeThrough(new TransformStream({
|
|
345
|
+ transform: polyFillEncodedFrameMetadata // M83 polyfill.
|
|
346
|
+ }))
|
|
347
|
+ .pipeThrough(transformStream)
|
|
348
|
+ .pipeTo(writableStream);
|
|
349
|
+ if (_keyBytes) {
|
|
350
|
+ context.setKey(await context.deriveKey(_keyBytes, _keySalt));
|
|
351
|
+ }
|
|
352
|
+ } else if (operation === 'decode') {
|
|
353
|
+ const { readableStream, writableStream, participantId } = event.data;
|
377
|
354
|
|
378
|
|
-export const createWorkerScript = () => URL.createObjectURL(new Blob([ code ], { type: 'application/javascript' }));
|
|
355
|
+ if (!contexts.has(participantId)) {
|
|
356
|
+ contexts.set(participantId, new Context(participantId));
|
|
357
|
+ }
|
|
358
|
+ const context = contexts.get(participantId);
|
|
359
|
+ const transformStream = new TransformStream({
|
|
360
|
+ transform: context.decodeFunction.bind(context)
|
|
361
|
+ });
|
|
362
|
+
|
|
363
|
+ readableStream
|
|
364
|
+ .pipeThrough(new TransformStream({
|
|
365
|
+ transform: polyFillEncodedFrameMetadata // M83 polyfill.
|
|
366
|
+ }))
|
|
367
|
+ .pipeThrough(transformStream)
|
|
368
|
+ .pipeTo(writableStream);
|
|
369
|
+ if (_keyBytes) {
|
|
370
|
+ context.setKey(await context.deriveKey(_keyBytes, _keySalt));
|
|
371
|
+ }
|
|
372
|
+ } else if (operation === 'setKey') {
|
|
373
|
+ _keyBytes = event.data.key;
|
|
374
|
+ contexts.forEach(async context => {
|
|
375
|
+ if (_keyBytes) {
|
|
376
|
+ context.setKey(await context.deriveKey(_keyBytes, _keySalt));
|
|
377
|
+ } else {
|
|
378
|
+ context.setKey(false);
|
|
379
|
+ }
|
|
380
|
+ });
|
|
381
|
+ } else {
|
|
382
|
+ console.error('e2ee worker', operation);
|
|
383
|
+ }
|
|
384
|
+};
|