modified lib-jitsi-meet dev repo

Worker.js 16KB

/* global TransformStream */
// Worker for E2EE/Insertable streams.
//
/**
 * Polyfill RTCEncoded(Audio|Video)Frame.getMetadata() (not available in M83, available M84+).
 * The polyfill cannot be done on the prototype since it's not exposed in workers. Instead,
 * it is done as another transformation to keep it separate.
 */
function polyFillEncodedFrameMetadata(encodedFrame, controller) {
    if (!encodedFrame.getMetadata) {
        encodedFrame.getMetadata = function() {
            return {
                // TODO: provide a more complete polyfill based on additionalData for video.
                synchronizationSource: this.synchronizationSource,
                contributingSources: this.contributingSources
            };
        };
    }
    controller.enqueue(encodedFrame);
}
// We use a ringbuffer of keys so we can change them and still decode packets that were
// encrypted with an old key.
// In the future, when we don't rely on a globally shared key, we will actually use it. For
// now set the size to 1, which means there is only a single key. This causes some
// glitches when changing the key but it's acceptable for now.
const keyRingSize = 1;

// We use a 96 bit IV for AES-GCM. This is signalled in plain together with the
// packet. See https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
const ivLength = 12;

// We use a 128 bit key for AES-GCM.
const keyGenParameters = {
    name: 'AES-GCM',
    length: 128
};
// We copy the first bytes of the VP8 payload unencrypted.
// For keyframes this is 10 bytes, for non-keyframes (delta) 3. See
// https://tools.ietf.org/html/rfc6386#section-9.1
// This allows the bridge to continue detecting keyframes (only one byte needed in the JVB)
// and is also a bit easier for the VP8 decoder (i.e. it generates funny garbage pictures
// instead of being unable to decode).
// This is a bit for show and we might want to reduce to 1 unconditionally in the final version.
//
// For audio (where frame.type is not set) we do not encrypt the opus TOC byte:
// https://tools.ietf.org/html/rfc6716#section-3.1
const unencryptedBytes = {
    key: 10,
    delta: 3,
    undefined: 1 // frame.type is not set on audio.
};
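
// Illustrative lookup (a note, not executed code): `unencryptedBytes[encodedFrame.type]`
// resolves to 10 for a VP8 keyframe ('key'), 3 for a delta frame ('delta') and 1 for audio,
// where `type` is undefined and the property access coerces it to the string key 'undefined'.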

// Salt used in key derivation.
// FIXME: We currently use the MUC room name for this, which has the same lifetime
// as this worker. While not (pseudo)random as recommended in
// https://developer.mozilla.org/en-US/docs/Web/API/Pbkdf2Params
// this is easily available and the same for all participants.
// We currently do not enforce a minimum length of 16 bytes either.
let _keySalt;

// Raw keyBytes used to derive the key.
let _keyBytes;

/**
 * Derives an AES-GCM key from the input using PBKDF2.
 * The key length can be configured above and should be either 128 or 256 bits.
 * @param {Uint8Array} keyBytes - Value to derive the key from.
 * @param {Uint8Array} salt - Salt used in key derivation.
 */
async function deriveKey(keyBytes, salt) {
    // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/importKey
    const material = await crypto.subtle.importKey('raw', keyBytes,
        'PBKDF2', false, [ 'deriveBits', 'deriveKey' ]);

    // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/deriveKey#PBKDF2
    return crypto.subtle.deriveKey({
        name: 'PBKDF2',
        salt,
        iterations: 100000,
        hash: 'SHA-256'
    }, material, keyGenParameters, false, [ 'encrypt', 'decrypt' ]);
}
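
// Illustrative usage only (this exact call does not appear in the worker): keyBytes and salt
// arrive from the main thread as byte arrays, so a key could be derived roughly like:
//
//     const key = await deriveKey(
//         new TextEncoder().encode('some key material'), // hypothetical key bytes
//         new TextEncoder().encode('someRoomName'));      // hypothetical salt (MUC room name)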

/**
 * Per-participant context holding the cryptographic keys and
 * encode/decode functions.
 */
class Context {
    /**
     * @param {string} id - local muc resourcepart
     */
    constructor(id) {
        // An array (ring) of keys that we use for sending and receiving.
        this._cryptoKeyRing = new Array(keyRingSize);

        // A pointer to the currently used key.
        this._currentKeyIndex = -1;

        // We keep track of how many frames we have sent per ssrc.
        // Starts with a random offset similar to the RTP sequence number.
        this._sendCounts = new Map();

        this._id = id;
    }

    /**
     * Derives a per-participant key.
     * @param {Uint8Array} keyBytes - Value to derive the key from.
     * @param {Uint8Array} salt - Salt used in key derivation.
     */
    async deriveKey(keyBytes, salt) {
        const encoder = new TextEncoder();
        const idBytes = encoder.encode(this._id);

        // Separate both parts by a null byte to avoid ambiguity attacks.
        const participantSalt = new Uint8Array(salt.byteLength + idBytes.byteLength + 1);

        participantSalt.set(salt);
        participantSalt.set(idBytes, salt.byteLength + 1);

        return deriveKey(keyBytes, participantSalt);
    }
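
    // Resulting layout of participantSalt (the byte at offset salt.byteLength stays 0x00
    // because Uint8Array buffers are zero-initialized, giving the null-byte separator):
    //
    //     | salt (salt.byteLength bytes) | 0x00 | idBytes (UTF-8 encoding of this._id) |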

    /**
     * Sets a key and starts using it for encrypting.
     * @param {CryptoKey} key
     */
    setKey(key) {
        this._currentKeyIndex++;
        this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length] = key;
    }

    /**
     * Construct the IV used for AES-GCM and sent (in plain) with the packet, similar to
     * https://tools.ietf.org/html/rfc7714#section-8.1
     * It concatenates
     * - the 32 bit synchronization source (SSRC) given on the encoded frame,
     * - the 32 bit RTP timestamp given on the encoded frame,
     * - a send counter that is specific to the SSRC and starts at a random number.
     * The send counter is essentially the pictureId, but we currently have to implement this ourselves.
     * There is no XOR with a salt. Note that this IV leaks the SSRC to the receiver, but since the SSRC
     * is randomly generated and SFUs may not rewrite it, this is considered acceptable.
     * The SSRC is used to allow demultiplexing multiple streams with the same key, as described in
     * https://tools.ietf.org/html/rfc3711#section-4.1.1
     * The RTP timestamp is 32 bits and advances by the codec clock rate (90 kHz for video, 48 kHz for
     * opus audio) every second. For video it rolls over roughly every 13 hours.
     * The send counter will advance at the frame rate (30 fps for video, 50 fps for 20 ms opus audio)
     * every second. It will take a long time to roll over.
     *
     * See also https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
     */
    makeIV(synchronizationSource, timestamp) {
        const iv = new ArrayBuffer(ivLength);
        const ivView = new DataView(iv);

        // Having to keep our own send count (similar to a picture id) is not ideal.
        if (!this._sendCounts.has(synchronizationSource)) {
            // Initialize with a random offset, similar to the RTP sequence number.
            this._sendCounts.set(synchronizationSource, Math.floor(Math.random() * 0xFFFF));
        }
        const sendCount = this._sendCounts.get(synchronizationSource);

        ivView.setUint32(0, synchronizationSource);
        ivView.setUint32(4, timestamp);
        ivView.setUint32(8, sendCount % 0xFFFF);

        this._sendCounts.set(synchronizationSource, sendCount + 1);

        return iv;
    }
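
    // Resulting 12-byte IV layout (each field written big-endian by DataView.setUint32):
    //
    //     bytes 0..3   synchronizationSource (SSRC)
    //     bytes 4..7   RTP timestamp
    //     bytes 8..11  per-SSRC send counter (modulo 0xFFFF)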

    /**
     * Function that will be injected in a stream and will encrypt the given encoded frames.
     *
     * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video or audio frame.
     * @param {TransformStreamDefaultController} controller - TransformStreamDefaultController.
     *
     * The packet format is described below (see also the layout sketch after this comment).
     * One of the design goals was to not require changes to the SFU, which for video requires
     * not encrypting the keyframe bit of VP8 as SFUs need to detect a keyframe (framemarking
     * or the generic frame descriptor will solve this eventually). This also "hides" that a
     * client is using E2EE a bit.
     *
     * Note that this operates on the full frame, i.e. for VP8 the data described in
     * https://tools.ietf.org/html/rfc6386#section-9.1
     *
     * The VP8 payload descriptor described in
     * https://tools.ietf.org/html/rfc7741#section-4.2
     * is part of the RTP packet and not part of the frame, and is not controllable by us.
     * This is fine as the SFU keeps having access to it for routing.
     *
     * The encrypted frame is formed as follows:
     * 1) Leave the first (10, 3, 1) bytes unencrypted, depending on the frame type and kind.
     * 2) Form the GCM IV for the frame as described above.
     * 3) Encrypt the rest of the frame using AES-GCM.
     * 4) Allocate space for the encrypted frame.
     * 5) Copy the unencrypted bytes to the start of the encrypted frame.
     * 6) Append the ciphertext to the encrypted frame.
     * 7) Append the IV.
     * 8) Append a single byte for the key identifier. TODO: we don't need all the bits.
     * 9) Enqueue the encrypted frame for sending.
     */
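
    // Sketch of the resulting frame layout, derived from the steps above. Note that the
    // WebCrypto AES-GCM ciphertext includes the authentication tag, and the unencrypted
    // header is passed as additionalData so it is authenticated without being encrypted:
    //
    //     | header (10/3/1 bytes, clear) | ciphertext + auth tag | IV (12 bytes) | key index (1 byte) |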
    encodeFunction(encodedFrame, controller) {
        const keyIndex = this._currentKeyIndex % this._cryptoKeyRing.length;

        if (this._cryptoKeyRing[keyIndex]) {
            const iv = this.makeIV(encodedFrame.getMetadata().synchronizationSource, encodedFrame.timestamp);

            return crypto.subtle.encrypt({
                name: 'AES-GCM',
                iv,
                additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
            }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data,
                unencryptedBytes[encodedFrame.type]))
            .then(cipherText => {
                const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + cipherText.byteLength
                    + iv.byteLength + 1);
                const newUint8 = new Uint8Array(newData);

                newUint8.set(
                    new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])); // copy first bytes.
                newUint8.set(
                    new Uint8Array(cipherText), unencryptedBytes[encodedFrame.type]); // add ciphertext.
                newUint8.set(
                    new Uint8Array(iv), unencryptedBytes[encodedFrame.type] + cipherText.byteLength); // append IV.
                newUint8[unencryptedBytes[encodedFrame.type] + cipherText.byteLength + ivLength]
                    = keyIndex; // set key index.

                encodedFrame.data = newData;

                return controller.enqueue(encodedFrame);
            }, e => {
                console.error(e);

                // We are not enqueuing the frame here on purpose.
            });
        }

        /* NOTE WELL:
         * This will send unencrypted data (only protected by DTLS transport encryption) when no key is configured.
         * This is ok for demo purposes but should not be done once this becomes more relied upon.
         */
        controller.enqueue(encodedFrame);
    }

    /**
     * Function that will be injected in a stream and will decrypt the given encoded frames.
     *
     * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video or audio frame.
     * @param {TransformStreamDefaultController} controller - TransformStreamDefaultController.
     *
     * The decrypted frame is formed as follows (see also the layout sketch after this comment):
     * 1) Extract the key index from the last byte of the encrypted frame.
     *    If there is no key associated with the key index, the frame is enqueued for decoding
     *    and these steps terminate.
     * 2) Determine the frame type in order to look up the number of unencrypted header bytes.
     * 3) Extract the 12-byte IV from its position near the end of the packet.
     *    Note: the IV is treated as opaque and not reconstructed from the input.
     * 4) Decrypt the encrypted frame content after the unencrypted bytes using AES-GCM.
     * 5) Allocate space for the decrypted frame.
     * 6) Copy the unencrypted bytes from the start of the encrypted frame.
     * 7) Append the plaintext to the decrypted frame.
     * 8) Enqueue the decrypted frame for decoding.
     */
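
    // Offsets used below when parsing an incoming encrypted frame of N bytes
    // (a sketch mirroring the sender-side layout):
    //
    //     key index:  byte N - 1
    //     IV:         bytes N - 1 - ivLength .. N - 2
    //     ciphertext: bytes unencryptedBytes[type] .. N - 2 - ivLength
    //     header:     bytes 0 .. unencryptedBytes[type] - 1 (passed as additionalData)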
    decodeFunction(encodedFrame, controller) {
        const data = new Uint8Array(encodedFrame.data);
        const keyIndex = data[encodedFrame.data.byteLength - 1];

        if (this._cryptoKeyRing[keyIndex]) {
            const iv = new Uint8Array(encodedFrame.data, encodedFrame.data.byteLength - ivLength - 1, ivLength);
            const cipherTextStart = unencryptedBytes[encodedFrame.type];
            const cipherTextLength = encodedFrame.data.byteLength - (unencryptedBytes[encodedFrame.type]
                + ivLength + 1);

            return crypto.subtle.decrypt({
                name: 'AES-GCM',
                iv,
                additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
            }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data, cipherTextStart, cipherTextLength))
            .then(plainText => {
                const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + plainText.byteLength);
                const newUint8 = new Uint8Array(newData);

                newUint8.set(new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type]));
                newUint8.set(new Uint8Array(plainText), unencryptedBytes[encodedFrame.type]);

                encodedFrame.data = newData;

                return controller.enqueue(encodedFrame);
            }, e => {
                console.error(e);

                // TODO: notify the application about error status.
                // TODO: For video we need a better strategy since we do not want to base any
                // non-error frames on a garbage keyframe.
                if (encodedFrame.type === undefined) { // audio, replace with silence.
                    const newData = new ArrayBuffer(3);
                    const newUint8 = new Uint8Array(newData);

                    newUint8.set([ 0xd8, 0xff, 0xfe ]); // opus silence frame.
                    encodedFrame.data = newData;
                    controller.enqueue(encodedFrame);
                }
            });
        } else if (keyIndex >= this._cryptoKeyRing.length
                && this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length]) {
            // If we are encrypting but don't have a key for the remote, drop the frame.
            // This is a heuristic since we don't know whether a packet is encrypted,
            // do not have a checksum and do not have signaling for whether a remote participant
            // encrypts or not.
            return;
        }

        // TODO: this just passes through to the decoder. Is that ok? If we don't know the key yet
        // we might want to buffer a bit but it is still unclear how to do that (and for how long etc).
        controller.enqueue(encodedFrame);
    }
}

const contexts = new Map(); // Map participant id => context

onmessage = async event => {
    const { operation } = event.data;

    if (operation === 'initialize') {
        _keySalt = event.data.salt;
    } else if (operation === 'encode') {
        const { readableStream, writableStream, participantId } = event.data;

        if (!contexts.has(participantId)) {
            contexts.set(participantId, new Context(participantId));
        }
        const context = contexts.get(participantId);
        const transformStream = new TransformStream({
            transform: context.encodeFunction.bind(context)
        });

        readableStream
            .pipeThrough(new TransformStream({
                transform: polyFillEncodedFrameMetadata // M83 polyfill.
            }))
            .pipeThrough(transformStream)
            .pipeTo(writableStream);
        if (_keyBytes) {
            context.setKey(await context.deriveKey(_keyBytes, _keySalt));
        }
    } else if (operation === 'decode') {
        const { readableStream, writableStream, participantId } = event.data;

        if (!contexts.has(participantId)) {
            contexts.set(participantId, new Context(participantId));
        }
        const context = contexts.get(participantId);
        const transformStream = new TransformStream({
            transform: context.decodeFunction.bind(context)
        });

        readableStream
            .pipeThrough(new TransformStream({
                transform: polyFillEncodedFrameMetadata // M83 polyfill.
            }))
            .pipeThrough(transformStream)
            .pipeTo(writableStream);
        if (_keyBytes) {
            context.setKey(await context.deriveKey(_keyBytes, _keySalt));
        }
    } else if (operation === 'setKey') {
        _keyBytes = event.data.key;
        contexts.forEach(async context => {
            if (_keyBytes) {
                context.setKey(await context.deriveKey(_keyBytes, _keySalt));
            } else {
                context.setKey(false);
            }
        });
    } else {
        console.error('e2ee worker', operation);
    }
};
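
// Illustrative main-thread wiring (not part of this worker; the names and the exact
// Insertable Streams call below are assumptions based on the messages handled above and may
// differ per Chrome version, e.g. createEncodedStreams() vs. createEncodedVideoStreams()):
//
//     const worker = new Worker('Worker.js', { name: 'E2EE Worker' });
//
//     worker.postMessage({ operation: 'initialize', salt: someSaltBytes });
//     worker.postMessage({ operation: 'setKey', key: someKeyBytes });
//
//     const { readableStream, writableStream } = sender.createEncodedStreams();
//
//     worker.postMessage({
//         operation: 'encode',
//         readableStream,
//         writableStream,
//         participantId: localId
//     }, [ readableStream, writableStream ]);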