You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

E2EEContext.js 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /* global __filename, TransformStream */
  2. import { getLogger } from 'jitsi-meet-logger';
  3. const logger = getLogger(__filename);
  4. // We use a ringbuffer of keys so we can change them and still decode packets that were
  5. // encrypted with an old key.
  6. // In the future when we dont rely on a globally shared key we will actually use it. For
  7. // now set the size to 1 which means there is only a single key. This causes some
  8. // glitches when changing the key but its ok.
  9. const keyRingSize = 1;
  10. // We use a 96 bit IV for AES GCM. This is signalled in plain together with the
  11. // packet. See https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
  12. const ivLength = 12;
  13. // We copy the first bytes of the VP8 payload unencrypted.
  14. // For keyframes this is 10 bytes, for non-keyframes (delta) 3. See
  15. // https://tools.ietf.org/html/rfc6386#section-9.1
  16. // This allows the bridge to continue detecting keyframes (only one byte needed in the JVB)
  17. // and is also a bit easier for the VP8 decoder (i.e. it generates funny garbage pictures
  18. // instead of being unable to decode).
  19. // This is a bit for show and we might want to reduce to 1 unconditionally in the final version.
  20. //
  21. // For audio (where frame.type is not set) we do not encrypt the opus TOC byte:
  22. // https://tools.ietf.org/html/rfc6716#section-3.1
  23. const unencryptedBytes = {
  24. key: 10,
  25. delta: 3,
  26. undefined: 1 // frame.type is not set on audio
  27. };
  28. // Flag to set on senders / receivers to avoid setting up the encryption transform
  29. // more than once.
  30. const kJitsiE2EE = Symbol('kJitsiE2EE');
  31. /**
  32. * Context encapsulating the cryptography bits required for E2EE.
  33. * This uses the WebRTC Insertable Streams API which is explained in
  34. * https://github.com/alvestrand/webrtc-media-streams/blob/master/explainer.md
  35. * that provides access to the encoded frames and allows them to be transformed.
  36. *
  37. * The encoded frame format is explained below in the _encodeFunction method.
  38. * High level design goals were:.
  39. * - do not require changes to existing SFUs and retain (VP8) metadata.
  40. * - allow the SFU to rewrite SSRCs, timestamp, pictureId.
  41. * - allow for the key to be rotated frequently.
  42. */
  43. export default class E2EEcontext {
  44. /**
  45. * Build a new E2EE context instance, which will be used in a given conference.
  46. *
  47. * @param {string} options.salt - Salt to be used for key deviation.
  48. * FIXME: We currently use the MUC room name for this which has the same lifetime
  49. * as this context. While not (pseudo)random as recommended in
  50. * https://developer.mozilla.org/en-US/docs/Web/API/Pbkdf2Params
  51. * this is easily available and the same for all participants.
  52. * We currently do not enforce a minimum length of 16 bytes either.
  53. */
  54. constructor(options) {
  55. this._options = options;
  56. // An array (ring) of keys that we use for sending and receiving.
  57. this._cryptoKeyRing = new Array(keyRingSize);
  58. // A pointer to the currently used key.
  59. this._currentKeyIndex = -1;
  60. // We keep track of how many frames we have sent per ssrc.
  61. // Starts with a random offset similar to the RTP sequence number.
  62. this._sendCounts = new Map();
  63. // Initialize the salt and convert it once.
  64. const encoder = new TextEncoder();
  65. this._salt = encoder.encode(options.salt);
  66. }
  67. /**
  68. * Handles the given {@code RTCRtpReceiver} by creating a {@code TransformStream} which will injecct
  69. * a frame decoder.
  70. *
  71. * @param {RTCRtpReceiver} receiver - The receiver which will get the decoding function injected.
  72. * @param {string} kind - The kind of track this receiver belongs to.
  73. */
  74. handleReceiver(receiver, kind) {
  75. if (receiver[kJitsiE2EE]) {
  76. return;
  77. }
  78. const receiverStreams
  79. = kind === 'video' ? receiver.createEncodedVideoStreams() : receiver.createEncodedAudioStreams();
  80. const transform = new TransformStream({
  81. transform: this._decodeFunction.bind(this)
  82. });
  83. receiverStreams.readableStream
  84. .pipeThrough(transform)
  85. .pipeTo(receiverStreams.writableStream);
  86. receiver[kJitsiE2EE] = true;
  87. }
  88. /**
  89. * Handles the given {@code RTCRtpSender} by creating a {@code TransformStream} which will injecct
  90. * a frame encoder.
  91. *
  92. * @param {RTCRtpSender} sender - The sender which will get the encoding funcction injected.
  93. * @param {string} kind - The kind of track this sender belongs to.
  94. */
  95. handleSender(sender, kind) {
  96. if (sender[kJitsiE2EE]) {
  97. return;
  98. }
  99. const senderStreams
  100. = kind === 'video' ? sender.createEncodedVideoStreams() : sender.createEncodedAudioStreams();
  101. const transform = new TransformStream({
  102. transform: this._encodeFunction.bind(this)
  103. });
  104. senderStreams.readableStream
  105. .pipeThrough(transform)
  106. .pipeTo(senderStreams.writableStream);
  107. sender[kJitsiE2EE] = true;
  108. }
  109. /**
  110. * Sets the key to be used for E2EE.
  111. *
  112. * @param {string} value - Value to be used as the new key. May be falsy to disable end-to-end encryption.
  113. */
  114. async setKey(value) {
  115. let key;
  116. if (value) {
  117. const encoder = new TextEncoder();
  118. key = await this._deriveKey(encoder.encode(value));
  119. } else {
  120. key = false;
  121. }
  122. this._currentKeyIndex++;
  123. this._cryptoKeyRing[this._currentKeyIndex % this._cryptoKeyRing.length] = key;
  124. }
  125. /**
  126. * Derives a AES-GCM key with 128 bits from the input using PBKDF2
  127. * The salt is configured in the constructor of this class.
  128. * @param {Uint8Array} keyBytes - Value to derive key from
  129. */
  130. async _deriveKey(keyBytes) {
  131. // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/importKey
  132. const material = await crypto.subtle.importKey('raw', keyBytes,
  133. 'PBKDF2', false, [ 'deriveBits', 'deriveKey' ]);
  134. // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/deriveKey#PBKDF2
  135. return crypto.subtle.deriveKey({
  136. name: 'PBKDF2',
  137. salt: this._salt,
  138. iterations: 100000,
  139. hash: 'SHA-256'
  140. }, material, {
  141. name: 'AES-GCM',
  142. length: 128
  143. }, false, [ 'encrypt', 'decrypt' ]);
  144. }
  145. /**
  146. * Construct the IV used for AES-GCM and sent (in plain) with the packet similar to
  147. * https://tools.ietf.org/html/rfc7714#section-8.1
  148. * It concatenates
  149. * - the 32 bit synchronization source (SSRC) given on the encoded frame,
  150. * - the 32 bit rtp timestamp given on the encoded frame,
  151. * - a send counter that is specific to the SSRC. Starts at a random number.
  152. * The send counter is essentially the pictureId but we currently have to implement this ourselves.
  153. * There is no XOR with a salt. Note that this IV leaks the SSRC to the receiver but since this is
  154. * randomly generated and SFUs may not rewrite this is considered acceptable.
  155. * The SSRC is used to allow demultiplexing multiple streams with the same key, as described in
  156. * https://tools.ietf.org/html/rfc3711#section-4.1.1
  157. * The RTP timestamp is 32 bits and advances by the codec clock rate (90khz for video, 48khz for
  158. * opus audio) every second. For video it rolls over roughly every 13 hours.
  159. * The send counter will advance at the frame rate (30fps for video, 50fps for 20ms opus audio)
  160. * every second. It will take a long time to roll over.
  161. *
  162. * See also https://developer.mozilla.org/en-US/docs/Web/API/AesGcmParams
  163. */
  164. _makeIV(synchronizationSource, timestamp) {
  165. const iv = new ArrayBuffer(ivLength);
  166. const ivView = new DataView(iv);
  167. // having to keep our own send count (similar to a picture id) is not ideal.
  168. if (!this._sendCounts.has(synchronizationSource)) {
  169. // Initialize with a random offset, similar to the RTP sequence number.
  170. this._sendCounts.set(synchronizationSource, Math.floor(Math.random() * 0xFFFF));
  171. }
  172. const sendCount = this._sendCounts.get(synchronizationSource);
  173. ivView.setUint32(0, synchronizationSource);
  174. ivView.setUint32(4, timestamp);
  175. ivView.setUint32(8, sendCount % 0xFFFF);
  176. this._sendCounts.set(synchronizationSource, sendCount + 1);
  177. return iv;
  178. }
  179. /**
  180. * Function that will be injected in a stream and will encrypt the given encoded frames.
  181. *
  182. * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
  183. * @param {TransformStreamDefaultController} controller - TransportStreamController.
  184. *
  185. * The packet format is described below. One of the design goals was to not require
  186. * changes to the SFU which for video requires not encrypting the keyframe bit of VP8
  187. * as SFUs need to detect a keyframe (framemarking or the generic frame descriptor will
  188. * solve this eventually). This also "hides" that a client is using E2EE a bit.
  189. *
  190. * Note that this operates on the full frame, i.e. for VP8 the data described in
  191. * https://tools.ietf.org/html/rfc6386#section-9.1
  192. *
  193. * The VP8 payload descriptor described in
  194. * https://tools.ietf.org/html/rfc7741#section-4.2
  195. * is part of the RTP packet and not part of the frame and is not controllable by us.
  196. * This is fine as the SFU keeps having access to it for routing.
  197. *
  198. * The encrypted frame is formed as follows:
  199. * 1) Leave the first (10, 3, 1) bytes unencrypted, depending on the frame type and kind.
  200. * 2) Form the GCM IV for the frame as described above.
  201. * 3) Encrypt the rest of the frame using AES-GCM.
  202. * 4) Allocate space for the encrypted frame.
  203. * 5) Copy the unencrypted bytes to the start of the encrypted frame.
  204. * 6) Append the ciphertext to the encrypted frame.
  205. * 7) Append the IV.
  206. * 8) Append a single byte for the key identifier. TODO: we don't need all the bits.
  207. * 9) Enqueue the encrypted frame for sending.
  208. */
  209. _encodeFunction(encodedFrame, controller) {
  210. const keyIndex = this._currentKeyIndex % this._cryptoKeyRing.length;
  211. if (this._cryptoKeyRing[keyIndex]) {
  212. const iv = this._makeIV(encodedFrame.synchronizationSource, encodedFrame.timestamp);
  213. return crypto.subtle.encrypt({
  214. name: 'AES-GCM',
  215. iv,
  216. additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
  217. }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data, unencryptedBytes[encodedFrame.type]))
  218. .then(cipherText => {
  219. const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + cipherText.byteLength
  220. + iv.byteLength + 1);
  221. const newUint8 = new Uint8Array(newData);
  222. newUint8.set(
  223. new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])); // copy first bytes.
  224. newUint8.set(
  225. new Uint8Array(cipherText), unencryptedBytes[encodedFrame.type]); // add ciphertext.
  226. newUint8.set(
  227. new Uint8Array(iv), unencryptedBytes[encodedFrame.type] + cipherText.byteLength); // append IV.
  228. newUint8[unencryptedBytes[encodedFrame.type] + cipherText.byteLength + ivLength]
  229. = keyIndex; // set key index.
  230. encodedFrame.data = newData;
  231. return controller.enqueue(encodedFrame);
  232. }, e => {
  233. logger.error(e);
  234. // We are not enqueuing the frame here on purpose.
  235. });
  236. }
  237. /* NOTE WELL:
  238. * This will send unencrypted data (only protected by DTLS transport encryption) when no key is configured.
  239. * This is ok for demo purposes but should not be done once this becomes more relied upon.
  240. */
  241. controller.enqueue(encodedFrame);
  242. }
  243. /**
  244. * Function that will be injected in a stream and will decrypt the given encoded frames.
  245. *
  246. * @param {RTCEncodedVideoFrame|RTCEncodedAudioFrame} encodedFrame - Encoded video frame.
  247. * @param {TransformStreamDefaultController} controller - TransportStreamController.
  248. *
  249. * The decrypted frame is formed as follows:
  250. * 1) Extract the key index from the last byte of the encrypted frame.
  251. * If there is no key associated with the key index, the frame is enqueued for decoding
  252. * and these steps terminate.
  253. * 2) Determine the frame type in order to look up the number of unencrypted header bytes.
  254. * 2) Extract the 12-byte IV from its position near the end of the packet.
  255. * Note: the IV is treated as opaque and not reconstructed from the input.
  256. * 3) Decrypt the encrypted frame content after the unencrypted bytes using AES-GCM.
  257. * 4) Allocate space for the decrypted frame.
  258. * 5) Copy the unencrypted bytes from the start of the encrypted frame.
  259. * 6) Append the plaintext to the decrypted frame.
  260. * 7) Enqueue the decrypted frame for decoding.
  261. */
  262. _decodeFunction(encodedFrame, controller) {
  263. const data = new Uint8Array(encodedFrame.data);
  264. const keyIndex = data[encodedFrame.data.byteLength - 1];
  265. if (this._cryptoKeyRing[keyIndex]) {
  266. const iv = new Uint8Array(encodedFrame.data, encodedFrame.data.byteLength - ivLength - 1, ivLength);
  267. const cipherTextStart = unencryptedBytes[encodedFrame.type];
  268. const cipherTextLength = encodedFrame.data.byteLength - (unencryptedBytes[encodedFrame.type]
  269. + ivLength + 1);
  270. return crypto.subtle.decrypt({
  271. name: 'AES-GCM',
  272. iv,
  273. additionalData: new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type])
  274. }, this._cryptoKeyRing[keyIndex], new Uint8Array(encodedFrame.data, cipherTextStart, cipherTextLength))
  275. .then(plainText => {
  276. const newData = new ArrayBuffer(unencryptedBytes[encodedFrame.type] + plainText.byteLength);
  277. const newUint8 = new Uint8Array(newData);
  278. newUint8.set(new Uint8Array(encodedFrame.data, 0, unencryptedBytes[encodedFrame.type]));
  279. newUint8.set(new Uint8Array(plainText), unencryptedBytes[encodedFrame.type]);
  280. encodedFrame.data = newData;
  281. return controller.enqueue(encodedFrame);
  282. }, e => {
  283. logger.error(e);
  284. if (encodedFrame.type === undefined) { // audio, replace with silence.
  285. const newData = new ArrayBuffer(3);
  286. const newUint8 = new Uint8Array(newData);
  287. newUint8.set([ 0xd8, 0xff, 0xfe ]); // opus silence frame.
  288. encodedFrame.data = newData;
  289. } else { // video, replace with a 320x180px black frame
  290. const newData = new ArrayBuffer(60);
  291. const newUint8 = new Uint8Array(newData);
  292. newUint8.set([
  293. 0xb0, 0x05, 0x00, 0x9d, 0x01, 0x2a, 0xa0, 0x00, 0x5a, 0x00, 0x39, 0x03, 0x00, 0x00, 0x1c, 0x22,
  294. 0x16, 0x16, 0x22, 0x66, 0x12, 0x20, 0x04, 0x90, 0x40, 0x00, 0xc5, 0x01, 0xe0, 0x7c, 0x4d, 0x2f,
  295. 0xfa, 0xdd, 0x4d, 0xa5, 0x7f, 0x89, 0xa5, 0xff, 0x5b, 0xa9, 0xb4, 0xaf, 0xf1, 0x34, 0xbf, 0xeb,
  296. 0x75, 0x36, 0x95, 0xfe, 0x26, 0x96, 0x60, 0xfe, 0xff, 0xba, 0xff, 0x40
  297. ]);
  298. encodedFrame.data = newData;
  299. }
  300. // TODO: notify the application about error status.
  301. controller.enqueue(encodedFrame);
  302. });
  303. }
  304. // TODO: this just passes through to the decoder. Is that ok? If we don't know the key yet
  305. // we might want to buffer a bit but it is still unclear how to do that (and for how long etc).
  306. controller.enqueue(encodedFrame);
  307. }
  308. }