You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TrackVADEmitter.js 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. // @flow
  2. import { createRnnoiseProcessorPromise, getSampleLength } from '../../../../rnnoise';
  3. import EventEmitter from 'events';
  4. import JitsiMeetJS from '../../../lib-jitsi-meet';
  5. import logger from '../../logger';
  6. import { VAD_SCORE_PUBLISHED } from './Events';
  7. /**
  8. * The structure used by TrackVADEmitter to relay a score
  9. */
  10. export type VADScore = {
  11. /**
  12. * Device ID associated with the VAD score
  13. */
  14. deviceId: string,
  15. /**
  16. * The PCM score from 0 - 1 i.e. 0.60
  17. */
  18. score: number,
  19. /**
  20. * Epoch time at which PCM was recorded
  21. */
  22. timestamp: number
  23. };
  24. /**
  25. * Connects an audio JitsiLocalTrack to a RnnoiseProcessor using WebAudio ScriptProcessorNode.
  26. * Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.
  27. * The PCM is processed by the rnnoise module and a VAD (voice activity detection) score is obtained, the
  28. * score is published to consumers via an EventEmitter.
  29. * After work is done with this service the destroy method needs to be called for a proper cleanup.
  30. */
  31. export default class TrackVADEmitter extends EventEmitter {
  32. /**
  33. * The AudioContext instance.
  34. */
  35. _audioContext: AudioContext;
  36. /**
  37. * The MediaStreamAudioSourceNode instance.
  38. */
  39. _audioSource: MediaStreamAudioSourceNode;
  40. /**
  41. * The ScriptProcessorNode instance.
  42. */
  43. _audioProcessingNode: ScriptProcessorNode;
  44. /**
  45. * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback
  46. */
  47. _bufferResidue: Float32Array;
  48. /**
  49. * State flag, check if the instance was destroyed
  50. */
  51. _destroyed: boolean = false;
  52. /**
  53. * The JitsiLocalTrack instance.
  54. */
  55. _localTrack: Object;
  56. /**
  57. * Device ID of the target microphone.
  58. */
  59. _micDeviceId: string;
  60. /**
  61. * Callback function that will be called by the ScriptProcessNode with raw PCM data, depending on the set sample
  62. * rate.
  63. */
  64. _onAudioProcess: (audioEvent: Object) => void;
  65. /**
  66. * Sample rate of the ScriptProcessorNode.
  67. */
  68. _procNodeSampleRate: number;
  69. /**
  70. * Rnnoise adapter that allows us to calculate VAD score for PCM samples
  71. */
  72. _rnnoiseProcessor: Object;
  73. /**
  74. * PCM Sample size expected by the RnnoiseProcessor instance.
  75. */
  76. _rnnoiseSampleSize: number;
  77. /**
  78. * Constructor.
  79. *
  80. * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024,
  81. * 2048, 4096, 8192, 16384. Passing other values will default to closes neighbor.
  82. * @param {Object} rnnoiseProcessor - Rnnoise adapter that allows us to calculate VAD score
  83. * for PCM samples.
  84. * @param {Object} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
  85. */
  86. constructor(procNodeSampleRate: number, rnnoiseProcessor: Object, jitsiLocalTrack: Object) {
  87. super();
  88. this._procNodeSampleRate = procNodeSampleRate;
  89. this._rnnoiseProcessor = rnnoiseProcessor;
  90. this._localTrack = jitsiLocalTrack;
  91. this._micDeviceId = jitsiLocalTrack.getDeviceId();
  92. this._bufferResidue = new Float32Array([]);
  93. this._audioContext = new AudioContext();
  94. this._rnnoiseSampleSize = getSampleLength();
  95. this._onAudioProcess = this._onAudioProcess.bind(this);
  96. this._initializeAudioContext();
  97. this._connectAudioGraph();
  98. logger.log(`Constructed VAD emitter for device: ${this._micDeviceId}`);
  99. }
  100. /**
  101. * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
  102. *
  103. * @param {string} micDeviceId - Target microphone device id.
  104. * @param {number} procNodeSampleRate - Sample rate of the proc node.
  105. * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter.
  106. */
  107. static async create(micDeviceId: string, procNodeSampleRate: number) {
  108. let rnnoiseProcessor = null;
  109. let localTrack = null;
  110. try {
  111. logger.log(`Initializing TrackVADEmitter for device: ${micDeviceId}`);
  112. rnnoiseProcessor = await createRnnoiseProcessorPromise();
  113. localTrack = await JitsiMeetJS.createLocalTracks({
  114. devices: [ 'audio' ],
  115. micDeviceId
  116. });
  117. // We only expect one audio track when specifying a device id.
  118. if (!localTrack[0]) {
  119. throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
  120. }
  121. return new TrackVADEmitter(procNodeSampleRate, rnnoiseProcessor, localTrack[0]);
  122. } catch (error) {
  123. logger.error(`Failed to create TrackVADEmitter for ${micDeviceId} with error: ${error}`);
  124. if (rnnoiseProcessor) {
  125. rnnoiseProcessor.destroy();
  126. }
  127. if (localTrack) {
  128. localTrack.stopStream();
  129. }
  130. throw error;
  131. }
  132. }
  133. /**
  134. * Sets up the audio graph in the AudioContext.
  135. *
  136. * @returns {Promise<void>}
  137. */
  138. _initializeAudioContext() {
  139. this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);
  140. // TODO AudioProcessingNode is deprecated check and replace with alternative.
  141. // We don't need stereo for determining the VAD score so we create a single chanel processing node.
  142. this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
  143. this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
  144. }
  145. /**
  146. * ScriptProcessorNode callback, the input parameters contains the PCM audio that is then sent to rnnoise.
  147. * Rnnoise only accepts PCM samples of 480 bytes whereas the webaudio processor node can't sample at a multiple
  148. * of 480 thus after each _onAudioProcess callback there will remain and PCM buffer residue equal
  149. * to _procNodeSampleRate / 480 which will be added to the next sample buffer and so on.
  150. *
  151. * @param {AudioProcessingEvent} audioEvent - Audio event.
  152. * @returns {void}
  153. */
  154. _onAudioProcess(audioEvent: Object) {
  155. // Prepend the residue PCM buffer from the previous process callback.
  156. const inData = audioEvent.inputBuffer.getChannelData(0);
  157. const completeInData = [ ...this._bufferResidue, ...inData ];
  158. const sampleTimestamp = Date.now();
  159. let i = 0;
  160. for (; i + this._rnnoiseSampleSize < completeInData.length; i += this._rnnoiseSampleSize) {
  161. const pcmSample = completeInData.slice(i, i + this._rnnoiseSampleSize);
  162. const vadScore = this._rnnoiseProcessor.calculateAudioFrameVAD(pcmSample);
  163. this.emit(VAD_SCORE_PUBLISHED, {
  164. timestamp: sampleTimestamp,
  165. score: vadScore,
  166. deviceId: this._micDeviceId
  167. });
  168. }
  169. this._bufferResidue = completeInData.slice(i, completeInData.length);
  170. }
  171. /**
  172. * Connects the nodes in the AudioContext to start the flow of audio data.
  173. *
  174. * @returns {void}
  175. */
  176. _connectAudioGraph() {
  177. this._audioSource.connect(this._audioProcessingNode);
  178. this._audioProcessingNode.connect(this._audioContext.destination);
  179. }
  180. /**
  181. * Disconnects the nodes in the AudioContext.
  182. *
  183. * @returns {void}
  184. */
  185. _disconnectAudioGraph() {
  186. // Even thought we disconnect the processing node it seems that some callbacks remain queued,
  187. // resulting in calls with and uninitialized context.
  188. // eslint-disable-next-line no-empty-function
  189. this._audioProcessingNode.onaudioprocess = () => {};
  190. this._audioProcessingNode.disconnect();
  191. this._audioSource.disconnect();
  192. }
  193. /**
  194. * Cleanup potentially acquired resources.
  195. *
  196. * @returns {void}
  197. */
  198. _cleanupResources() {
  199. logger.debug(`Cleaning up resources for device ${this._micDeviceId}!`);
  200. this._disconnectAudioGraph();
  201. this._localTrack.stopStream();
  202. this._rnnoiseProcessor.destroy();
  203. }
  204. /**
  205. * Destroy TrackVADEmitter instance (release resources and stop callbacks).
  206. *
  207. * @returns {void}
  208. */
  209. destroy() {
  210. if (this._destroyed) {
  211. return;
  212. }
  213. logger.log(`Destroying TrackVADEmitter for mic: ${this._micDeviceId}`);
  214. this._cleanupResources();
  215. this._destroyed = true;
  216. }
  217. }