You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TrackVADEmitter.js 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. import EventEmitter from 'events';
  2. import RTC from '../RTC/RTC';
  3. import { createAudioContext } from '../webaudio/WebAudioUtils';
  4. import { VAD_SCORE_PUBLISHED } from './DetectionEvents';
  /**
   * Connects an audio JitsiLocalTrack to a VAD processor using a WebAudio ScriptProcessorNode.
   * Once started, audio from the local track flows through the ScriptProcessorNode as raw PCM. The PCM is cut into
   * frames of the size required by the injected VAD processor, a voice activity detection score is computed for
   * every frame and the score is published to consumers via the EventEmitter interface.
   * After work is done with this service the destroy method needs to be called for a proper cleanup.
   *
   * @fires VAD_SCORE_PUBLISHED
   */
  export default class TrackVADEmitter extends EventEmitter {
      /**
       * Constructor.
       *
       * @param {number} procNodeSampleRate - Despite its name this value is handed to createScriptProcessor as the
       * buffer size of the ScriptProcessorNode, i.e. the number of PCM samples delivered per callback. The Web Audio
       * API accepts 256, 512, 1024, 2048, 4096, 8192 or 16384.
       * @param {Object} vadProcessor - VAD processor that allows us to calculate VAD score for PCM samples.
       * @param {JitsiLocalTrack} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
       */
      constructor(procNodeSampleRate, vadProcessor, jitsiLocalTrack) {
          super();

          /**
           * Buffer size (samples per callback) of the ScriptProcessorNode.
           */
          this._procNodeSampleRate = procNodeSampleRate;

          /**
           * VAD Processor that allows us to calculate VAD score for PCM samples.
           */
          this._vadProcessor = vadProcessor;

          /**
           * The JitsiLocalTrack instance.
           */
          this._localTrack = jitsiLocalTrack;

          /**
           * PCM samples left over after a ScriptProcessorNode callback because they did not fill a complete VAD
           * frame. Kept as a plain array, the same type _onAudioProcess and stop() assign to it.
           */
          this._bufferResidue = [];

          /**
           * The AudioContext instance with the preferred sample frequency.
           */
          this._audioContext = createAudioContext({ sampleRate: vadProcessor.getRequiredPCMFrequency() });

          /**
           * PCM frame size expected by the VAD Processor instance. We cache it here as this value is used
           * extensively, saves a couple of function calls.
           */
          this._vadSampleSize = vadProcessor.getSampleLength();

          /**
           * Event listener function that will be called by the ScriptProcessorNode with raw PCM data. Bound once so
           * the same reference can be installed as the onaudioprocess handler.
           */
          this._onAudioProcess = this._onAudioProcess.bind(this);

          /**
           * Set once destroy() completed, makes destroy() idempotent.
           */
          this._destroyed = false;

          try {
              this._initializeAudioContext();
          } catch (error) {
              // The context was already created at this point, don't leak it if the graph can't be built.
              this._closeAudioContext();
              throw error;
          }
      }

      /**
       * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
       *
       * @param {string} micDeviceId - Target microphone device id.
       * @param {number} procNodeSampleRate - Buffer size of the ScriptProcessorNode (see constructor).
       * @param {Object} vadProcessor - Module that calculates the voice activity score for a certain audio PCM
       * sample. The processor needs to implement the following functions:
       * - <tt>getSampleLength()</tt> - Returns the frame size accepted by calculateAudioFrameVAD.
       * - <tt>getRequiredPCMFrequency()</tt> - Returns the PCM frequency at which the processor operates.
       * - <tt>calculateAudioFrameVAD(pcmSample)</tt> - Process a 32 bit float PCM frame of getSampleLength size.
       * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter. Rejects if no
       * audio track could be obtained or if the emitter could not be constructed.
       */
      static create(micDeviceId, procNodeSampleRate, vadProcessor) {
          return RTC.obtainAudioAndVideoPermissions({
              devices: [ 'audio' ],
              micDeviceId
          }).then(localTracks => {
              // We only expect one audio track when specifying a device id.
              const audioTrack = localTracks[0];

              if (!audioTrack) {
                  throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
              }

              try {
                  return new TrackVADEmitter(procNodeSampleRate, vadProcessor, audioTrack);
              } catch (error) {
                  // The track was acquired by us, release the device before propagating the failure. The
                  // vadProcessor life cycle is handled by whoever created it, so it is left untouched.
                  audioTrack.stopStream();
                  throw error;
              }
          });
      }

      /**
       * Sets up the audio graph nodes in the AudioContext (they are not connected until start() is called).
       *
       * @returns {void}
       */
      _initializeAudioContext() {
          this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);

          // TODO ScriptProcessorNode is deprecated in the web audio specifications and the recommended replacement
          // is audio worklet, however at the point of implementation ScriptProcessorNode was still the de facto way
          // of achieving this functionality and supported in all major browsers as opposed to audio worklet which
          // was only available in Chrome. This todo is just a reminder that we should replace it with audio worklet
          // when it's mature enough and has more browser support.
          // We don't need stereo for determining the VAD score so we create a single channel processing node.
          this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
      }

      /**
       * ScriptProcessorNode callback, the input parameter contains the PCM audio that is then sent to the VAD
       * processor. The processor only accepts frames of a fixed size (e.g. 480 samples for rnnoise) whereas the
       * processor node buffer size is usually not a multiple of it, thus after each callback a PCM residue of
       * (previous residue + buffer size) % frame size samples remains, which is prepended to the next buffer.
       *
       * @param {AudioProcessingEvent} audioEvent - Audio event.
       * @returns {void}
       * @fires VAD_SCORE_PUBLISHED
       */
      _onAudioProcess(audioEvent) {
          // Prepend the residue PCM buffer from the previous process callback.
          const inData = audioEvent.inputBuffer.getChannelData(0);
          const completeInData = [ ...this._bufferResidue, ...inData ];
          const sampleTimestamp = Date.now();
          const frameSize = this._vadSampleSize;
          const deviceId = this._localTrack.getDeviceId();
          let offset = 0;

          // '<=' so that a frame ending exactly at the end of the buffer is published now instead of being held
          // back until the next callback.
          for (; offset + frameSize <= completeInData.length; offset += frameSize) {
              const pcmSample = completeInData.slice(offset, offset + frameSize);

              // The VAD processor might change the values inside the array so we make a copy.
              const vadScore = this._vadProcessor.calculateAudioFrameVAD(pcmSample.slice());

              this.emit(VAD_SCORE_PUBLISHED, {
                  timestamp: sampleTimestamp,
                  score: vadScore,
                  pcmData: pcmSample,
                  deviceId
              });
          }

          this._bufferResidue = completeInData.slice(offset);
      }

      /**
       * Connects the nodes in the AudioContext to start the flow of audio data.
       *
       * @returns {void}
       */
      _connectAudioGraph() {
          this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
          this._audioSource.connect(this._audioProcessingNode);
          this._audioProcessingNode.connect(this._audioContext.destination);
      }

      /**
       * Disconnects the nodes in the AudioContext.
       *
       * @returns {void}
       */
      _disconnectAudioGraph() {
          // Even though we disconnect the processing node it seems that some callbacks remain queued,
          // resulting in calls with an uninitialized context, so the handler is replaced with a no-op.
          // eslint-disable-next-line no-empty-function
          this._audioProcessingNode.onaudioprocess = () => {};
          this._audioProcessingNode.disconnect();
          this._audioSource.disconnect();
      }

      /**
       * Closes the AudioContext created by this instance, if there is one and it is not closed already.
       * This is best effort: a failure to close leaves nothing for the caller to act upon.
       *
       * @returns {void}
       */
      _closeAudioContext() {
          const context = this._audioContext;

          if (!context || typeof context.close !== 'function' || context.state === 'closed') {
              return;
          }

          const closing = context.close();

          if (closing && typeof closing.catch === 'function') {
              // Deliberately ignored, see method description. Prevents an unhandled rejection.
              // eslint-disable-next-line no-empty-function
              closing.catch(() => {});
          }
      }

      /**
       * Cleanup potentially acquired resources: the audio graph, the local track stream and the AudioContext.
       *
       * @returns {void}
       */
      _cleanupResources() {
          this._disconnectAudioGraph();
          this._localTrack.stopStream();
          this._closeAudioContext();
      }

      /**
       * Get the associated track device ID.
       *
       * @returns {string}
       */
      getDeviceId() {
          return this._localTrack.getDeviceId();
      }

      /**
       * Get the associated track label.
       *
       * @returns {string}
       */
      getTrackLabel() {
          return this._localTrack.getDeviceLabel();
      }

      /**
       * Start the emitter by connecting the audio graph.
       *
       * @returns {void}
       */
      start() {
          this._connectAudioGraph();
      }

      /**
       * Stops the emitter by disconnecting the audio graph and dropping any buffered PCM residue.
       *
       * @returns {void}
       */
      stop() {
          this._disconnectAudioGraph();
          this._bufferResidue = [];
      }

      /**
       * Destroy TrackVADEmitter instance (release resources and stop callbacks). Calling it again is a no-op.
       *
       * @returns {void}
       */
      destroy() {
          if (this._destroyed) {
              return;
          }

          this._cleanupResources();
          this._destroyed = true;
      }
  }