You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TrackVADEmitter.js 8.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import EventEmitter from 'events';
  2. import RTC from '../RTC/RTC';
  3. import { VAD_SCORE_PUBLISHED } from './DetectionEvents';
  /**
   * Connects an audio JitsiLocalTrack to a VAD (voice activity detection) processor through a WebAudio
   * ScriptProcessorNode.
   *
   * Once {@link TrackVADEmitter#start} is called, audio from the local track flows through the
   * ScriptProcessorNode as raw PCM. The PCM is cut into frames of the size required by the injected VAD processor,
   * each frame is scored, and every score is published to consumers as a VAD_SCORE_PUBLISHED event.
   *
   * When the instance is no longer needed, {@link TrackVADEmitter#destroy} must be called so that the audio graph,
   * the local track stream and the AudioContext are released.
   *
   * @fires VAD_SCORE_PUBLISHED
   */
  export default class TrackVADEmitter extends EventEmitter {
      /**
       * Constructor. Creates the AudioContext and the audio nodes, but does not connect them; call start() to
       * begin receiving scores.
       *
       * @param {number} procNodeSampleRate - Buffer size (in sample-frames) handed to the ScriptProcessorNode, i.e.
       * how many PCM samples each processing callback receives. Expected values: 256, 512, 1024, 2048, 4096, 8192,
       * 16384.
       * @param {Object} vadProcessor - VAD processor that allows us to calculate a VAD score for PCM samples.
       * @param {JitsiLocalTrack} jitsiLocalTrack - JitsiLocalTrack whose audio stream is analysed.
       */
      constructor(procNodeSampleRate, vadProcessor, jitsiLocalTrack) {
          super();

          /**
           * Buffer size used when creating the ScriptProcessorNode.
           */
          this._procNodeSampleRate = procNodeSampleRate;

          /**
           * VAD processor that allows us to calculate a VAD score for PCM samples.
           */
          this._vadProcessor = vadProcessor;

          /**
           * The JitsiLocalTrack instance.
           */
          this._localTrack = jitsiLocalTrack;

          /**
           * PCM samples left over from the previous ScriptProcessorNode callback because they did not fill a
           * complete VAD frame. Kept as a plain array, which is what the processing callback produces.
           */
          this._bufferResidue = [];

          /**
           * The AudioContext instance, created with the sample frequency required by the VAD processor.
           */
          this._audioContext = new AudioContext({ sampleRate: vadProcessor.getRequiredPCMFrequency() });

          /**
           * Number of PCM samples per frame expected by the VAD processor. Cached because it is read on every
           * iteration of the processing loop.
           */
          this._vadSampleSize = vadProcessor.getSampleLength();

          /**
           * Bound listener that the ScriptProcessorNode invokes with raw PCM data.
           */
          this._onAudioProcess = this._onAudioProcess.bind(this);

          this._initializeAudioContext();
      }

      /**
       * Factory method that obtains a local audio track for the given microphone and wraps it in a
       * TrackVADEmitter.
       *
       * @param {string} micDeviceId - Target microphone device id.
       * @param {number} procNodeSampleRate - Buffer size of the ScriptProcessorNode (see the constructor).
       * @param {Object} vadProcessor - Module that calculates the voice activity score for an audio PCM sample.
       * The processor needs to implement the following functions:
       * - <tt>getSampleLength()</tt> - Returns the sample size accepted by calculateAudioFrameVAD.
       * - <tt>getRequiredPCMFrequency()</tt> - Returns the PCM frequency at which the processor operates.
       * - <tt>calculateAudioFrameVAD(pcmSample)</tt> - Process a 32 bit float PCM sample of getSampleLength size.
       * @returns {Promise<TrackVADEmitter>} - Promise resolving to a new TrackVADEmitter. It rejects if no track
       * could be obtained for the device or if the emitter could not be constructed.
       */
      static create(micDeviceId, procNodeSampleRate, vadProcessor) {
          return RTC.obtainAudioAndVideoPermissions({
              devices: [ 'audio' ],
              micDeviceId
          }).then(localTracks => {
              // We only expect one audio track when specifying a device id.
              const audioTrack = localTracks[0];

              if (!audioTrack) {
                  throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
              }

              try {
                  return new TrackVADEmitter(procNodeSampleRate, vadProcessor, audioTrack);
              } catch (error) {
                  // The track was acquired just for this emitter; stop it so the microphone is not left
                  // open when the emitter cannot be constructed. The vadProcessor life cycle is handled by
                  // whoever passed it in, so there is nothing to clean up for it here.
                  audioTrack.stopStream();
                  throw error;
              }
          });
      }

      /**
       * Creates the nodes of the audio graph (source and processing node) in the AudioContext. The nodes are not
       * connected here; that happens in _connectAudioGraph.
       *
       * @returns {void}
       */
      _initializeAudioContext() {
          this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);

          // TODO ScriptProcessorNode is deprecated in the web audio specifications and the recommended replacement
          // is audio worklet, however at the point of implementation ScriptProcessorNode was still the de facto way
          // of achieving this functionality and supported in all major browsers as opposed to audio worklet which
          // was only available in Chrome. This todo is just a reminder that we should replace ScriptProcessorNode
          // with audio worklet when it's mature enough and has more browser support.
          // We don't need stereo for determining the VAD score so we create a single channel processing node.
          this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
      }

      /**
       * ScriptProcessorNode callback. The event carries the PCM audio that is sent to the VAD processor.
       *
       * The VAD processor only accepts frames of exactly _vadSampleSize samples, and the amount of PCM available
       * in a callback is generally not a multiple of that size. Every complete frame is scored and published; the
       * samples remaining after the last complete frame are stored as residue and prepended to the data of the
       * next callback.
       *
       * @param {AudioProcessingEvent} audioEvent - Audio event.
       * @returns {void}
       * @fires VAD_SCORE_PUBLISHED
       */
      _onAudioProcess(audioEvent) {
          const inData = audioEvent.inputBuffer.getChannelData(0);

          // Prepend the residue PCM buffer from the previous process callback.
          const completeInData = [ ...this._bufferResidue, ...inData ];
          const sampleTimestamp = Date.now();
          const deviceId = this._localTrack.getDeviceId();
          let i = 0;

          // The bound is inclusive so that a trailing frame of exactly _vadSampleSize samples is scored in this
          // callback instead of being carried over and published with the timestamp of the next one.
          for (; i + this._vadSampleSize <= completeInData.length; i += this._vadSampleSize) {
              const pcmSample = completeInData.slice(i, i + this._vadSampleSize);
              const vadScore = this._vadProcessor.calculateAudioFrameVAD(pcmSample);

              this.emit(VAD_SCORE_PUBLISHED, {
                  timestamp: sampleTimestamp,
                  score: vadScore,
                  deviceId
              });
          }

          // Whatever did not fill a complete frame waits for the next callback.
          this._bufferResidue = completeInData.slice(i);
      }

      /**
       * Connects the nodes in the AudioContext to start the flow of audio data.
       *
       * @returns {void}
       */
      _connectAudioGraph() {
          this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
          this._audioSource.connect(this._audioProcessingNode);
          this._audioProcessingNode.connect(this._audioContext.destination);
      }

      /**
       * Disconnects the nodes in the AudioContext.
       *
       * @returns {void}
       */
      _disconnectAudioGraph() {
          // Even though we disconnect the processing node it seems that some callbacks remain queued,
          // resulting in calls with an uninitialized context, so the handler is swapped for a no-op first.
          // eslint-disable-next-line no-empty-function
          this._audioProcessingNode.onaudioprocess = () => {};
          this._audioProcessingNode.disconnect();
          this._audioSource.disconnect();
      }

      /**
       * Releases everything acquired by this instance: disconnects the audio graph, stops the local track stream
       * and closes the AudioContext.
       *
       * @returns {void}
       */
      _cleanupResources() {
          this._disconnectAudioGraph();
          this._localTrack.stopStream();

          // The context was created by this instance, so it is ours to close; closing releases the system audio
          // resources it holds. close() completes asynchronously and nothing here depends on its completion.
          if (this._audioContext.state !== 'closed') {
              this._audioContext.close();
          }
      }

      /**
       * Get the associated track device ID.
       *
       * @returns {string}
       */
      getDeviceId() {
          return this._localTrack.getDeviceId();
      }

      /**
       * Get the associated track label.
       *
       * @returns {string}
       */
      getTrackLabel() {
          return this._localTrack.getDeviceLabel();
      }

      /**
       * Start the emitter by connecting the audio graph.
       *
       * @returns {void}
       */
      start() {
          this._connectAudioGraph();
      }

      /**
       * Stops the emitter by disconnecting the audio graph and discarding any buffered PCM residue.
       *
       * @returns {void}
       */
      stop() {
          this._disconnectAudioGraph();
          this._bufferResidue = [];
      }

      /**
       * Destroy the TrackVADEmitter instance (release resources and stop callbacks). Calling it again after a
       * successful destroy is a no-op.
       *
       * @returns {void}
       */
      destroy() {
          if (this._destroyed) {
              return;
          }

          this._cleanupResources();
          this._destroyed = true;
      }
  }