modified lib-jitsi-meet dev repo
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TrackVADEmitter.js 8.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. import EventEmitter from 'events';
  2. import RTC from '../RTC/RTC';
  3. import { VAD_SCORE_PUBLISHED } from './DetectionEvents';
  /**
   * Connects an audio JitsiLocalTrack to a vadProcessor using a WebAudio ScriptProcessorNode.
   * Once started, audio from the local track flows through the ScriptProcessorNode as raw PCM. The PCM is cut
   * into frames of the size expected by the injected VAD processor, each frame is scored and the score is
   * published to consumers via the EventEmitter interface.
   * When the instance is no longer needed, {@link TrackVADEmitter#destroy} must be called for a proper cleanup.
   *
   * @fires VAD_SCORE_PUBLISHED
   */
  export default class TrackVADEmitter extends EventEmitter {
      /**
       * Constructor.
       *
       * @param {number} procNodeSampleRate - Despite its name, this value is handed to
       * <tt>createScriptProcessor</tt> as the buffer size, i.e. the number of sample-frames delivered per
       * callback. The Web Audio API accepts 256, 512, 1024, 2048, 4096, 8192 and 16384.
       * @param {Object} vadProcessor - VAD processor that allows us to calculate VAD score for PCM samples.
       * @param {JitsiLocalTrack} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
       */
      constructor(procNodeSampleRate, vadProcessor, jitsiLocalTrack) {
          super();

          /**
           * Buffer size of the ScriptProcessorNode.
           */
          this._procNodeSampleRate = procNodeSampleRate;

          /**
           * VAD Processor that allows us to calculate VAD score for PCM samples.
           */
          this._vadProcessor = vadProcessor;

          /**
           * The JitsiLocalTrack instance.
           */
          this._localTrack = jitsiLocalTrack;

          /**
           * PCM samples left over after a ScriptProcessorNode callback, i.e. samples that did not fill a whole
           * VAD frame. Kept as a plain array because that is what the callback produces when it slices the
           * combined buffer.
           */
          this._bufferResidue = [];

          /**
           * Set once destroy() has released the resources, makes destroy() idempotent.
           */
          this._destroyed = false;

          /**
           * The AudioContext instance with the preferred sample frequency.
           */
          this._audioContext = new AudioContext({ sampleRate: vadProcessor.getRequiredPCMFrequency() });

          /**
           * PCM sample size expected by the VAD Processor instance. Cached because it is read on every
           * iteration of the processing loop.
           */
          this._vadSampleSize = vadProcessor.getSampleLength();

          /**
           * Event listener that will be called by the ScriptProcessorNode with raw PCM data. Bound here so it
           * can be assigned directly as the node's onaudioprocess handler.
           */
          this._onAudioProcess = this._onAudioProcess.bind(this);

          this._initializeAudioContext();
      }

      /**
       * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
       *
       * @param {string} micDeviceId - Target microphone device id.
       * @param {number} procNodeSampleRate - Buffer size of the ScriptProcessorNode (see the constructor).
       * @param {Object} vadProcessor - Module that calculates the voice activity score for a certain audio PCM
       * sample. The processor needs to implement the following functions:
       * - <tt>getSampleLength()</tt> - Returns the sample size accepted by calculateAudioFrameVAD.
       * - <tt>getRequiredPCMFrequency()</tt> - Returns the PCM frequency at which the processor operates.
       * - <tt>calculateAudioFrameVAD(pcmSample)</tt> - Process a float PCM sample of getSampleLength size.
       * @returns {Promise<TrackVADEmitter>} - Promise resolving in a new instance of TrackVADEmitter. It rejects
       * if no track could be obtained for the device id or if the emitter could not be constructed.
       */
      static create(micDeviceId, procNodeSampleRate, vadProcessor) {
          return RTC.obtainAudioAndVideoPermissions({
              devices: [ 'audio' ],
              micDeviceId
          }).then(localTrack => {
              // We only expect one audio track when specifying a device id.
              if (!localTrack[0]) {
                  throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
              }

              try {
                  return new TrackVADEmitter(procNodeSampleRate, vadProcessor, localTrack[0]);
              } catch (error) {
                  // The track was acquired by this factory, so nobody else holds a reference that could
                  // release it if the emitter fails to construct. Stop it before propagating the error.
                  // The vadProcessor life cycle is still handled by whoever passed it in.
                  localTrack[0].stopStream();
                  throw error;
              }
          });
      }

      /**
       * Creates the nodes of the audio graph in the AudioContext. The nodes are not connected here, that
       * happens in _connectAudioGraph.
       *
       * @returns {void}
       */
      _initializeAudioContext() {
          this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);

          // TODO AudioProcessingNode is deprecated check and replace with alternative.
          // We don't need stereo for determining the VAD score so we create a single channel processing node.
          this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
      }

      /**
       * ScriptProcessorNode callback, the input parameter contains the PCM audio that is then sent to the VAD
       * processor. The processor consumes frames of exactly _vadSampleSize samples, which in general does not
       * divide the ScriptProcessorNode buffer evenly. The samples left over after the last whole frame are
       * kept in _bufferResidue and prepended to the data of the next callback.
       *
       * @param {AudioProcessingEvent} audioEvent - Audio event.
       * @returns {void}
       * @fires VAD_SCORE_PUBLISHED
       */
      _onAudioProcess(audioEvent) {
          // Prepend the residue PCM buffer from the previous process callback.
          const inData = audioEvent.inputBuffer.getChannelData(0);
          const completeInData = [ ...this._bufferResidue, ...inData ];
          const sampleTimestamp = Date.now();
          const deviceId = this._localTrack.getDeviceId();

          let i = 0;

          // "<=" so that a frame ending exactly at the end of the buffer is scored now instead of being
          // held back as residue until the next callback.
          for (; i + this._vadSampleSize <= completeInData.length; i += this._vadSampleSize) {
              const pcmSample = completeInData.slice(i, i + this._vadSampleSize);
              const vadScore = this._vadProcessor.calculateAudioFrameVAD(pcmSample);

              /**
               * VAD score publish event
               *
               * @event VAD_SCORE_PUBLISHED
               * @type {Object}
               * @property {number} timestamp - Date.now() value taken when the callback started, shared by
               * every frame processed in that callback.
               * @property {number} score - Score returned by the VAD processor for the frame.
               * @property {string} deviceId - Device id of the associated track.
               */
              this.emit(VAD_SCORE_PUBLISHED, {
                  timestamp: sampleTimestamp,
                  score: vadScore,
                  deviceId
              });
          }

          // Whatever did not fill a whole frame is carried over to the next callback.
          this._bufferResidue = completeInData.slice(i);
      }

      /**
       * Connects the nodes in the AudioContext to start the flow of audio data.
       *
       * @returns {void}
       */
      _connectAudioGraph() {
          this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
          this._audioSource.connect(this._audioProcessingNode);
          this._audioProcessingNode.connect(this._audioContext.destination);
      }

      /**
       * Disconnects the nodes in the AudioContext.
       *
       * @returns {void}
       */
      _disconnectAudioGraph() {
          // Even though we disconnect the processing node it seems that some callbacks remain queued,
          // resulting in calls with an uninitialized context, so the handler is replaced with a no-op.
          // eslint-disable-next-line no-empty-function
          this._audioProcessingNode.onaudioprocess = () => {};
          this._audioProcessingNode.disconnect();
          this._audioSource.disconnect();
      }

      /**
       * Cleanup potentially acquired resources: the audio graph, the local track stream and the AudioContext.
       *
       * @returns {void}
       */
      _cleanupResources() {
          this._disconnectAudioGraph();
          this._localTrack.stopStream();

          // Every instance creates its own AudioContext, so it has to be closed here, otherwise the system
          // audio resources it holds are never released.
          if (this._audioContext.state !== 'closed') {
              Promise.resolve(this._audioContext.close()).catch(() => {
                  // Best effort: the graph is already disconnected and the track stopped, there is nothing
                  // further to release if the browser refuses to close the context.
              });
          }
      }

      /**
       * Get the associated track device ID.
       *
       * @returns {string}
       */
      getDeviceId() {
          return this._localTrack.getDeviceId();
      }

      /**
       * Get the associated track label.
       *
       * @returns {string}
       */
      getTrackLabel() {
          return this._localTrack.getDeviceLabel();
      }

      /**
       * Start the emitter by connecting the audio graph.
       *
       * @returns {void}
       */
      start() {
          this._connectAudioGraph();
      }

      /**
       * Stops the emitter by disconnecting the audio graph. Any buffered residue is dropped so that a later
       * start() does not mix stale samples with fresh audio.
       *
       * @returns {void}
       */
      stop() {
          this._disconnectAudioGraph();
          this._bufferResidue = [];
      }

      /**
       * Destroy TrackVADEmitter instance (release resources and stop callbacks). Calling it more than once is
       * a no-op.
       *
       * @returns {void}
       */
      destroy() {
          if (this._destroyed) {
              return;
          }

          this._cleanupResources();
          this._destroyed = true;
      }
  }