import EventEmitter from 'events';
import RTC from '../RTC/RTC';
import { createAudioContext } from '../webaudio/WebAudioUtils';
import { VAD_SCORE_PUBLISHED } from './DetectionEvents';
/**
* Connects an audio JitsiLocalTrack to a vadProcessor using WebAudio ScriptProcessorNode.
* Once an object is created audio from the local track flows through the ScriptProcessorNode as raw PCM.
* The PCM is processed by the injected vad module and a voice activity detection score is obtained, the
* score is published to consumers via an EventEmitter.
* After work is done with this service the destroy method needs to be called for a proper cleanup.
*
* @fires VAD_SCORE_PUBLISHED
*/
export default class TrackVADEmitter extends EventEmitter {
    /**
     * Constructor.
     *
     * @param {number} procNodeSampleRate - Sample rate of the ScriptProcessorNode. Possible values 256, 512, 1024,
     * 2048, 4096, 8192, 16384. Passing other values will default to the closest neighbor.
     * @param {Object} vadProcessor - VAD processor that allows us to calculate VAD score for PCM samples.
     * @param {JitsiLocalTrack} jitsiLocalTrack - JitsiLocalTrack corresponding to micDeviceId.
     */
    constructor(procNodeSampleRate, vadProcessor, jitsiLocalTrack) {
        super();

        /**
         * Sample rate of the ScriptProcessorNode.
         */
        this._procNodeSampleRate = procNodeSampleRate;

        /**
         * VAD Processor that allows us to calculate VAD score for PCM samples.
         */
        this._vadProcessor = vadProcessor;

        /**
         * The JitsiLocalTrack instance.
         */
        this._localTrack = jitsiLocalTrack;

        /**
         * Buffer to hold residue PCM resulting after a ScriptProcessorNode callback.
         */
        this._bufferResidue = new Float32Array([]);

        /**
         * The AudioContext instance with the preferred sample frequency.
         */
        this._audioContext = createAudioContext({ sampleRate: vadProcessor.getRequiredPCMFrequency() });

        /**
         * PCM Sample size expected by the VAD Processor instance. We cache it here as this value is used extensively,
         * saves a couple of function calls.
         */
        this._vadSampleSize = vadProcessor.getSampleLength();

        /**
         * Event listener function that will be called by the ScriptProcessorNode with raw PCM data, depending on the
         * set sample rate.
         */
        this._onAudioProcess = this._onAudioProcess.bind(this);

        this._initializeAudioContext();
    }

    /**
     * Factory method that sets up all the necessary components for the creation of the TrackVADEmitter.
     *
     * @param {string} micDeviceId - Target microphone device id.
     * @param {number} procNodeSampleRate - Sample rate of the proc node.
     * @param {Object} vadProcessor - Module that calculates the voice activity score for a certain audio PCM sample.
     * The processor needs to implement the following functions:
     * - getSampleLength() - Returns the sample size accepted by calculateAudioFrameVAD.
     * - getRequiredPCMFrequency() - Returns the PCM frequency at which the processor operates.
     * - calculateAudioFrameVAD(pcmSample) - Process a 32 float pcm sample of getSampleLength size.
     * @returns {Promise} - Promise resolving in a new instance of TrackVADEmitter.
     */
    static create(micDeviceId, procNodeSampleRate, vadProcessor) {
        return RTC.obtainAudioAndVideoPermissions({
            devices: [ 'audio' ],
            micDeviceId
        }).then(localTrack => {
            // We only expect one audio track when specifying a device id.
            if (!localTrack[0]) {
                throw new Error(`Failed to create jitsi local track for device id: ${micDeviceId}`);
            }

            return new TrackVADEmitter(procNodeSampleRate, vadProcessor, localTrack[0]);

            // We have no exception handling at this point as there is nothing to clean up, the vadProcessor
            // life cycle is handled by whoever created this instance.
        });
    }

    /**
     * Sets up the audio graph in the AudioContext.
     *
     * @returns {void}
     */
    _initializeAudioContext() {
        this._audioSource = this._audioContext.createMediaStreamSource(this._localTrack.stream);

        // TODO AudioProcessingNode is deprecated in the web audio specifications and the recommended replacement
        // is audio worklet, however at the point of implementation AudioProcessingNode was still the de facto way
        // of achieving this functionality and supported in all major browsers as opposed to audio worklet which
        // was only available in Chrome. This todo is just a reminder that we should replace AudioProcessingNode
        // with audio worklet when it's mature enough and has more browser support.
        // We don't need stereo for determining the VAD score so we create a single channel processing node.
        this._audioProcessingNode = this._audioContext.createScriptProcessor(this._procNodeSampleRate, 1, 1);
    }

    /**
     * ScriptProcessorNode callback, the input parameter contains the PCM audio that is then sent to the VAD
     * processor. The processor only accepts PCM samples of getSampleLength() size whereas the webaudio processor
     * node can't sample at a multiple of that size, thus after each _onAudioProcess callback there can remain a
     * PCM buffer residue equal to _procNodeSampleRate % _vadSampleSize which is prepended to the next sample
     * buffer and so on.
     *
     * @param {AudioProcessingEvent} audioEvent - Audio event.
     * @returns {void}
     * @fires VAD_SCORE_PUBLISHED
     */
    _onAudioProcess(audioEvent) {
        // Prepend the residue PCM buffer from the previous process callback.
        const inData = audioEvent.inputBuffer.getChannelData(0);
        const completeInData = [ ...this._bufferResidue, ...inData ];
        const sampleTimestamp = Date.now();

        let i = 0;

        // Use <= so that a sample which exactly fills the remaining buffer is processed now instead of being
        // needlessly deferred to the next callback as residue (the residue thus always stays < _vadSampleSize).
        for (; i + this._vadSampleSize <= completeInData.length; i += this._vadSampleSize) {
            const pcmSample = completeInData.slice(i, i + this._vadSampleSize);

            // The VAD processor might change the values inside the array so we make a copy.
            const vadScore = this._vadProcessor.calculateAudioFrameVAD(pcmSample.slice());

            this.emit(VAD_SCORE_PUBLISHED, {
                timestamp: sampleTimestamp,
                score: vadScore,
                pcmData: pcmSample,
                deviceId: this._localTrack.getDeviceId()
            });
        }

        this._bufferResidue = completeInData.slice(i, completeInData.length);
    }

    /**
     * Connects the nodes in the AudioContext to start the flow of audio data.
     *
     * @returns {void}
     */
    _connectAudioGraph() {
        this._audioProcessingNode.onaudioprocess = this._onAudioProcess;
        this._audioSource.connect(this._audioProcessingNode);
        this._audioProcessingNode.connect(this._audioContext.destination);
    }

    /**
     * Disconnects the nodes in the AudioContext.
     *
     * @returns {void}
     */
    _disconnectAudioGraph() {
        // Even though we disconnect the processing node it seems that some callbacks remain queued,
        // resulting in calls with an uninitialized context.
        // eslint-disable-next-line no-empty-function
        this._audioProcessingNode.onaudioprocess = () => {};
        this._audioProcessingNode.disconnect();
        this._audioSource.disconnect();
    }

    /**
     * Cleanup potentially acquired resources.
     *
     * @returns {void}
     */
    _cleanupResources() {
        this._disconnectAudioGraph();
        this._localTrack.stopStream();
    }

    /**
     * Get the associated track device ID.
     *
     * @returns {string}
     */
    getDeviceId() {
        return this._localTrack.getDeviceId();
    }

    /**
     * Get the associated track label.
     *
     * @returns {string}
     */
    getTrackLabel() {
        return this._localTrack.getDeviceLabel();
    }

    /**
     * Start the emitter by connecting the audio graph.
     *
     * @returns {void}
     */
    start() {
        this._connectAudioGraph();
    }

    /**
     * Stops the emitter by disconnecting the audio graph.
     *
     * @returns {void}
     */
    stop() {
        this._disconnectAudioGraph();

        // Keep the residue buffer type consistent with the constructor (Float32Array, not a plain array).
        this._bufferResidue = new Float32Array([]);
    }

    /**
     * Destroy TrackVADEmitter instance (release resources and stop callbacks).
     *
     * @returns {void}
     */
    destroy() {
        if (this._destroyed) {
            return;
        }

        this._cleanupResources();

        // Also release the AudioContext created in the constructor; otherwise the browser keeps the
        // underlying audio pipeline alive. close() returns a Promise but there is nothing left to do
        // on completion, so we intentionally don't wait for it.
        this._audioContext.close();

        this._destroyed = true;
    }
}