import { EventEmitter } from 'events';
import * as JitsiConferenceEvents from '../../JitsiConferenceEvents';
import { getLogger } from 'jitsi-meet-logger';
import { VAD_SCORE_PUBLISHED, VAD_TALK_WHILE_MUTED } from './DetectionEvents';
import TrackVADEmitter from './TrackVADEmitter';
// Logger instance obtained for this file.
const logger = getLogger(__filename);
/**
 * Threshold that the average of the VAD scores gathered over one processing window has to exceed
 * (strict "greater than" comparison) in order for the talk-while-muted event to be triggered.
 * @type {number}
 */
const VAD_AVG_THRESHOLD = 0.6;
/**
 * The VAD score needed to trigger the processing algorithm, i.e. once a single sample has a VAD score strictly
 * greater than VAD_VOICE_LEVEL (and no processing is already ongoing) we start buffering all scores for the time
 * span defined by PROCESS_TIME_FRAME_SPAN_MS.
 * @type {number}
 */
const VAD_VOICE_LEVEL = 0.9;
/**
 * Value handed to TrackVADEmitter.create as its second argument. Despite the name, it is described as the number
 * of audio samples that are processed at a time.
 * NOTE(review): the exact meaning is defined by TrackVADEmitter, which is not visible here - confirm there.
 * @type {number}
 */
const VAD_EMITTER_SAMPLE_RATE = 4096;
/**
 * Time span, in milliseconds, over which VAD scores are buffered before their average is computed and compared
 * against VAD_AVG_THRESHOLD to decide whether the event is triggered.
 * @type {number}
 */
const PROCESS_TIME_FRAME_SPAN_MS = 1500;
/**
 * Detects a user trying to speak while the local audio track is muted and fires a VAD_TALK_WHILE_MUTED event.
 * Voice activity scores are obtained from a {@link TrackVADEmitter} created for the local audio track.
 */
export default class VADTalkMutedDetection extends EventEmitter {
    /**
     * Creates VADTalkMutedDetection
     * @param {JitsiConference} conference - JitsiConference instance that created us.
     * @param {Function} createVADProcessor - Function that creates a Voice activity detection processor. It is
     * expected to return a promise resolving with the processor. The processor needs to implement the following
     * functions:
     * - getSampleLength() - Returns the sample size accepted by calculateAudioFrameVAD.
     * - getRequiredPCMFrequency() - Returns the PCM frequency at which the processor operates.
     * - calculateAudioFrameVAD(pcmSample) - Process a 32 float pcm sample of getSampleLength size.
     * @constructor
     */
    constructor(conference, createVADProcessor) {
        super();

        /**
         * Member function that instantiates a VAD processor.
         */
        this._createVADProcessor = createVADProcessor;

        /**
         * Current {@link TrackVADEmitter}. VAD Emitter uses a {@link JitsiLocalTrack} and VAD processor to generate
         * periodic voice probability scores. Null while no emitter exists.
         */
        this._vadEmitter = null;

        /**
         * Flag which denotes the current state of the detection service i.e. if there is already a processing
         * operation ongoing.
         */
        this._processing = false;

        /**
         * Buffer that keeps the VAD scores for a period of time.
         */
        this._scoreArray = [];

        /**
         * Handle of the timeout that ends the current processing window, null when no window is open.
         */
        this._processTimeout = null;

        /**
         * Tail of the promise chain on which every TRACK_ADDED, TRACK_REMOVED and TRACK_MUTE_CHANGED reaction is
         * queued. Because the emitter is created asynchronously, running the reactions strictly one after the
         * other guarantees that an emitter is fully initialized before it is started, stopped or destroyed
         * (e.g. when the device is changed and a removal is immediately followed by an addition).
         * The chain never rejects, see {@link _enqueueOperation}.
         */
        this._vadInitTracker = Promise.resolve();

        this._processVADScore = this._processVADScore.bind(this);

        /**
         * {@link JitsiConference} bindings.
         */
        conference.on(JitsiConferenceEvents.TRACK_ADDED, this._trackAdded.bind(this));
        conference.on(JitsiConferenceEvents.TRACK_REMOVED, this._trackRemoved.bind(this));
        conference.on(JitsiConferenceEvents.TRACK_MUTE_CHANGED, this._trackMuteChanged.bind(this));

        // TODO do we need to handle the case where tracks are removed, make sure this cleans up properly so
        // we don't have any leaks i.e. stale JitsiLocalTracks
    }

    /**
     * Queues an operation behind all previously queued ones and makes the result the new tail of the chain.
     * A failing operation is logged and swallowed so that it neither produces an unhandled rejection nor prevents
     * the operations queued after it from running.
     *
     * @param {Function} operation - Callback to run once all previous operations settled; may return a promise.
     * @returns {Promise<void>} - The new tail of the chain.
     */
    _enqueueOperation(operation) {
        this._vadInitTracker = this._vadInitTracker
            .then(operation)
            .catch(error => {
                logger.error('VAD talk while muted detection operation failed: ', error);
            });

        return this._vadInitTracker;
    }

    /**
     * Determine if the current score is high enough that we should start the final score processing, and make sure
     * there isn't already a process operation ongoing.
     *
     * @param {number} vadScore - PCM sample VAD score.
     * @return {boolean}
     */
    _shouldStartVADCompute(vadScore) {
        return vadScore > VAD_VOICE_LEVEL && !this._processing;
    }

    /**
     * Determine if the computed score over the configured time span should trigger an event.
     *
     * @param {number} computedScore - Computed VAD score.
     * @returns {boolean} - Should or shouldn't trigger.
     */
    _shouldTriggerNotification(computedScore) {
        return computedScore > VAD_AVG_THRESHOLD;
    }

    /**
     * Start the {@link TrackVADEmitter} and attach the event listener. Does nothing when there is no emitter.
     * @returns {void}
     */
    _startVADEmitter() {
        if (!this._vadEmitter) {
            return;
        }

        // Both TRACK_ADDED (for an already muted track) and TRACK_MUTE_CHANGED can start the emitter. Detach first
        // so the listener is attached only once, otherwise every score would be buffered multiple times.
        this._vadEmitter.removeListener(VAD_SCORE_PUBLISHED, this._processVADScore);
        this._vadEmitter.on(VAD_SCORE_PUBLISHED, this._processVADScore);
        this._vadEmitter.start();
    }

    /**
     * Stop the {@link TrackVADEmitter} and detach the event listener. Does nothing when there is no emitter.
     * @returns {void}
     */
    _stopVADEmitter() {
        if (!this._vadEmitter) {
            return;
        }

        this._vadEmitter.removeListener(VAD_SCORE_PUBLISHED, this._processVADScore);
        this._vadEmitter.stop();
    }

    /**
     * Stops and destroys the current {@link TrackVADEmitter}, if any, and resets the processing context.
     * @returns {void}
     */
    _destroyVADEmitter() {
        if (!this._vadEmitter) {
            return;
        }

        this._stopVADEmitter();
        this._reset();
        this._vadEmitter.destroy();
        this._vadEmitter = null;
    }

    /**
     * Calculates the average value of an array of scores.
     *
     * @param {Array<number>|Float32Array} scoreArray - Array of vad scores.
     * @returns {number} - Score average, 0 for an empty array.
     */
    _calculateAverage(scoreArray) {
        if (!scoreArray.length) {
            return 0;
        }

        return scoreArray.reduce((sum, score) => sum + score, 0) / scoreArray.length;
    }

    /**
     * Compute cumulative VAD score function called once the PROCESS_TIME_FRAME_SPAN_MS timeout has elapsed.
     * @returns {void}
     * @fires VAD_TALK_WHILE_MUTED
     */
    _calculateVADScore() {
        const score = this._calculateAverage(this._scoreArray);

        if (this._shouldTriggerNotification(score)) {
            /**
             * User is talking while the mic is muted, generate event.
             *
             * @event VAD_TALK_WHILE_MUTED.
             * @type {Object}
             */
            this.emit(VAD_TALK_WHILE_MUTED, {});

            // Event was fired. Stop event emitter and remove listeners so no residue events kick off after this
            // point and a single VAD_TALK_WHILE_MUTED is generated per mic muted state.
            this._stopVADEmitter();
        }

        // We reset the context in case a new process phase needs to be triggered.
        this._reset();
    }

    /**
     * Listens for {@link TrackVADEmitter} events and processes them.
     *
     * @param {Object} vadScore -VAD score emitted by {@link TrackVADEmitter}
     * @param {Date} vadScore.timestamp - Exact time at which processed PCM sample was generated.
     * @param {number} vadScore.score - VAD score on a scale from 0 to 1 (i.e. 0.7)
     * @param {string} vadScore.deviceId - Device id of the associated track.
     * @returns {void}
     * @listens VAD_SCORE_PUBLISHED
     */
    _processVADScore(vadScore) {
        // Because we remove all listeners on the vadEmitter once the main event is triggered,
        // there is no need to check for rogue events.
        if (this._shouldStartVADCompute(vadScore.score)) {
            this._processing = true;

            // Start gathering VAD scores for the configured period of time.
            this._processTimeout = setTimeout(() => this._calculateVADScore(), PROCESS_TIME_FRAME_SPAN_MS);
        }

        // There is a processing phase on going, add score to buffer array. The score that opened the window is
        // buffered as well.
        if (this._processing) {
            this._scoreArray.push(vadScore.score);
        }
    }

    /**
     * Reset the processing context, clear buffer, cancel the timeout trigger.
     *
     * @returns {void}
     */
    _reset() {
        this._processing = false;
        this._scoreArray = [];
        clearTimeout(this._processTimeout);
        this._processTimeout = null;
    }

    /**
     * Notifies the detector that a track was added to the associated {@link JitsiConference}.
     * Only take into account local audio tracks.
     * @param {JitsiTrack} track - The added track.
     * @returns {void}
     * @listens TRACK_ADDED
     */
    _trackAdded(track) {
        if (!track.isLocalAudioTrack()) {
            return;
        }

        // Queue the creation behind any pending operation so that successive TRACK_ADDED / TRACK_REMOVED events
        // create and destroy the processing context in the order in which the events were received.
        this._enqueueOperation(() =>
            Promise.resolve(this._createVADProcessor())
                .then(vadProcessor =>
                    TrackVADEmitter.create(track.getDeviceId(), VAD_EMITTER_SAMPLE_RATE, vadProcessor)
                )
                .then(vadEmitter => {
                    logger.debug('Created VAD emitter for track: ', track.getTrackLabel());

                    // A track was added without the previous one being removed, do not leak its emitter.
                    this._destroyVADEmitter();
                    this._vadEmitter = vadEmitter;

                    if (track.isMuted()) {
                        this._startVADEmitter();
                    }
                })
        );
    }

    /**
     * Notifies the detector that the mute state of a {@link JitsiConference} track has changed. Only takes into
     * account local audio tracks. In case the track was muted the detector starts the {@link TrackVADEmitter}
     * otherwise it's stopped.
     * @param {JitsiTrack} track - The track whose mute state has changed.
     * @returns {void}
     * @listens TRACK_MUTE_CHANGED
     */
    _trackMuteChanged(track) {
        if (!track.isLocalAudioTrack()) {
            return;
        }

        this._enqueueOperation(() => {
            // Reset the processing context in between muted states so that each individual mute phase can generate
            // its own event.
            this._reset();

            // The mute state is read when the operation runs, i.e. after a pending emitter creation completed.
            if (track.isMuted()) {
                this._startVADEmitter();
            } else {
                this._stopVADEmitter();
            }
        });
    }

    /**
     * Notifies the detector that a track associated with the {@link JitsiConference} was removed. Only takes into
     * account local audio tracks. Cleans up resources associated with the track and resets the processing context.
     *
     * @param {JitsiTrack} track - The removed track.
     * @returns {void}
     * @listens TRACK_REMOVED
     */
    _trackRemoved(track) {
        if (!track.isLocalAudioTrack()) {
            return;
        }

        // Use the chain to make sure the removal runs after a still pending creation.
        this._enqueueOperation(() => {
            logger.debug('Removing track from VAD detection - ', track.getTrackLabel());
            this._destroyVADEmitter();
        });
    }
}