You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

VADTalkMutedDetection.js 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. import { EventEmitter } from 'events';
  2. import { VAD_SCORE_PUBLISHED, VAD_TALK_WHILE_MUTED } from './DetectionEvents';
  3. import { getLogger } from 'jitsi-meet-logger';
  4. import TrackVADEmitter from './TrackVADEmitter';
  5. import * as JitsiConferenceEvents from '../../JitsiConferenceEvents';
  // Logger instance for this module, labelled with the current file name.
  const logger = getLogger(__filename);

  /**
   * Score a single sample has to be strictly above for a processing phase to begin. Once that happens every score
   * received during the next PROCESS_TIME_FRAME_SPAN_MS milliseconds is collected.
   * @type {number}
   */
  const VAD_VOICE_LEVEL = 0.9;

  /**
   * Length, in milliseconds, of the window over which scores are collected and averaged before deciding whether the
   * event is triggered.
   * @type {number}
   */
  const PROCESS_TIME_FRAME_SPAN_MS = 1500;

  /**
   * Value the average of the scores collected during one window has to be strictly above for an event to be
   * triggered.
   * @type {number}
   */
  const VAD_AVG_THRESHOLD = 0.6;

  /**
   * Value handed to TrackVADEmitter when it is created; it defines how many audio samples are processed at a time.
   * @type {number}
   */
  const VAD_EMITTER_SAMPLE_RATE = 4096;
  /**
   * Detects a user trying to speak while locally muted and fires a VAD_TALK_WHILE_MUTED event, using the voice
   * activity scores published by a {@link TrackVADEmitter}.
   */
  export default class VADTalkMutedDetection extends EventEmitter {
      /**
       * Creates <tt>VADTalkMutedDetection</tt>
       * @param {JitsiConference} conference - JitsiConference instance that created us.
       * @param {Function} createVADProcessor - Function that returns a promise resolving to a voice activity detection
       * processor. The processor needs to implement the following functions:
       * - <tt>getSampleLength()</tt> - Returns the sample size accepted by calculateAudioFrameVAD.
       * - <tt>getRequiredPCMFrequency()</tt> - Returns the PCM frequency at which the processor operates.
       * - <tt>calculateAudioFrameVAD(pcmSample)</tt> - Process a 32 float pcm sample of getSampleLength size.
       * @constructor
       */
      constructor(conference, createVADProcessor) {
          super();

          /**
           * Member function that instantiates a VAD processor.
           */
          this._createVADProcessor = createVADProcessor;

          /**
           * Current {@link TrackVADEmitter}. VAD Emitter uses a {@link JitsiLocalTrack} and VAD processor to generate
           * period voice probability scores. It is null while no local audio track is being monitored.
           */
          this._vadEmitter = null;

          /**
           * Flag which denotes the current state of the detection service i.e. if there is already a processing
           * operation ongoing.
           */
          this._processing = false;

          /**
           * Buffer that keeps the VAD scores for a period of time.
           */
          this._scoreArray = [];

          /**
           * Handle of the timeout that ends the current processing phase, null when none is scheduled.
           */
          this._processTimeout = null;

          /**
           * Promise used to serialize the create, mute-toggle and destroy operations triggered by TRACK_ADDED,
           * TRACK_MUTE_CHANGED and TRACK_REMOVED events coming from the conference.
           * Because the VAD emitter is created asynchronously, every operation is appended to this chain so that it
           * only runs once the previous ones have finished (e.g. when changing the device a removal is immediately
           * followed by an addition). Failures are logged and never leave the chain in a rejected state.
           */
          this._vadInitTracker = Promise.resolve();

          // Bound once so the very same function references can be registered, removed and scheduled.
          this._processVADScore = this._processVADScore.bind(this);
          this._calculateVADScore = this._calculateVADScore.bind(this);

          /**
           * {@link JitsiConference} bindings.
           */
          conference.on(JitsiConferenceEvents.TRACK_ADDED, this._trackAdded.bind(this));
          conference.on(JitsiConferenceEvents.TRACK_REMOVED, this._trackRemoved.bind(this));
          conference.on(JitsiConferenceEvents.TRACK_MUTE_CHANGED, this._trackMuteChanged.bind(this));

          // TODO the conference listeners registered above are never removed; make sure this cleans up properly so
          // we don't have any leaks i.e. stale JitsiLocalTracks.
      }

      /**
       * Determine if the current score is high enough that we should start the final score processing, and make sure
       * there isn't already a process operation ongoing.
       *
       * @param {number} vadScore - PCM sample VAD score.
       * @return {boolean} - True when the score is strictly above VAD_VOICE_LEVEL and no processing phase is active.
       */
      _shouldStartVADCompute(vadScore) {
          return vadScore > VAD_VOICE_LEVEL && !this._processing;
      }

      /**
       * Determine if the score computed over the configured time span should trigger an event.
       *
       * @param {number} computedScore - Computed VAD score.
       * @returns {boolean} - True when the score is strictly above VAD_AVG_THRESHOLD.
       */
      _shouldTriggerNotification(computedScore) {
          return computedScore > VAD_AVG_THRESHOLD;
      }

      /**
       * Start the {@link TrackVADEmitter} and attach the event listener. Does nothing when there is no emitter.
       * @returns {void}
       */
      _startVADEmitter() {
          if (!this._vadEmitter) {
              return;
          }

          // Detach first so that starting twice in a row never registers the score handler more than once.
          this._vadEmitter.removeListener(VAD_SCORE_PUBLISHED, this._processVADScore);
          this._vadEmitter.on(VAD_SCORE_PUBLISHED, this._processVADScore);
          this._vadEmitter.start();
      }

      /**
       * Stop the {@link TrackVADEmitter} and detach the event listener. Does nothing when there is no emitter.
       * @returns {void}
       */
      _stopVADEmitter() {
          if (!this._vadEmitter) {
              return;
          }

          this._vadEmitter.removeListener(VAD_SCORE_PUBLISHED, this._processVADScore);
          this._vadEmitter.stop();
      }

      /**
       * Stops and destroys the current {@link TrackVADEmitter}, if any, and resets the processing context.
       * @returns {void}
       */
      _destroyVADEmitter() {
          if (!this._vadEmitter) {
              return;
          }

          this._stopVADEmitter();
          this._reset();
          this._vadEmitter.destroy();
          this._vadEmitter = null;
      }

      /**
       * Appends an operation to the {@link VADTalkMutedDetection#_vadInitTracker} chain. A failing operation is logged
       * and swallowed so that the operations queued after it still run.
       *
       * @param {Function} operation - Step to run once all previously queued steps have finished.
       * @param {string} failureMessage - Message logged, together with the error, if the step fails.
       * @returns {void}
       */
      _enqueue(operation, failureMessage) {
          this._vadInitTracker = this._vadInitTracker
              .then(operation)
              .catch(error => {
                  logger.warn(failureMessage, error);
              });
      }

      /**
       * Calculates the average value of an array of scores.
       *
       * @param {Array<number>|Float32Array} scoreArray - Array of vad scores.
       * @returns {number} - Score average, 0 for an empty array.
       */
      _calculateAverage(scoreArray) {
          if (!scoreArray.length) {
              return 0;
          }

          return scoreArray.reduce((sum, score) => sum + score, 0) / scoreArray.length;
      }

      /**
       * Compute cumulative VAD score function called once the PROCESS_TIME_FRAME_SPAN_MS timeout has elapsed.
       * @returns {void}
       * @fires VAD_TALK_WHILE_MUTED
       */
      _calculateVADScore() {
          const score = this._calculateAverage(this._scoreArray);

          if (this._shouldTriggerNotification(score)) {
              /**
               * User is talking while the mic is muted, generate event.
               *
               * @event VAD_TALK_WHILE_MUTED.
               * @type {Object}
               */
              this.emit(VAD_TALK_WHILE_MUTED, {});

              // Event was fired. Stop event emitter and remove listeners so no residue events kick off after this
              // point and a single VAD_TALK_WHILE_MUTED is generated per mic muted state.
              this._stopVADEmitter();
          }

          // We reset the context in case a new process phase needs to be triggered.
          this._reset();
      }

      /**
       * Listens for {@link TrackVADEmitter} events and processes them.
       *
       * @param {Object} vadScore - VAD score emitted by {@link TrackVADEmitter}
       * @param {Date} vadScore.timestamp - Exact time at which processed PCM sample was generated.
       * @param {number} vadScore.score - VAD score on a scale from 0 to 1 (i.e. 0.7)
       * @param {string} vadScore.deviceId - Device id of the associated track.
       * @returns {void}
       * @listens VAD_SCORE_PUBLISHED
       */
      _processVADScore(vadScore) {
          // Because we remove all listeners on the vadEmitter once the main event is triggered,
          // there is no need to check for rogue events.
          if (this._shouldStartVADCompute(vadScore.score)) {
              this._processing = true;

              // Start gathering VAD scores for the configured period of time.
              this._processTimeout = setTimeout(this._calculateVADScore, PROCESS_TIME_FRAME_SPAN_MS);
          }

          // There is a processing phase on going, add score to buffer array. The score that started the phase is
          // included as well.
          if (this._processing) {
              this._scoreArray.push(vadScore.score);
          }
      }

      /**
       * Reset the processing context, clear buffer, cancel the timeout trigger.
       *
       * @returns {void}
       */
      _reset() {
          this._processing = false;
          this._scoreArray = [];
          clearTimeout(this._processTimeout);
          this._processTimeout = null;
      }

      /**
       * Notifies the detector that a track was added to the associated {@link JitsiConference}.
       * Only take into account local audio tracks.
       * @param {JitsiTrack} track - The added track.
       * @returns {void}
       * @listens TRACK_ADDED
       */
      _trackAdded(track) {
          if (!track.isLocalAudioTrack()) {
              return;
          }

          // Chain onto the tracker so that successive TRACK_ADDED / TRACK_REMOVED events create and destroy the
          // processing context in the order in which the events arrived.
          this._enqueue(
              () => Promise.resolve(this._createVADProcessor())
                  .then(vadProcessor =>
                      TrackVADEmitter.create(track.getDeviceId(), VAD_EMITTER_SAMPLE_RATE, vadProcessor)
                  )
                  .then(vadEmitter => {
                      logger.debug('Created VAD emitter for track: ', track.getTrackLabel());

                      // Release an emitter left over from a previously added track so it is not leaked.
                      this._destroyVADEmitter();
                      this._vadEmitter = vadEmitter;

                      if (track.isMuted()) {
                          this._startVADEmitter();
                      }
                  }),
              'Failed to create VAD emitter for track: '
          );
      }

      /**
       * Notifies the detector that the mute state of a {@link JitsiConference} track has changed. Only takes into
       * account local audio tracks. In case the track was muted the detector starts the {@link TrackVADEmitter}
       * otherwise it's stopped. The event is ignored when no emitter exists by the time it is processed.
       * @param {JitsiTrack} track - The track whose mute state has changed.
       * @returns {void}
       * @listens TRACK_MUTE_CHANGED
       */
      _trackMuteChanged(track) {
          if (!track.isLocalAudioTrack()) {
              return;
          }

          // Wait for any pending create/destroy operation before touching the emitter.
          this._enqueue(() => {
              if (!this._vadEmitter) {
                  return;
              }

              // Reset the processing context in between muted states so that each individual mute phase can
              // generate its own event.
              this._reset();

              if (track.isMuted()) {
                  this._startVADEmitter();
              } else {
                  this._stopVADEmitter();
              }
          }, 'Failed to handle mute change in VAD detection: ');
      }

      /**
       * Notifies the detector that a track associated with the {@link JitsiConference} was removed. Only takes into
       * account local audio tracks. Cleans up resources associated with the track and resets the processing context.
       *
       * @param {JitsiTrack} track - The removed track.
       * @returns {void}
       * @listens TRACK_REMOVED
       */
      _trackRemoved(track) {
          if (!track.isLocalAudioTrack()) {
              return;
          }

          // Use the promise chain to make sure operations are in sequence.
          this._enqueue(() => {
              logger.debug('Removing track from VAD detection - ', track.getTrackLabel());
              this._destroyVADEmitter();
          }, 'Failed to remove track from VAD detection: ');
      }
  }