您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

VADTalkMutedDetection.js 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. import { EventEmitter } from 'events';
  2. import { getLogger } from 'jitsi-meet-logger';
  3. import * as JitsiConferenceEvents from '../../JitsiConferenceEvents';
  4. import { VAD_SCORE_PUBLISHED, VAD_TALK_WHILE_MUTED } from './DetectionEvents';
  5. import TrackVADEmitter from './TrackVADEmitter';
// Logger instance for this module, created by passing this file's name to getLogger.
const logger = getLogger(__filename);

/**
 * Threshold that the average of the VAD scores buffered during one processing window must strictly exceed for the
 * talk-while-muted event to be emitted.
 * @type {number}
 */
const VAD_AVG_THRESHOLD = 0.6;

/**
 * VAD score that starts a processing window: when a sample scores strictly above this level and no window is
 * currently open, all scores published during the next PROCESS_TIME_FRAME_SPAN_MS milliseconds are buffered.
 * @type {number}
 */
const VAD_VOICE_LEVEL = 0.9;

/**
 * Value handed to TrackVADEmitter.create as its second argument; it defines how many audio samples the emitter
 * processes at a time.
 * @type {number}
 */
const VAD_EMITTER_SAMPLE_RATE = 4096;

/**
 * Length, in milliseconds, of the window over which scores are buffered and then averaged to decide whether the
 * event is emitted.
 * @type {number}
 */
const PROCESS_TIME_FRAME_SPAN_MS = 1500;
  28. /**
  29. * Detect user trying to speak while is locally muted and fires an event using a TrackVADEmitter.
  30. */
  31. export default class VADTalkMutedDetection extends EventEmitter {
  32. /**
  33. * Creates <tt>VADTalkMutedDetection</tt>
  34. * @param {JitsiConference} conference - JitsiConference instance that created us.
  35. * @param {Object} createVADProcessor - Function that creates a Voice activity detection processor. The processor
  36. * needs to implement the following functions:
  37. * - <tt>getSampleLength()</tt> - Returns the sample size accepted by getSampleLength.
  38. * - <tt>getRequiredPCMFrequency()</tt> - Returns the PCM frequency at which the processor operates.
  39. * - <tt>calculateAudioFrameVAD(pcmSample)</tt> - Process a 32 float pcm sample of getSampleLength size.
  40. * @constructor
  41. */
  42. constructor(conference, createVADProcessor) {
  43. super();
  44. /**
  45. * Member function that instantiates a VAD processor.
  46. */
  47. this._createVADProcessor = createVADProcessor;
  48. /**
  49. * Current {@link TrackVADEmitter}. VAD Emitter uses a {@link JitsiLocalTrack} and VAD processor to generate
  50. * period voice probability scores.
  51. */
  52. this._vadEmitter = null;
  53. /**
  54. * Flag which denotes the current state of the detection service i.e.if there is already a processing operation
  55. * ongoing.
  56. */
  57. this._processing = false;
  58. /**
  59. * Buffer that keeps the VAD scores for a period of time.
  60. */
  61. this._scoreArray = [];
  62. /**
  63. * Promise used to chain create and destroy operations associated with TRACK_ADDED and TRACK_REMOVED events
  64. * coming from the conference.
  65. * Because we have an async created component (VAD Processor) we need to make sure that it's initialized before
  66. * we destroy it ( when changing the device for instance), or when we use it from an external point of entry
  67. * i.e. (TRACK_MUTE_CHANGED event callback).
  68. */
  69. this._vadInitTracker = Promise.resolve();
  70. /**
  71. * Listens for {@link TrackVADEmitter} events and processes them.
  72. */
  73. this._processVADScore = this._processVADScore.bind(this);
  74. /**
  75. * {@link JitsiConference} bindings.
  76. */
  77. conference.on(JitsiConferenceEvents.TRACK_ADDED, this._trackAdded.bind(this));
  78. conference.on(JitsiConferenceEvents.TRACK_REMOVED, this._trackRemoved.bind(this));
  79. conference.on(JitsiConferenceEvents.TRACK_MUTE_CHANGED, this._trackMuteChanged.bind(this));
  80. }
  81. /**
  82. * Start the {@link TrackVADEmitter} and attach the event listener.
  83. * @returns {void}
  84. */
  85. _startVADEmitter() {
  86. this._vadEmitter.on(VAD_SCORE_PUBLISHED, this._processVADScore);
  87. this._vadEmitter.start();
  88. }
  89. /**
  90. * Stop the {@link TrackVADEmitter} and detach the event listener.
  91. * @returns {void}
  92. */
  93. _stopVADEmitter() {
  94. this._vadEmitter.removeListener(VAD_SCORE_PUBLISHED, this._processVADScore);
  95. this._vadEmitter.stop();
  96. }
  97. /**
  98. * Calculates the average value of a Float32Array.
  99. *
  100. * @param {Float32Array} scoreArray - Array of vad scores.
  101. * @returns {number} - Score average.
  102. */
  103. _calculateAverage(scoreArray) {
  104. return scoreArray.length > 0 ? scoreArray.reduce((a, b) => a + b) / scoreArray.length : 0;
  105. }
  106. /**
  107. * Compute cumulative VAD score function called once the PROCESS_TIME_FRAME_SPAN_MS timeout has elapsed.
  108. * @returns {void}
  109. * @fires VAD_TALK_WHILE_MUTED
  110. */
  111. _calculateVADScore() {
  112. const score = this._calculateAverage(this._scoreArray);
  113. if (score > VAD_AVG_THRESHOLD) {
  114. this.emit(VAD_TALK_WHILE_MUTED, {});
  115. // Event was fired. Stop event emitter and remove listeners so no residue events kick off after this point
  116. // and a single VAD_TALK_WHILE_MUTED is generated per mic muted state.
  117. this._stopVADEmitter();
  118. }
  119. // We reset the context in case a new process phase needs to be triggered.
  120. this._reset();
  121. }
  122. /**
  123. * Listens for {@link TrackVADEmitter} events and processes them.
  124. *
  125. * @param {Object} vadScore -VAD score emitted by {@link TrackVADEmitter}
  126. * @param {Date} vadScore.timestamp - Exact time at which processed PCM sample was generated.
  127. * @param {number} vadScore.score - VAD score on a scale from 0 to 1 (i.e. 0.7)
  128. * @param {string} vadScore.deviceId - Device id of the associated track.
  129. * @listens VAD_SCORE_PUBLISHED
  130. */
  131. _processVADScore(vadScore) {
  132. // Because we remove all listeners on the vadEmitter once the main event is triggered,
  133. // there is no need to check for rogue events.
  134. if (vadScore.score > VAD_VOICE_LEVEL && !this._processing) {
  135. this._processing = true;
  136. // Start gathering VAD scores for the configured period of time.
  137. this._processTimeout = setTimeout(this._calculateVADScore.bind(this), PROCESS_TIME_FRAME_SPAN_MS);
  138. }
  139. // There is a processing phase on going, add score to buffer array.
  140. if (this._processing) {
  141. this._scoreArray.push(vadScore.score);
  142. }
  143. }
  144. /**
  145. * Reset the processing context, clear buffer, cancel the timeout trigger.
  146. *
  147. * @returns {void}
  148. */
  149. _reset() {
  150. this._processing = false;
  151. this._scoreArray = [];
  152. clearTimeout(this._processTimeout);
  153. }
  154. /**
  155. * Notifies the detector that a track was added to the associated {@link JitsiConference}.
  156. * Only take into account local audio tracks.
  157. * @param {JitsiTrack} track - The added track.
  158. * @returns {void}
  159. * @listens TRACK_ADDED
  160. */
  161. _trackAdded(track) {
  162. if (track.isLocalAudioTrack()) {
  163. // Keep a track promise so we take into account successive TRACK_ADD events being generated so that we
  164. // destroy/create the processing context in the proper order.
  165. this._vadInitTracker = this._vadInitTracker.then(() => this._createVADProcessor())
  166. .then(vadProcessor =>
  167. TrackVADEmitter.create(track.getDeviceId(), VAD_EMITTER_SAMPLE_RATE, vadProcessor)
  168. )
  169. .then(vadEmitter => {
  170. logger.debug('Created VAD emitter for track: ', track.getTrackLabel());
  171. this._vadEmitter = vadEmitter;
  172. if (track.isMuted()) {
  173. this._startVADEmitter();
  174. }
  175. });
  176. }
  177. }
  178. /**
  179. * Notifies the detector that the mute state of a {@link JitsiConference} track has changed. Only takes into account
  180. * local audio tracks. In case the track was muted the detector starts the {@link TrackVADEmitter} otherwise it's
  181. * stopped.
  182. * @param {JitsiTrack} track - The track whose mute state has changed.
  183. * @returns {void}
  184. * @listens TRACK_MUTE_CHANGED
  185. */
  186. _trackMuteChanged(track) {
  187. if (track.isLocalAudioTrack()) {
  188. // On a mute toggle reset the state.
  189. this._vadInitTracker = this._vadInitTracker.then(() => {
  190. // Reset the processing context in between muted states so that each individual mute phase can generate
  191. // it's own event.
  192. this._reset();
  193. if (track.isMuted()) {
  194. this._startVADEmitter();
  195. } else {
  196. this._stopVADEmitter();
  197. }
  198. });
  199. }
  200. }
  201. /**
  202. * Notifies the detector that a track associated with the {@link JitsiConference} was removed. Only takes into
  203. * account local audio tracks. Cleans up resources associated with the track and resets the processing context.
  204. *
  205. * @param {JitsiTrack} track - The removed track.
  206. * @returns {void}
  207. * @listens TRACK_REMOVED
  208. */
  209. _trackRemoved(track) {
  210. if (track.isLocalAudioTrack()) {
  211. // Use the promise to make sure operations are in sequence.
  212. this._vadInitTracker = this._vadInitTracker.then(() => {
  213. logger.debug('Removing track from VAD detection - ', track.getTrackLabel());
  214. if (this._vadEmitter) {
  215. this._stopVADEmitter();
  216. this._reset();
  217. this._vadEmitter.destroy();
  218. this._vadEmitter = null;
  219. }
  220. });
  221. }
  222. }
  223. }