Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

RnnoiseProcessor.ts 6.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. /* eslint-disable no-bitwise */
  /**
   * Shape of the rnnoise WebAssembly module as consumed by this file: the generic module surface
   * (inherited from EmscriptenModule) plus the three rnnoise entry points called by RnnoiseProcessor.
   */
  interface IRnnoiseModule extends EmscriptenModule {

      /**
       * Creates an rnnoise state and returns its numeric handle.
       */
      _rnnoise_create: () => number;

      /**
       * Destroys a state previously returned by {@code _rnnoise_create}.
       */
      _rnnoise_destroy: (state: number) => void;

      /**
       * Processes one frame. Both buffers are passed as numeric addresses; the returned number is
       * used by RnnoiseProcessor as the voice activity (VAD) score of the frame.
       */
      _rnnoise_process_frame: (state: number, inputPtr: number, outputPtr: number) => number;
  }
  /**
   * Constant. Number of PCM samples in a single rnnoise frame; frames of any other size won't work.
   */
  export const RNNOISE_SAMPLE_LENGTH = 480;

  /**
   * Constant. Size in bytes of one rnnoise frame: RNNOISE_SAMPLE_LENGTH float32 samples
   * (480 * 4 = 1920 bytes).
   */
  const RNNOISE_BUFFER_SIZE = RNNOISE_SAMPLE_LENGTH * Float32Array.BYTES_PER_ELEMENT;

  /**
   * Constant. PCM sample rate, in Hz, that this adaptor reports as required for its input.
   *
   * NOTE(review): upstream rnnoise is commonly described as operating on 48 kHz audio - confirm that
   * 44.1 kHz is the intended value here.
   */
  const PCM_FREQUENCY = 44100;

  /**
   * Constant. Scale factor (2^15) between normalized float PCM samples and the 16 bit PCM amplitude
   * range: samples are multiplied by it before processing and divided by it afterwards.
   */
  const SHIFT_16_BIT_NR = 32768;
  23. /**
  24. * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
  25. * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
  26. * detection) scores.
  27. */
  /**
   * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
   * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
   * detection) scores.
   *
   * An instance owns one wasm heap buffer and one rnnoise context; both are released by {@code destroy},
   * after which the instance can no longer process audio.
   */
  export default class RnnoiseProcessor {
      /**
       * Rnnoise context handle needed to perform the audio processing, 0 when not allocated.
       */
      private _context = 0;

      /**
       * State flag, set once the instance was destroyed.
       */
      private _destroyed = false;

      /**
       * WASM interface through which calls to rnnoise are made.
       */
      private _wasmInterface: IRnnoiseModule;

      /**
       * Address of the wasm heap buffer used as both input and output of the rnnoise processing call,
       * 0 when not allocated.
       */
      private _wasmPcmInput = 0;

      /**
       * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
       */
      private _wasmPcmInputF32Index = 0;

      /**
       * Constructor.
       *
       * @class
       * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
       * @throws {Error} If the input buffer or the rnnoise context could not be created. Anything that
       * was allocated up to that point is released before the error is rethrown.
       */
      constructor(wasmInterface: IRnnoiseModule) {
          this._wasmInterface = wasmInterface;

          // Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
          // i.e. in case of exception there are no side effects.
          try {
              // Allocate the frame buffer once and reuse it for every processed frame.
              this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);

              if (!this._wasmPcmInput) {
                  throw Error('Failed to create wasm input memory buffer!');
              }

              // Byte address -> float32 element index (4 bytes per element).
              this._wasmPcmInputF32Index = this._wasmPcmInput >>> 2;

              this._context = this._wasmInterface._rnnoise_create();

              if (!this._context) {
                  throw Error('Failed to create rnnoise context!');
              }
          } catch (error) {
              // destroy can be called even if not all the components were initialized.
              this.destroy();
              throw error;
          }
      }

      /**
       * Release resources associated with the wasm context. Each handle is cleared before the matching
       * release call, so calling this method more than once never releases the same resource twice.
       * If something goes downhill here i.e. Exception is thrown, there is nothing much we can do.
       *
       * @returns {void}
       */
      _releaseWasmResources(): void {
          if (this._wasmPcmInput) {
              const pcmInput = this._wasmPcmInput;

              this._wasmPcmInput = 0;
              this._wasmPcmInputF32Index = 0;
              this._wasmInterface._free(pcmInput);
          }

          if (this._context) {
              const context = this._context;

              this._context = 0;
              this._wasmInterface._rnnoise_destroy(context);
          }
      }

      /**
       * Rnnoise can only operate on a certain PCM array size.
       *
       * @returns {number} - The PCM sample array size as required by rnnoise.
       */
      getSampleLength(): number {
          return RNNOISE_SAMPLE_LENGTH;
      }

      /**
       * Rnnoise can only operate on a certain format of PCM sample, namely float 32 at the frequency
       * returned here.
       *
       * @returns {number} - PCM sample frequency, in Hz, as required by this processor.
       */
      getRequiredPCMFrequency(): number {
          return PCM_FREQUENCY;
      }

      /**
       * Release any resources required by the rnnoise context this needs to be called
       * before destroying any context that uses the processor. Calling it again is a no-op.
       *
       * @returns {void}
       */
      destroy(): void {
          // Already destroyed, do nothing.
          if (this._destroyed) {
              return;
          }

          this._releaseWasmResources();
          this._destroyed = true;
      }

      /**
       * Calculate the Voice Activity Detection for a raw Float32 PCM sample Array. The input is left
       * untouched.
       * The array must hold at least 480 samples, this constraint comes from the rnnoise library.
       *
       * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
       * @returns {number} Contains VAD score in the interval 0 - 1 i.e. 0.90.
       * @throws {Error} Under the same conditions as {@code processAudioFrame}.
       */
      calculateAudioFrameVAD(pcmFrame: Float32Array): number {
          return this.processAudioFrame(pcmFrame);
      }

      /**
       * Process an audio frame, optionally denoising the input pcmFrame and returning the Voice Activity Detection score
       * for a raw Float32 PCM sample Array.
       * Rnnoise works on frames of exactly 480 samples; only the first 480 samples of pcmFrame are read
       * (and written back when denoising), shorter frames are rejected.
       *
       * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples. Parameter is also used as output
       * when {@code shouldDenoise} is true.
       * @param {boolean} shouldDenoise - Should the denoised frame be returned in pcmFrame.
       * @returns {number} Contains VAD score in the interval 0 - 1 i.e. 0.90 .
       * @throws {Error} If the processor was already destroyed or pcmFrame holds fewer than 480 samples.
       */
      processAudioFrame(pcmFrame: Float32Array, shouldDenoise: boolean = false): number {
          // The wasm buffer and the rnnoise context are gone after destroy(), using them would touch freed memory.
          if (this._destroyed) {
              throw new Error('RnnoiseProcessor instance is destroyed, please create another one!');
          }

          // Missing samples would read as undefined and end up as NaN in the rnnoise input.
          if (pcmFrame.length < RNNOISE_SAMPLE_LENGTH) {
              throw new Error(
                  `Rnnoise can only process PCM frames of ${RNNOISE_SAMPLE_LENGTH} samples, received ${pcmFrame.length}!`
              );
          }

          const inputHeap = this._wasmInterface.HEAPF32;

          // Scale the normalized float samples up to the 16 bit PCM amplitude range (still stored as float32),
          // as that's what rnnoise accepts as input.
          for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
              inputHeap[this._wasmPcmInputF32Index + i] = pcmFrame[i] * SHIFT_16_BIT_NR;
          }

          // Use the same buffer for input/output, rnnoise supports this behavior
          const vadScore = this._wasmInterface._rnnoise_process_frame(
              this._context,
              this._wasmPcmInput,
              this._wasmPcmInput
          );

          // Rnnoise denoises the frame by default but we can avoid unnecessary operations if the calling
          // client doesn't use the denoised frame.
          if (shouldDenoise) {
              // Look the heap view up again rather than reusing inputHeap, in case the module swapped it
              // during the call above (presumably possible when wasm memory grows - confirm).
              const outputHeap = this._wasmInterface.HEAPF32;

              // Scale back to normalized float PCM.
              for (let i = 0; i < RNNOISE_SAMPLE_LENGTH; i++) {
                  pcmFrame[i] = outputHeap[this._wasmPcmInputF32Index + i] / SHIFT_16_BIT_NR;
              }
          }

          return vadScore;
      }
  }