您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

RnnoiseProcessor.js 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. // @flow
  2. /**
  3. * Constant. Rnnoise default sample size, samples of different size won't work.
  4. */
  5. export const RNNOISE_SAMPLE_LENGTH: number = 480;
  6. /**
  7. * Constant. Rnnoise only takes inputs of 480 PCM float32 samples thus 480*4.
  8. */
  9. const RNNOISE_BUFFER_SIZE: number = RNNOISE_SAMPLE_LENGTH * 4;
  10. /**
  11. * Constant. Rnnoise only takes operates on 44.1Khz float 32 little endian PCM.
  12. */
  13. const PCM_FREQUENCY: number = 44100;
  14. /**
  15. * Represents an adaptor for the rnnoise library compiled to webassembly. The class takes care of webassembly
  16. * memory management and exposes rnnoise functionality such as PCM audio denoising and VAD (voice activity
  17. * detection) scores.
  18. */
  19. export default class RnnoiseProcessor {
  20. /**
  21. * Rnnoise context object needed to perform the audio processing.
  22. */
  23. _context: ?Object;
  24. /**
  25. * State flag, check if the instance was destroyed.
  26. */
  27. _destroyed: boolean = false;
  28. /**
  29. * WASM interface through which calls to rnnoise are made.
  30. */
  31. _wasmInterface: Object;
  32. /**
  33. * WASM dynamic memory buffer used as input for rnnoise processing method.
  34. */
  35. _wasmPcmInput: Object;
  36. /**
  37. * The Float32Array index representing the start point in the wasm heap of the _wasmPcmInput buffer.
  38. */
  39. _wasmPcmInputF32Index: number;
  40. /**
  41. * WASM dynamic memory buffer used as output for rnnoise processing method.
  42. */
  43. _wasmPcmOutput: Object;
  44. /**
  45. * Constructor.
  46. *
  47. * @class
  48. * @param {Object} wasmInterface - WebAssembly module interface that exposes rnnoise functionality.
  49. */
  50. constructor(wasmInterface: Object) {
  51. // Considering that we deal with dynamic allocated memory employ exception safety strong guarantee
  52. // i.e. in case of exception there are no side effects.
  53. try {
  54. this._wasmInterface = wasmInterface;
  55. // For VAD score purposes only allocate the buffers once and reuse them
  56. this._wasmPcmInput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
  57. if (!this._wasmPcmInput) {
  58. throw Error('Failed to create wasm input memory buffer!');
  59. }
  60. this._wasmPcmOutput = this._wasmInterface._malloc(RNNOISE_BUFFER_SIZE);
  61. if (!this._wasmPcmOutput) {
  62. wasmInterface._free(this._wasmPcmInput);
  63. throw Error('Failed to create wasm output memory buffer!');
  64. }
  65. // The HEAPF32.set function requires an index relative to a Float32 array view of the wasm memory model
  66. // which is an array of bytes. This means we have to divide it by the size of a float to get the index
  67. // relative to a Float32 Array.
  68. this._wasmPcmInputF32Index = this._wasmPcmInput / 4;
  69. this._context = this._wasmInterface._rnnoise_create();
  70. } catch (error) {
  71. // release can be called even if not all the components were initialized.
  72. this._releaseWasmResources();
  73. throw error;
  74. }
  75. }
  76. /**
  77. * Copy the input PCM Audio Sample to the wasm input buffer.
  78. *
  79. * @param {Float32Array} pcmSample - Array containing 16 bit format PCM sample stored in 32 Floats .
  80. * @returns {void}
  81. */
  82. _copyPCMSampleToWasmBuffer(pcmSample: Float32Array) {
  83. this._wasmInterface.HEAPF32.set(pcmSample, this._wasmPcmInputF32Index);
  84. }
  85. /**
  86. * Convert 32 bit Float PCM samples to 16 bit Float PCM samples and store them in 32 bit Floats.
  87. *
  88. * @param {Float32Array} f32Array - Array containing 32 bit PCM samples.
  89. * @returns {void}
  90. */
  91. _convertTo16BitPCM(f32Array: Float32Array) {
  92. for (const [ index, value ] of f32Array.entries()) {
  93. f32Array[index] = value * 0x7fff;
  94. }
  95. }
  96. /**
  97. * Release resources associated with the wasm context. If something goes downhill here
  98. * i.e. Exception is thrown, there is nothing much we can do.
  99. *
  100. * @returns {void}
  101. */
  102. _releaseWasmResources() {
  103. // For VAD score purposes only allocate the buffers once and reuse them
  104. if (this._wasmPcmInput) {
  105. this._wasmInterface._free(this._wasmPcmInput);
  106. this._wasmPcmInput = null;
  107. }
  108. if (this._wasmPcmOutput) {
  109. this._wasmInterface._free(this._wasmPcmOutput);
  110. this._wasmPcmOutput = null;
  111. }
  112. if (this._context) {
  113. this._wasmInterface._rnnoise_destroy(this._context);
  114. this._context = null;
  115. }
  116. }
  117. /**
  118. * Rnnoise can only operate on a certain PCM array size.
  119. *
  120. * @returns {number} - The PCM sample array size as required by rnnoise.
  121. */
  122. getSampleLength() {
  123. return RNNOISE_SAMPLE_LENGTH;
  124. }
  125. /**
  126. * Rnnoise can only operate on a certain format of PCM sample namely float 32 44.1Kz.
  127. *
  128. * @returns {number} - PCM sample frequency as required by rnnoise.
  129. */
  130. getRequiredPCMFrequency() {
  131. return PCM_FREQUENCY;
  132. }
  133. /**
  134. * Release any resources required by the rnnoise context this needs to be called
  135. * before destroying any context that uses the processor.
  136. *
  137. * @returns {void}
  138. */
  139. destroy() {
  140. // Attempting to release a non initialized processor, do nothing.
  141. if (this._destroyed) {
  142. return;
  143. }
  144. this._releaseWasmResources();
  145. this._destroyed = true;
  146. }
  147. /**
  148. * Calculate the Voice Activity Detection for a raw Float32 PCM sample Array.
  149. * The size of the array must be of exactly 480 samples, this constraint comes from the rnnoise library.
  150. *
  151. * @param {Float32Array} pcmFrame - Array containing 32 bit PCM samples.
  152. * @returns {Float} Contains VAD score in the interval 0 - 1 i.e. 0.90 .
  153. */
  154. calculateAudioFrameVAD(pcmFrame: Float32Array) {
  155. if (this._destroyed) {
  156. throw new Error('RnnoiseProcessor instance is destroyed, please create another one!');
  157. }
  158. const pcmFrameLength = pcmFrame.length;
  159. if (pcmFrameLength !== RNNOISE_SAMPLE_LENGTH) {
  160. throw new Error(`Rnnoise can only process PCM frames of 480 samples! Input sample was:${pcmFrameLength}`);
  161. }
  162. this._convertTo16BitPCM(pcmFrame);
  163. this._copyPCMSampleToWasmBuffer(pcmFrame);
  164. return this._wasmInterface._rnnoise_process_frame(this._context, this._wasmPcmOutput, this._wasmPcmInput);
  165. }
  166. }