modified lib-jitsi-meet dev repo
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

transcriber.js 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. import AudioRecorder from './audioRecorder';
  2. import SphinxService from './transcriptionServices/SphinxTranscriptionService';
  // The states a Transcriber moves through, listed in the order in which they
  // are entered: before -> recording -> transcribing -> finished
  const {
      BEFORE_STATE,
      RECORDING_STATE,
      TRANSCRIBING_STATE,
      FINISHED_STATE
  } = {
      BEFORE_STATE: 'before',
      RECORDING_STATE: 'recording',
      TRANSCRIBING_STATE: 'transcribing',
      FINISHED_STATE: 'finished'
  };

  // the maximum number of characters on a single line of the transcription
  const MAXIMUM_SENTENCE_LENGTH = 80;
  /**
   * The main object for handling the transcription. It uses an AudioRecorder
   * to record the audio of a conference and hands the recorded audio to a
   * transcription service. The speech-to-text results which come back are
   * merged into a single transcript.
   *
   * The constructor takes no arguments; it creates its own AudioRecorder and
   * SphinxService instances.
   * @constructor
   */
  function Transcriber() {
      Object.assign(this, {
          // records all audio in the conference
          audioRecorder: new AudioRecorder(),

          // sends the recorded audio to a speech-to-text service
          transcriptionService: new SphinxService(),

          // counts outstanding answers, to know when merging can start
          counter: null,

          // the Date at which the transcription started; used to calculate
          // the offset between recordings
          startTime: null,

          // holds the transcription once it has been completed
          transcription: null,

          // function which is called with the transcription once it is done
          callback: null,

          // every retrieved speech-to-text result (an Array<Word> each),
          // waiting to be merged together
          results: [],

          // the current state of the transcription process
          state: BEFORE_STATE,

          // length of the line currently being written; used by
          // updateTranscription to break lines which become too long
          lineLength: 0
      });
  }
  /**
   * Starts the transcription process: switches to the "recording" state, tells
   * the audioRecorder to start and remembers the start time, which is needed
   * later on to merge the results.
   * @throws {Error} when the transcriber is not in the "before" state
   */
  Transcriber.prototype.start = function start() {
      const currentState = this.state;

      if (currentState !== BEFORE_STATE) {
          const message
              = `The transcription can only start when it's in the "${
                  BEFORE_STATE}" state. It's currently in the "${
                  currentState}" state`;

          throw new Error(message);
      }

      this.state = RECORDING_STATE;
      this.audioRecorder.start();
      this.startTime = new Date();
  };
  /**
   * Stops the transcription process. It tells the audioRecorder to stop and
   * sends every recording result to the transcription service. Once every
   * answer has been received the results are merged and the given callback
   * is called with the transcription.
   * @param {Function} callback a callback which will receive the transcription
   * @throws {Error} when the transcriber is not in the "recording" state
   */
  Transcriber.prototype.stop = function stop(callback) {
      if (this.state !== RECORDING_STATE) {
          throw new Error(
              `The transcription can only stop when it's in the "${
                  RECORDING_STATE}" state. It's currently in the "${
                  this.state}" state`);
      }

      // stop the recording
      console.log('stopping recording and sending audio files');
      this.audioRecorder.stop();

      // Everything maybeMerge() relies on has to be in place BEFORE the first
      // request is sent, otherwise an answer which arrives immediately would
      // find the wrong state and no callback.
      this.state = TRANSCRIBING_STATE;
      this.callback = callback;

      // Hold one extra "guard" count while the requests are being dispatched,
      // so that the counter cannot reach zero before every request is sent.
      this.counter = 1;

      // and send all recorded audio to the transcription service
      const callBack = blobCallBack.bind(null, this);

      this.audioRecorder.getRecordingResults().forEach(recordingResult => {
          this.counter++;
          this.transcriptionService.send(recordingResult, callBack);
      });

      // Release the guard. When there was nothing to send, or every answer
      // has already arrived, this is what starts the merge.
      this.counter--;
      this.maybeMerge();
  };
  /**
   * Handles an answer from the transcription service. It calculates the offset
   * between the start of the transcription and the start of the recording,
   * adds it to the begin and end time of every Word object, stores the word
   * array in the transcriber and decreases the counter of outstanding answers.
   * Finally it asks the transcriber to merge, which only happens once every
   * answer has been received.
   *
   * note: the transcriber is passed as the first argument (see the bind call
   * in Transcriber.prototype.stop); "this" is not used.
   * @param {Transcriber} transcriber the transcriber instance
   * @param {RecordingResult} answer a RecordingResult object with a defined
   * WordArray
   */
  function blobCallBack(transcriber, answer) {
      console.log(
          'retrieved an answer from the transcription service. The answer has an'
              + ` array of length: ${answer.wordArray.length}`);

      // first add the offset between the start of the transcription and
      // the start of the recording to all start and end times
      if (answer.wordArray.length > 0) {
          // getTime() gives the full timestamp in milliseconds.
          // getUTCMilliseconds() would only give the 0-999 millisecond
          // component of the date, which says nothing about elapsed time.
          // NOTE(review): assumes Word.begin/end are in milliseconds - confirm
          // against the transcription service.
          let offset = answer.startTime.getTime()
              - transcriber.startTime.getTime();

          // transcriber time is expected to be the earlier one
          if (offset < 0) {
              offset = 0; // presume 0 if it is somehow not earlier
          }

          let array = '[';

          answer.wordArray.forEach(wordObject => {
              wordObject.begin += offset;
              wordObject.end += offset;
              array += `${wordObject.word},`;
          });
          array += ']';
          console.log(array);

          // give a name value to the Array object so that the merging can
          // access the name value without having to use the whole
          // recordingResult object in the algorithm
          answer.wordArray.name = answer.name;
      }

      // then store the array and decrease the counter
      transcriber.results.push(answer.wordArray);
      transcriber.counter--;
      console.log(`current counter: ${transcriber.counter}`);

      // and check if all results have been received.
      transcriber.maybeMerge();
  }
  /**
   * Starts the merging, but only when the transcriber is in the
   * "transcribing" state and the counter of outstanding answers is zero.
   * In every other case this method does nothing.
   */
  Transcriber.prototype.maybeMerge = function() {
      const notReady
          = this.state !== TRANSCRIBING_STATE || this.counter !== 0;

      if (notReady) {
          return;
      }

      this.merge();
  };
  /**
   * Merges all speech-to-text arrays together into one readable transcription
   * string, sets the state to "finished" and passes the transcription to the
   * stored callback, if there is one.
   *
   * note: the arrays in this.results are consumed while merging; words are
   * shifted out of them and emptied arrays are removed.
   */
  Transcriber.prototype.merge = function() {
      console.log(
          `starting merge process!\n The length of the array: ${
              this.results.length}`);
      this.transcription = '';

      // The merging algorithm looks at the Word object at position 0 of every
      // array. It selects the array whose first word starts the earliest and
      // keeps taking words from that array for as long as no other array has
      // a word which starts earlier.
      const arrays = this.results; // arrays of Word objects

      // check if any arrays are already empty and remove them
      hasPopulatedArrays(arrays);

      // keep adding words to transcription until all arrays are exhausted
      while (hasPopulatedArrays(arrays)) {
          // first select the array whose first word begins the earliest; on
          // a tie the array which comes first wins
          const lowestWordArray = arrays.reduce(
              (lowest, candidate) =>
                  (candidate[0].begin < lowest[0].begin ? candidate : lowest),
              arrays[0]);

          // put the word in the transcription, on a new line which starts
          // with the name of the speaker
          this.updateTranscription(
              lowestWordArray.shift(), lowestWordArray.name);

          // keep going until a word in another array has a smaller time
          // or the array is empty
          while (lowestWordArray.length > 0) {
              const nextBegin = lowestWordArray[0].begin;
              const foundSmaller
                  = arrays.some(wordArray => wordArray[0].begin < nextBegin);

              if (foundSmaller) {
                  break;
              }

              // same speaker, so no name this time
              this.updateTranscription(lowestWordArray.shift(), null);
          }
      }

      // set the state to finished and do the necessary left-over tasks
      this.state = FINISHED_STATE;
      if (this.callback) {
          this.callback(this.transcription);
      }
  };
  /**
   * Appends a word object to the transcription. It will start a new line with
   * the name when a name is specified, and it will break the line when adding
   * the word would make it longer than MAXIMUM_SENTENCE_LENGTH.
   * @param {Word} word the Word object holding the word to append
   * @param {String|null} name the name of a new speaker. Null if not applicable
   */
  Transcriber.prototype.updateTranscription = function(word, name) {
      // the indentation of a line which continues the text of a speaker
      const continuationIndent = '    ';

      if (name !== undefined && name !== null) {
          this.transcription += `\n${name}:`;
          this.lineLength = name.length + 1; // +1 for the colon
      }
      if (this.lineLength + word.word.length > MAXIMUM_SENTENCE_LENGTH) {
          this.transcription += `\n${continuationIndent}`;

          // derived from the string itself, so that the bookkeeping can never
          // disagree with what was actually written
          this.lineLength = continuationIndent.length;
      }
      this.transcription += ` ${word.word}`;
      this.lineLength += word.word.length + 1; // +1 for the space
  };
  /**
   * Check if the given 2 dimensional array has any non-empty Word-arrays in it.
   * All zero-element arrays inside will be removed from the given array (the
   * array is modified in place).
   * If any non-zero-element arrays are found, the method will return true.
   * otherwise it will return false
   * @param {Array<Array>} twoDimensionalArray the array to check
   * @returns {boolean} true if any non-zero arrays inside, otherwise false
   */
  function hasPopulatedArrays(twoDimensionalArray) {
      // Walk backwards: splice() moves every later element one position to
      // the front, so a forward loop would skip the element which directly
      // follows a removed one and leave consecutive empty arrays behind.
      for (let i = twoDimensionalArray.length - 1; i >= 0; i--) {
          if (twoDimensionalArray[i].length === 0) {
              twoDimensionalArray.splice(i, 1);
          }
      }

      return twoDimensionalArray.length > 0;
  }
  /**
   * Inserts a word at the right location in a sorted array. The array is
   * sorted from the lowest to the highest begin time. A word whose begin time
   * is equal to that of words which are already present ends up after them.
   *
   * @param {Array<Word>} array the sorted array to insert into
   * @param {Word} word the word to insert into the array
   */
  function pushWordToSortedArray(array, word) {
      const isEmpty = array.length === 0;

      // the common case: the word belongs at the very end
      if (isEmpty || array[array.length - 1].begin <= word.begin) {
          array.push(word);

          return;
      }

      // otherwise it goes right before the first word which begins later
      const insertAt = array.findIndex(existing => word.begin < existing.begin);

      if (insertAt === -1) {
          array.push(word); // fail safe
      } else {
          array.splice(insertAt, 0, word);
      }
  }
  /**
   * Hands a JitsiTrack to the audioRecorder of this transcriber, so that the
   * audio stream it holds can be transcribed. The track is passed on as is;
   * whether it is actually recorded is decided by the audioRecorder.
   * @param {JitsiTrack} track the track to give to the audioRecorder
   */
  Transcriber.prototype.addTrack = function(track) {
      const { audioRecorder } = this;

      audioRecorder.addTrack(track);
  };
  /**
   * Asks the audioRecorder of this transcriber to remove the given track.
   * @param track the track to remove from the audioRecorder
   */
  Transcriber.prototype.removeTrack = function(track) {
      const { audioRecorder } = this;

      audioRecorder.removeTrack(track);
  };
  /**
   * Gives back the created transcription. This is only possible once the
   * transcriber has reached the "finished" state.
   * @returns {String} the transcription as a String
   * @throws {Error} when the transcriber is not in the "finished" state
   */
  Transcriber.prototype.getTranscription = function() {
      if (this.state === FINISHED_STATE) {
          return this.transcription;
      }

      throw new Error(
          `The transcription can only be retrieved when it's in the "${
              FINISHED_STATE}" state. It's currently in the "${
              this.state}" state`);
  };
  /**
   * Gives back the state the transcription process is currently in.
   * @returns the value of the state property of this transcriber
   */
  Transcriber.prototype.getState = function() {
      const { state } = this;

      return state;
  };
  /**
   * Brings the transcriber back to the "before" state and clears the counter,
   * transcription, start time, callback, results and line length, such that
   * the start method can be called again. The audioRecorder and the
   * transcriptionService are left as they are.
   */
  Transcriber.prototype.reset = function() {
      Object.assign(this, {
          state: BEFORE_STATE,
          counter: null,
          transcription: null,
          startTime: null,
          callback: null,
          results: [],
          lineLength: 0
      });
  };
  301. export default Transcriber;