You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. const AudioRecorder = require('./audioRecorder');
  2. const SphinxService = require(
  3. './transcriptionServices/SphinxTranscriptionService');
  // Lifecycle states of a transcriber, listed in the order they are reached.
  // Initial state: start() has not been called yet.
  const BEFORE_STATE = 'before';
  // start() has been called and audio is being recorded.
  const RECORDING_STATE = 'recording';
  // stop() has been called and speech-to-text results are being collected.
  const TRANSCRIBING_STATE = 'transcribing';
  // All results have been merged; the transcription can be retrieved.
  const FINISHED_STATE = 'finished';

  // Upper bound on the number of characters on one line of the transcription.
  const MAXIMUM_SENTENCE_LENGTH = 80;
  /**
   * Main object for handling the transcription. It uses an AudioRecorder to
   * record the audio of a conference and hands the recorded audio to a
   * transcription service. The speech-to-text results that come back are
   * merged into a single transcript.
   *
   * The constructor takes no arguments: it creates its own AudioRecorder and
   * SphinxService instances.
   * @constructor
   */
  const transcriber = function() {
      // --- collaborators ---------------------------------------------------
      // records all the audio in the conference
      this.audioRecorder = new AudioRecorder();
      // sends recorded audio to a speech-to-text service
      this.transcriptionService = new SphinxService();

      // --- lifecycle -------------------------------------------------------
      // current state of the transcription process
      this.state = BEFORE_STATE;
      // moment the transcription started; used to compute the offset between
      // the individual recordings
      this.startTime = null;
      // number of speech-to-text answers still outstanding; merging starts
      // once it reaches zero
      this.counter = null;
      // function invoked with the transcription once it is complete
      this.callback = null;

      // --- output ----------------------------------------------------------
      // every speech-to-text result received so far, one Array<Word> each
      this.results = [];
      // the finished transcription, or null while it is not available
      this.transcription = null;
      // length of the line currently being written; updateTranscription uses
      // it to break lines that become too long
      this.lineLength = 0;
  };
  /**
   * Starts the transcription process: switches to the "recording" state,
   * tells the audioRecorder to start recording and remembers the start time
   * so that recordings can later be aligned while merging.
   *
   * @throws {Error} if the transcriber is not in the "before" state
   */
  transcriber.prototype.start = function start() {
      const currentState = this.state;

      if (currentState === BEFORE_STATE) {
          this.state = RECORDING_STATE;
          this.audioRecorder.start();
          this.startTime = new Date();

          return;
      }

      throw new Error(
          `The transcription can only start when it's in the "${
              BEFORE_STATE}" state. It's currently in the "${
              currentState}" state`);
  };
  /**
   * Stops the transcription process. The audioRecorder is told to stop, and
   * every recording it holds is sent to the transcription service. Once all
   * answers have been received the results are merged and handed to the
   * callback.
   *
   * @param {Function} callback receives the transcription (a String) once
   * merging has finished. When nothing was recorded it is invoked right away
   * with an empty transcription.
   * @throws {Error} if the transcriber is not in the "recording" state
   */
  transcriber.prototype.stop = function stop(callback) {
      if (this.state !== RECORDING_STATE) {
          throw new Error(
              `The transcription can only stop when it's in the "${
                  RECORDING_STATE}" state. It's currently in the "${
                  this.state}" state`);
      }

      // stop the recording
      console.log('stopping recording and sending audio files');
      this.audioRecorder.stop();

      // NOTE(review): assumes getRecordingResults() returns an Array (it is
      // only known to support forEach) - confirm against audioRecorder.
      const recordingResults = this.audioRecorder.getRecordingResults();

      // All bookkeeping must be in place BEFORE the first request is sent:
      // a service which answers synchronously would otherwise decrement the
      // counter before it was incremented and find the wrong state in
      // maybeMerge(), so the merge would never be triggered.
      this.callback = callback;
      this.counter = recordingResults.length;
      this.state = TRANSCRIBING_STATE;

      // and send all recorded audio to the transcription service
      const callBack = blobCallBack.bind(this);

      recordingResults.forEach(recordingResult => {
          this.transcriptionService.send(recordingResult, callBack);
      });

      // With zero recordings no answer will ever arrive, so check here too.
      // This is a no-op when answers are still pending or when the merge
      // already ran (the state is then no longer "transcribing").
      this.maybeMerge();
  };
  /**
   * Handles one answer from the transcription service. The offset between
   * the start of the transcription and the start of the recording is added
   * to the begin and end time of every Word, the word array is stored, the
   * counter of outstanding answers is decreased and maybeMerge() is called
   * to start merging once every answer has been received.
   *
   * note: Make sure to bind `this` to the transcriber object
   *
   * @param {RecordingResult} answer a RecordingResult object with a defined
   * wordArray, startTime (Date) and name
   */
  const blobCallBack = function(answer) {
      console.log(
          'retrieved an answer from the transcription service. The answer has an'
          + ` array of length: ${answer.wordArray.length}`);

      if (answer.wordArray.length > 0) {
          // Use getTime() (milliseconds since the epoch) for the difference.
          // getUTCMilliseconds() only yields the 0-999 millisecond component
          // of a date, which says nothing about how far apart two dates are.
          // The transcriber is expected to start before any recording, so a
          // negative difference is treated as no offset at all.
          const offset = Math.max(
              0,
              answer.startTime.getTime() - this.startTime.getTime());

          answer.wordArray.forEach(wordObject => {
              wordObject.begin += offset;
              wordObject.end += offset;
          });

          const words = answer.wordArray
              .map(wordObject => `${wordObject.word},`)
              .join('');

          console.log(`[${words}]`);

          // give a name value to the Array object so that the merging can
          // access the name without needing the whole recordingResult object
          answer.wordArray.name = answer.name;
      }

      // then store the array and decrease the counter
      this.results.push(answer.wordArray);
      this.counter--;
      console.log(`current counter: ${this.counter}`);

      // and check if all results have been received.
      this.maybeMerge();
  };
  /**
   * Starts the merge when the transcriber is in the "transcribing" state and
   * the counter of outstanding answers has reached zero. Does nothing
   * otherwise.
   */
  transcriber.prototype.maybeMerge = function() {
      if (this.state !== TRANSCRIBING_STATE || this.counter !== 0) {
          // either not transcribing, or still waiting for answers
          return;
      }

      this.merge();
  };
  /**
   * Merges all speech-to-text arrays into one readable transcription string,
   * stores it in this.transcription, switches to the "finished" state and
   * passes the transcription to the stored callback, if there is one.
   *
   * The arrays in this.results are consumed (emptied and removed) while
   * merging.
   */
  transcriber.prototype.merge = function() {
      console.log(
          `starting merge process!\n The length of the array: ${
              this.results.length}`);
      this.transcription = '';

      // The algorithm looks at the first Word of every array and picks the
      // array whose first Word starts earliest. Words are then taken from
      // that array, as one speaker turn, until another array holds a Word
      // which starts earlier or the array runs out.
      const arrays = this.results;

      // drop the arrays which are empty from the start
      hasPopulatedArrays(arrays);

      // keep adding words to the transcription until all arrays are exhausted
      while (hasPopulatedArrays(arrays)) {
          // select the array whose first word starts earliest; on a tie the
          // array which comes first wins
          let lowestWordArray = arrays[0];

          arrays.forEach(wordArray => {
              if (wordArray[0].begin < lowestWordArray[0].begin) {
                  lowestWordArray = wordArray;
              }
          });

          // the first word of a turn is prefixed with the speaker's name
          this.updateTranscription(
              lowestWordArray.shift(),
              lowestWordArray.name);

          // Continue the turn while no other array has an earlier word. The
          // check is evaluated afresh for every word and every turn; a flag
          // carried over from a previous turn would end each later turn
          // after a single word.
          while (lowestWordArray.length > 0
                  && !arrays.some(wordArray =>
                      wordArray[0].begin < lowestWordArray[0].begin)) {
              this.updateTranscription(lowestWordArray.shift(), null);
          }
      }

      // set the state to finished and do the necessary left-over tasks
      this.state = FINISHED_STATE;
      if (this.callback) {
          this.callback(this.transcription);
      }
  };
  /**
   * Appends a word object to the transcription. When a name is given, a new
   * line starting with "<name>:" is begun first. When the word would make the
   * current line longer than MAXIMUM_SENTENCE_LENGTH, an indented
   * continuation line is begun first.
   *
   * @param {Word} word the Word object holding the word to append
   * @param {String|null} name the name of a new speaker. Null if not applicable
   */
  transcriber.prototype.updateTranscription = function(word, name) {
      // Indentation of a continuation line. The line length bookkeeping below
      // is derived from it, so the two cannot drift apart.
      const continuationIndent = '    ';

      if (name !== undefined && name !== null) {
          this.transcription += `\n${name}:`;
          this.lineLength = name.length + 1; // +1 for the colon
      }
      if (this.lineLength + word.word.length > MAXIMUM_SENTENCE_LENGTH) {
          this.transcription += `\n${continuationIndent}`;
          this.lineLength = continuationIndent.length;
      }
      this.transcription += ` ${word.word}`;
      this.lineLength += word.word.length + 1; // +1 for the space
  };
  /**
   * Removes every zero-length array from the given two dimensional array, in
   * place, and reports whether any non-empty arrays are left.
   *
   * @param {Array<Array>} twoDimensionalArray the array to check; it is
   * modified in place
   * @returns {boolean} true if at least one non-empty array remains,
   * otherwise false
   */
  const hasPopulatedArrays = function(twoDimensionalArray) {
      // Walk backwards: splicing only shifts the elements behind index i,
      // which have been checked already. A forward walk skips the element
      // that slides into the removed slot, leaving every second one of a run
      // of consecutive empty arrays behind.
      for (let i = twoDimensionalArray.length - 1; i >= 0; i--) {
          if (twoDimensionalArray[i].length === 0) {
              twoDimensionalArray.splice(i, 1);
          }
      }

      return twoDimensionalArray.length > 0;
  };
  /**
   * Inserts a word into an array which is sorted by ascending `begin` time,
   * keeping it sorted. A word whose begin time equals that of words already
   * present is placed after them.
   *
   * @param {Array<Word>} array the sorted array to insert into; it is
   * modified in place
   * @param {Word} word the word to insert
   */
  const pushWordToSortedArray = function(array, word) {
      const lastIndex = array.length - 1;

      // fast path: nothing stored yet, or the word belongs at the very end
      if (array.length === 0 || array[lastIndex].begin <= word.begin) {
          array.push(word);

          return;
      }

      // otherwise it goes right before the first entry which starts later
      const insertAt = array.findIndex(entry => word.begin < entry.begin);

      if (insertAt === -1) {
          array.push(word); // fail safe
      } else {
          array.splice(insertAt, 0, word);
      }
  };
  /**
   * Hands a JitsiTrack over to the audioRecorder by calling its addTrack
   * method with the given track.
   *
   * @param {JitsiTrack} track the track to give to the audioRecorder
   */
  transcriber.prototype.addTrack = function(track) {
      const { audioRecorder } = this;

      audioRecorder.addTrack(track);
  };
  /**
   * Removes the given track from the audioRecorder by calling its
   * removeTrack method with the given track.
   *
   * @param track the track to remove
   */
  transcriber.prototype.removeTrack = function(track) {
      const { audioRecorder } = this;

      audioRecorder.removeTrack(track);
  };
  /**
   * Returns the created transcription once it is available.
   *
   * @returns {String} the transcription as a String
   * @throws {Error} if the transcriber is not in the "finished" state
   */
  transcriber.prototype.getTranscription = function() {
      if (this.state === FINISHED_STATE) {
          return this.transcription;
      }

      throw new Error(
          `The transcription can only be retrieved when it's in the "${
              FINISHED_STATE}" state. It's currently in the "${
              this.state}" state`);
  };
  /**
   * Reports which state the transcription process is currently in.
   *
   * @returns {String} the value of this.state
   */
  transcriber.prototype.getState = function() {
      const { state } = this;

      return state;
  };
  /**
   * Puts the transcriber back into the "before" state and clears the counter,
   * transcription, start time, callback, stored results and line length, so
   * that start() can be called again.
   */
  transcriber.prototype.reset = function() {
      Object.assign(this, {
          state: BEFORE_STATE,
          counter: null,
          transcription: null,
          startTime: null,
          callback: null,
          // a fresh array, so previously stored results are dropped
          results: [],
          lineLength: 0
      });
  };
  306. module.exports = transcriber;