您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

transcriber.js 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. var AudioRecorder = require( './audioRecorder');
  2. var SphinxService = require(
  3. './transcriptionServices/SphinxTranscriptionService');
  // Lifecycle states of a transcriber. start() requires the "before" state,
  // stop() requires "recording", merging only happens while "transcribing" and
  // the transcription can only be retrieved once "finished" has been reached.
  var BEFORE_STATE = "before";
  var RECORDING_STATE = "recording";
  var TRANSCRIBING_STATE = "transcribing";
  var FINISHED_STATE = "finished";

  // Number of characters after which updateTranscription() wraps to a new line
  var MAXIMUM_SENTENCE_LENGTH = 80;
  /**
   * Main object for handling a transcription. It owns an AudioRecorder which
   * records the audio of a conference and a transcription service to which the
   * recorded audio is sent. The speech-to-text results which come back are
   * merged into one transcript.
   *
   * The constructor takes no arguments; the recorder and the service are
   * created here.
   * @constructor
   */
  var transcriber = function() {
      // collaborators: the recorder is created first, then the service which
      // receives the recorded audio
      this.audioRecorder = new AudioRecorder();
      this.transcriptionService = new SphinxService();

      // current position in the lifecycle
      this.state = BEFORE_STATE;
      // moment start() was called; used to offset the individual recordings
      this.startTime = null;

      // number of answers still expected from the transcription service
      this.counter = null;
      // one Array<Word> per answer received from the transcription service
      this.results = [];

      // the finished transcription (a String) once merging is done
      this.transcription = null;
      // length of the line currently being written by updateTranscription()
      this.lineLength = 0;
      // function invoked with the transcription when it is complete
      this.callback = null;
  };
  /**
   * Starts the transcription process: switches to the "recording" state,
   * tells the audioRecorder to start and stores the current time so that the
   * offset of every recording can be calculated later on.
   *
   * @throws {Error} when the transcriber is not in the "before" state
   */
  transcriber.prototype.start = function start() {
      if(this.state !== BEFORE_STATE){
          // note the trailing space after "in the": without it the message
          // reads: in the"before" state
          throw new Error("The transcription can only start when it's in the " +
              "\"" + BEFORE_STATE + "\" state. It's currently in the " +
              "\"" + this.state + "\" state");
      }
      this.state = RECORDING_STATE;
      this.audioRecorder.start();
      this.startTime = new Date();
  };
  /**
   * Stops the transcription process. The audioRecorder is told to stop and
   * every recording it holds is sent to the transcription service. Once every
   * request has been answered the results are merged and the callback is
   * called with the transcription.
   *
   * When the recorder holds no recordings at all, merging starts right away
   * so that the callback is still called (with an empty transcription).
   *
   * @param callback a callback which will receive the transcription
   * @throws {Error} when the transcriber is not in the "recording" state
   */
  transcriber.prototype.stop = function stop(callback) {
      if(this.state !== RECORDING_STATE){
          throw new Error("The transcription can only stop when it's in the " +
              "\"" + RECORDING_STATE + "\" state. It's currently in the " +
              "\"" + this.state + "\" state");
      }
      //stop the recording
      console.log("stopping recording and sending audio files");
      this.audioRecorder.stop();

      //update the state and store the callback BEFORE anything is sent, so
      //that maybeMerge() functions correctly even if the transcription
      //service answers before send() returns
      this.state = TRANSCRIBING_STATE;
      this.callback = callback;

      //the number of expected answers is known up front; counting while
      //sending would let an early answer see a counter which is too low
      var recordingResults = this.audioRecorder.getRecordingResults();
      this.counter = recordingResults.length;

      //send all recorded audio to the transcription service
      var t = this;
      var callBack = blobCallBack.bind(this);
      recordingResults.forEach(function(recordingResult){
          t.transcriptionService.send(recordingResult, callBack);
      });

      //nothing was recorded, so no answer will ever trigger the merge
      if(recordingResults.length === 0){
          this.maybeMerge();
      }
  };
  /**
   * Handles an answer from the transcription service. The offset between the
   * start of the transcription and the start of the answered recording is
   * added to the begin and end time of every Word object, the word array is
   * stored for merging and the counter of outstanding answers is decreased.
   * Finally it is checked whether merging can start.
   *
   * note: Make sure to bind this as a Transcription object
   *
   * @param {RecordingResult} answer a RecordingResult object with a defined
   * WordArray
   */
  var blobCallBack = function(answer){
      console.log("retrieved an answer from the transcription service. The" +
          " answer has an array of length: " + answer.wordArray.length);
      //first add the offset between the start of the transcription and
      //the start of the recording to all start and end times
      if(answer.wordArray.length > 0) {
          //getTime() gives the absolute time in milliseconds.
          //getUTCMilliseconds() would only give the 0-999 millisecond
          //component of the date, which is useless for calculating an offset
          //NOTE(review): assumes Word.begin/end are in milliseconds - confirm
          //against the transcription service
          var offset = answer.startTime.getTime() - this.startTime.getTime();
          //transcriber time will always be earlier
          if (offset < 0) {
              offset = 0; //presume 0 if it somehow not earlier
          }
          //the words are collected in a string for debug logging only
          var array = "[";
          answer.wordArray.forEach(function(wordObject) {
              wordObject.begin += offset;
              wordObject.end += offset;
              array += wordObject.word+",";
          });
          array += "]";
          console.log(array);
          //give a name value to the Array object so that the merging can access
          //the name value without having to use the whole recordingResult object
          //in the algorithm
          answer.wordArray.name = answer.name;
      }
      //then store the array and decrease the counter
      this.results.push(answer.wordArray);
      this.counter--;
      console.log("current counter: " + this.counter);
      //and check if all results have been received.
      this.maybeMerge();
  };
  /**
   * Starts the merging when, and only when, the transcriber is in the
   * "transcribing" state and the counter of outstanding answers has reached
   * zero. In every other situation nothing happens.
   */
  transcriber.prototype.maybeMerge = function(){
      var notReady = this.state !== TRANSCRIBING_STATE || this.counter !== 0;
      if(notReady){
          return;
      }
      //every answer has been received, so the results can be merged
      this.merge();
  };
  /**
   * Merges all speech-to-text arrays stored in this.results into one readable
   * transcription string. Afterwards the state is set to "finished" and the
   * stored callback, if any, is called with the transcription.
   *
   * The arrays in this.results are consumed: every Word which has been placed
   * in the transcription is removed from its array.
   */
  transcriber.prototype.merge = function() {
      console.log("starting merge process!\n The length of the array: " +
          this.results.length);
      this.transcription = "";
      //the merging algorithm looks at the first Word object of every array
      //and selects the one with the lowest begin time. That word starts a new
      //line with the name of its speaker. Words of the same speaker keep being
      //appended to that line until another speaker has a word with an earlier
      //begin time, or until the speaker has no words left
      var arrays = this.results; //arrays of Word objects
      //check if any arrays are already empty and remove them
      hasPopulatedArrays(arrays);

      var lowestWordArray;
      var wordToAdd;
      var foundSmaller;
      //keep adding words to transcription until all arrays are exhausted
      while(hasPopulatedArrays(arrays)){
          //first select the array whose first word begins the earliest
          lowestWordArray = arrays[0];
          arrays.forEach(function(wordArray){
              if(wordArray[0].begin < lowestWordArray[0].begin){
                  lowestWordArray = wordArray;
              }
          });
          //put the word in the transcription, on a new line with the name
          wordToAdd = lowestWordArray.shift();
          this.updateTranscription(wordToAdd, lowestWordArray.name);
          //the flag has to be cleared for every new speaker turn. If it kept
          //its value from a previous turn, the loop below would never run
          //again and every single word would end up on its own line
          foundSmaller = false;
          //keep going until a word in another array has a smaller time
          //or the array is empty
          while(!foundSmaller && lowestWordArray.length > 0){
              arrays.forEach(function(wordArray){
                  if(wordArray[0].begin < lowestWordArray[0].begin){
                      foundSmaller = true;
                  }
              });
              //add next word if no smaller time has been found
              if(!foundSmaller){
                  wordToAdd = lowestWordArray.shift();
                  this.updateTranscription(wordToAdd, null);
              }
          }
      }
      //set the state to finished and do the necessary left-over tasks
      this.state = FINISHED_STATE;
      if(this.callback){
          this.callback(this.transcription);
      }
  };
  /**
   * Appends a word object to the transcription. When a name is given, a new
   * line starting with "name:" is begun first. When the word would make the
   * current line longer than MAXIMUM_SENTENCE_LENGTH, the word is placed on a
   * new, indented line instead.
   *
   * @param {Word} word the Word object holding the word to append
   * @param {String|null} name the name of a new speaker. Null if not applicable
   */
  transcriber.prototype.updateTranscription = function(word, name){
      //the indent of a wrapped line; lineLength is derived from it so that
      //the text and the bookkeeping can not get out of sync
      var continuationIndent = "    ";
      if(name !== undefined && name !== null){
          this.transcription += "\n" + name + ":";
          this.lineLength = name.length + 1; //+1 for the colon
      }
      if(this.lineLength + word.word.length > MAXIMUM_SENTENCE_LENGTH){
          this.transcription += "\n" + continuationIndent;
          this.lineLength = continuationIndent.length;
      }
      this.transcription += " " + word.word;
      this.lineLength += word.word.length + 1; //+1 for the space
  };
  /**
   * Check if the given 2 dimensional array has any non-zero Word-arrays in them.
   * All zero-element arrays inside will be removed from the given array (the
   * array is modified in place).
   * If any non-zero-element arrays are found, the method will return true.
   * otherwise it will return false
   * @param {Array<Array>} twoDimensionalArray the array to check
   * @returns {boolean} true if any non-zero arrays inside, otherwise false
   */
  var hasPopulatedArrays = function(twoDimensionalArray){
      var i;
      //walk from the back to the front: a removal then only shifts elements
      //which have already been visited. Walking forward would skip the element
      //right after every removal and leave consecutive empty arrays behind
      for(i = twoDimensionalArray.length - 1; i >= 0; i--){
          if(twoDimensionalArray[i].length === 0){
              twoDimensionalArray.splice(i, 1);
          }
      }
      return twoDimensionalArray.length > 0;
  };
  /**
   * Inserts a word into an array which is sorted from lowest to highest begin
   * time, keeping the array sorted. A word whose begin time equals that of
   * words already present is placed after those words.
   *
   * @param {Array<Word>} array the sorted array to push to
   * @param {Word} word the word to push into the array
   */
  var pushWordToSortedArray = function(array, word){
      var lastIndex = array.length - 1;
      //fast path: nothing stored yet, or the word belongs behind the last one
      if(lastIndex < 0 || array[lastIndex].begin <= word.begin){
          array.push(word);
          return;
      }
      //otherwise look for the first stored word which begins later
      var position = 0;
      while(position <= lastIndex && !(word.begin < array[position].begin)){
          position++;
      }
      //when no such word exists position equals array.length, in which case
      //splice simply appends
      array.splice(position, 0, word);
  };
  /**
   * Hands a JitsiTrack to the audioRecorder of this transcriber. Whether the
   * track is actually recorded is up to the audioRecorder; a track which
   * doesn't hold an audiostream will not be added by it.
   * @param {JitsiTrack} track the track to give to the audioRecorder
   */
  transcriber.prototype.addTrack = function(track){
      var recorder = this.audioRecorder;
      recorder.addTrack(track);
  };
  /**
   * Asks the audioRecorder of this transcriber to remove the given track.
   * @param track the track to remove from the audioRecorder
   */
  transcriber.prototype.removeTrack = function(track){
      var recorder = this.audioRecorder;
      recorder.removeTrack(track);
  };
  /**
   * Returns the created transcription if it's available, or throws an error
   * when it's not done yet.
   * @returns {String} the transcription as a String
   * @throws {Error} when the transcriber is not in the "finished" state
   */
  transcriber.prototype.getTranscription = function(){
      if(this.state !== FINISHED_STATE){
          // note the space after "the": without it the message reads:
          // in the"finished" state
          throw new Error("The transcription can only be retrieved when it's in" +
              " the \"" + FINISHED_STATE + "\" state. It's currently in the " +
              "\"" + this.state + "\" state");
      }
      return this.transcription;
  };
  /**
   * Gives the state the transcription process is currently in.
   * @returns the value currently stored in this.state
   */
  transcriber.prototype.getState = function(){
      var currentState = this.state;
      return currentState;
  };
  /**
   * Puts the transcriber back in the "before" state and clears everything
   * which was gathered during a previous run, such that it's again possible
   * to call the start method. The audioRecorder and the transcription service
   * are left untouched.
   */
  transcriber.prototype.reset = function() {
      //back to the beginning of the lifecycle
      this.state = BEFORE_STATE;
      this.startTime = null;

      //forget outstanding and received answers
      this.counter = null;
      this.results = [];

      //forget the previous transcription and who was waiting for it
      this.transcription = null;
      this.lineLength = 0;
      this.callback = null;
  };
  303. module.exports = transcriber;