|
@@ -0,0 +1,317 @@
|
|
1
|
+var AudioRecorder = require( './audioRecorder');
|
|
2
|
+var SphinxService = require(
|
|
3
|
+ './transcriptionServices/SphinxTranscriptionService');
|
|
4
|
+
|
|
5
|
// The states a transcriber can be in. start() requires the "before" state,
// stop() requires the "recording" state, and the transcription can only be
// retrieved in the "finished" state.

/** State of a transcriber on which start() has not been called yet. */
var BEFORE_STATE = "before";

/** State while the audio recorder is recording. */
var RECORDING_STATE = "recording";

/** State while waiting for the answers of the transcription service. */
var TRANSCRIBING_STATE = "transcribing";

/** State once the merged transcription is available. */
var FINISHED_STATE = "finished";

/**
 * The number of characters after which a line of the transcription is
 * wrapped onto a new line.
 */
var MAXIMUM_SENTENCE_LENGTH = 80;
|
|
12
|
+
|
|
13
|
/**
 * Main object of the transcription feature. It uses the given audioRecorder
 * to record the participants of a conference, hands the recorded audio to a
 * transcription service and merges the returned speech-to-text results into
 * a single transcript.
 *
 * @param {AudioRecorder} audioRecorder An audioRecorder recording a conference
 */
var transcriber = function(audioRecorder) {
    // recorder that provides the audio of the conference
    this.audioRecorder = audioRecorder;
    // service the recorded audio is sent to for speech-to-text conversion
    this.transcriptionService = new SphinxService();
    // number of answers of the transcription service that are still pending;
    // merging starts once it is back at zero
    this.counter = null;
    // Date at which the transcription was started; the offset of every
    // recording is calculated relative to it
    this.startTime = null;
    // the finished transcription (a String), null until merging has run
    this.transcription = null;
    // function to invoke with the transcription once it is finished
    this.callback = null;
    // the word arrays (Array<Word>) received from the transcription service,
    // waiting to be merged
    this.results = [];
    // current state of the transcription process
    this.state = BEFORE_STATE;
    // length of the transcription line currently being written; used by
    // updateTranscription to decide when to wrap to a new line
    this.lineLength = 0;
};
|
|
44
|
+
|
|
45
|
/**
 * Starts the transcription process: switches to the "recording" state,
 * records the start time used for merging and tells the audioRecorder to
 * start recording.
 *
 * @throws {Error} when the transcriber is not in the "before" state
 */
transcriber.prototype.start = function start() {
    if(this.state !== BEFORE_STATE){
        throw new Error("The transcription can only start when it's in the " +
            "\"" + BEFORE_STATE + "\" state. It's currently in the " +
            "\"" + this.state + "\" state");
    }
    this.state = RECORDING_STATE;
    //take the reference time before the recorder starts: the offset
    //calculation in blobCallBack presumes that the transcriber's start time
    //is never later than the start time of a recording
    this.startTime = new Date();
    this.audioRecorder.start();
};
|
|
60
|
+
|
|
61
|
/**
 * Stops the transcription process. It tells the audioRecorder to stop and
 * sends every recording result to the transcription service. Once all
 * answers have been received the results are merged and the callback is
 * invoked with the transcription.
 *
 * @param callback a callback which will receive the transcription
 * @throws {Error} when the transcriber is not in the "recording" state
 */
transcriber.prototype.stop = function stop(callback) {
    if(this.state !== RECORDING_STATE){
        throw new Error("The transcription can only stop when it's in the " +
            "\"" + RECORDING_STATE + "\" state. It's currently in the " +
            "\"" + this.state + "\" state");
    }
    //stop the recording
    console.log("stopping recording and sending audio files");
    this.audioRecorder.stop();
    var recordingResults = this.audioRecorder.getRecordingResults();

    //prepare everything maybeMerge() and merge() rely on BEFORE the first
    //request is sent, so that an answer which arrives synchronously finds
    //the right state, the callback and the complete number of pending answers
    this.state = TRANSCRIBING_STATE;
    this.callback = callback;
    this.counter = recordingResults.length;

    //and send all recorded audio to the transcription service
    var service = this.transcriptionService;
    var onAnswer = blobCallBack.bind(this);
    recordingResults.forEach(function(recordingResult){
        service.send(recordingResult, onAnswer);
    });

    //without any recording no answer will ever arrive, so finish right away
    //instead of staying in the "transcribing" state forever
    if(recordingResults.length === 0){
        this.maybeMerge();
    }
};
|
|
89
|
+
|
|
90
|
/**
 * Handles an answer of the transcription service. The offset between the
 * start of the transcription and the start of the recording is added to the
 * begin and end time of every Word object, the word array is stored in the
 * results, the counter of pending answers is decreased and maybeMerge() is
 * called to start the merging once every answer has been received.
 *
 * note: Make sure to bind this as a Transcription object
 *
 * @param {RecordingResult} answer a RecordingResult object with a defined
 * WordArray
 */
var blobCallBack = function(answer){
    console.log("retrieved an answer from the transcription service. The" +
        " answer has an array of length: " + answer.wordArray.length);
    //first add the offset between the start of the transcription and
    //the start of the recording to all start and end times
    if(answer.wordArray.length > 0) {
        //getTime() gives the complete timestamp in milliseconds.
        //getUTCMilliseconds() would only give the 0-999 millisecond field of
        //the date, which says nothing about the time between the two dates
        var offset = answer.startTime.getTime() - this.startTime.getTime();
        //transcriber time will always be earlier
        if (offset < 0) {
            offset = 0; //presume 0 if it is somehow not earlier
        }

        var array = "[";
        answer.wordArray.forEach(function(wordObject) {
            wordObject.begin += offset;
            wordObject.end += offset;
            array += wordObject.word+",";
        });
        array += "]";
        console.log(array);
        //give a name value to the Array object so that the merging can access
        //the name value without having to use the whole recordingResult object
        //in the algorithm
        answer.wordArray.name = answer.name;
    }
    //then store the array and decrease the counter
    this.results.push(answer.wordArray);
    this.counter--;
    console.log("current counter: " + this.counter);
    //and check if all results have been received.
    this.maybeMerge();
};
|
|
133
|
+
|
|
134
|
/**
 * Starts the merging, but only when the transcriber is in the
 * "transcribing" state and the counter of pending answers is zero.
 * In every other situation nothing happens.
 */
transcriber.prototype.maybeMerge = function(){
    var isTranscribing = this.state === TRANSCRIBING_STATE;
    if(!isTranscribing || this.counter !== 0){
        //either not waiting for answers at all or still waiting for some
        return;
    }
    this.merge();
};
|
|
145
|
+
|
|
146
|
/**
 * Merges all speech-to-text arrays in this.results together into one
 * readable transcription string, stored in this.transcription.
 *
 * The algorithm repeatedly looks at the first Word object of every array and
 * picks the array whose first word has the lowest begin time. That word is
 * appended on a new line prefixed with the name of the array, and the
 * following words of the same array are appended to that line for as long as
 * no other array has a first word with a lower begin time. Appended words
 * are removed from their array, so this.results is empty afterwards.
 *
 * When all arrays are exhausted the state is set to "finished" and the
 * stored callback, if any, is invoked with the transcription.
 */
transcriber.prototype.merge = function() {
    console.log("starting merge process!\n The length of the array: " +
        this.results.length);
    this.transcription = "";
    //arrays of Word objects
    var arrays = this.results;
    //check if any arrays are already empty and remove them
    hasPopulatedArrays(arrays);

    var lowestWordArray;
    var wordToAdd;
    var foundSmaller;
    //true when the given array starts with a word that begins earlier than
    //the next word of the array currently being written
    var startsEarlier = function(wordArray){
        return wordArray[0].begin < lowestWordArray[0].begin;
    };

    //keep adding words to transcription until all arrays are exhausted
    while(hasPopulatedArrays(arrays)){
        //first select the array whose first word has the lowest begin time
        lowestWordArray = arrays[0];
        arrays.forEach(function(wordArray){
            if(wordArray[0].begin < lowestWordArray[0].begin){
                lowestWordArray = wordArray;
            }
        });
        //put the word in the transcription, on a new line with the name
        wordToAdd = lowestWordArray.shift();
        this.updateTranscription(wordToAdd, lowestWordArray.name);

        //keep going until a word in another array has a smaller time
        //or the array is empty. The flag has to be reset for every selected
        //array: a stale "true" would skip this loop from then on and put
        //every remaining word on its own line
        foundSmaller = false;
        while(!foundSmaller && lowestWordArray.length > 0){
            foundSmaller = arrays.some(startsEarlier);
            //add next word if no smaller time has been found
            if(!foundSmaller){
                wordToAdd = lowestWordArray.shift();
                this.updateTranscription(wordToAdd, null);
            }
        }
    }

    //set the state to finished and do the necessary left-over tasks
    this.state = FINISHED_STATE;
    if(this.callback){
        this.callback(this.transcription);
    }
};
|
|
209
|
+
|
|
210
|
/**
 * Appends a word object to the transcription. When a name is specified the
 * word is put on a new line which starts with that name followed by a colon.
 * When appending the word would make the current line longer than
 * MAXIMUM_SENTENCE_LENGTH, the word is put on a new, indented line instead.
 *
 * @param {Word} word the Word object holding the word to append
 * @param {String|null} name the name of a new speaker. Null if not applicable
 */
transcriber.prototype.updateTranscription = function(word, name){
    //the indentation of a wrapped line; the counted length is derived from
    //this string so that the two can never disagree
    var continuationIndent = "    ";

    if(name !== undefined && name !== null){
        this.transcription += "\n" + name + ":";
        this.lineLength = name.length + 1; //+1 for the colon
    }
    //+1 for the space which is written in front of the word
    if(this.lineLength + 1 + word.word.length > MAXIMUM_SENTENCE_LENGTH){
        this.transcription += "\n" + continuationIndent;
        this.lineLength = continuationIndent.length;
    }
    this.transcription += " " + word.word;
    this.lineLength += word.word.length + 1; //+1 for the space
};
|
|
228
|
+
|
|
229
|
/**
 * Check if the given 2 dimensional array has any non-zero Word-arrays in them.
 * All zero-element arrays inside will be removed from the given array (the
 * array is modified in place).
 * If any non-zero-element arrays are found, the method will return true.
 * otherwise it will return false
 *
 * @param {Array<Array>} twoDimensionalArray the array to check
 * @returns {boolean} true if any non-zero arrays inside, otherwise false
 */
var hasPopulatedArrays = function(twoDimensionalArray){
    var i;
    //walk from the back to the front: removing an element only shifts the
    //elements behind it, which have already been checked. Walking forward
    //would skip the element following every removed one, leaving adjacent
    //empty arrays behind
    for(i = twoDimensionalArray.length - 1; i >= 0; i--){
        if(twoDimensionalArray[i].length === 0){
            twoDimensionalArray.splice(i, 1);
        }
    }
    return twoDimensionalArray.length > 0;
};
|
|
246
|
+
|
|
247
|
/**
 * Inserts a word into an array which is sorted from the lowest to the
 * highest begin time, keeping the array sorted. A word whose begin time is
 * not lower than the begin time of the last element is appended at the end;
 * otherwise it is placed in front of the first element with a higher begin
 * time.
 *
 * @param {Array<Word>} array the sorted array to push to
 * @param {Word} word the word to push into the array
 */
var pushWordToSortedArray = function(array, word){
    //by default the word goes to the end of the array
    var insertAt = array.length;
    var lastIndex = array.length - 1;
    var belongsAtEnd = lastIndex < 0 ||
        array[lastIndex].begin <= word.begin;
    var idx = 0;

    if(!belongsAtEnd){
        //look for the first element which begins later than the word
        while(idx < array.length){
            if(word.begin < array[idx].begin){
                insertAt = idx;
                break;
            }
            idx++;
        }
    }
    array.splice(insertAt, 0, word);
};
|
|
274
|
+
|
|
275
|
/**
 * Returns the AudioRecorder module to add and remove tracks to
 *
 * @returns {AudioRecorder} the audioRecorder this transcriber was created
 * with
 */
transcriber.prototype.getAudioRecorder = function getAudioRecorder() {
    return this.audioRecorder;
};

//The method used to be attached to the constructor itself, where "this" is
//the constructor and not a transcriber instance. The property is kept so
//that code which references transcriber.getAudioRecorder keeps working.
transcriber.getAudioRecorder = transcriber.prototype.getAudioRecorder;
|
|
281
|
+
|
|
282
|
/**
 * Will return the created transcription if it's available or throw an error
 * when it's not done yet
 *
 * @returns {String} the transcription as a String
 * @throws {Error} when the transcriber is not in the "finished" state
 */
transcriber.prototype.getTranscription = function(){
    if(this.state !== FINISHED_STATE){
        throw new Error("The transcription can only be retrieved when it's in" +
            " the \"" + FINISHED_STATE + "\" state. It's currently in the " +
            "\"" + this.state + "\" state");
    }
    return this.transcription;
};
|
|
295
|
+
|
|
296
|
/**
 * Gives the state the transcription process is currently in.
 *
 * @returns the value currently stored in the state property
 */
transcriber.prototype.getState = function(){
    var currentState = this.state;
    return currentState;
};
|
|
302
|
+
|
|
303
|
/**
 * Brings the transcriber back to the "before" state and clears everything
 * which was collected during a previous run, so that the start method can
 * be called again.
 */
transcriber.prototype.reset = function() {
    //forget the results of the previous run
    this.results = [];
    this.transcription = null;
    this.lineLength = 0;
    //forget the bookkeeping of the previous run
    this.counter = null;
    this.startTime = null;
    this.callback = null;
    //and allow start() to be called again
    this.state = BEFORE_STATE;
};
|
|
316
|
+
|
|
317
|
+module.exports = transcriber;
|