jfinn
/
ljm_a0


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
							import AudioRecorder from './audioRecorder';
import SphinxService from './transcriptionServices/SphinxTranscriptionService';

// The states a Transcriber moves through:
// "before" -> "recording" (set by start) -> "transcribing" (set by stop)
// -> "finished" (set by merge). reset() puts it back into "before".
const BEFORE_STATE = 'before';
const RECORDING_STATE = 'recording';
const TRANSCRIBING_STATE = 'transcribing';
const FINISHED_STATE = 'finished';

// the maximum number of characters a line of the transcription may reach
// before updateTranscription continues on a new line
const MAXIMUM_SENTENCE_LENGTH = 80;

/**
 * This is the main object for handling the transcription. It creates its own
 * AudioRecorder to record the audio of a conference and its own transcription
 * service to which the recorded audio is sent. The speech-to-text results
 * which come back are merged into a single transcript.
 *
 * @constructor
 */
function Transcriber() {
    Object.assign(this, {
        // the object which records the audio in the conference
        audioRecorder: new AudioRecorder(),

        // the object which sends the recorded audio to a speech-to-text
        // service
        transcriptionService: new SphinxService(),

        // counts the recordings which have been sent but not yet answered;
        // merging starts once it is back at zero
        counter: null,

        // the date at which transcription started, used to calculate the
        // offset between recordings
        startTime: null,

        // holds the transcription once it is completed
        transcription: null,

        // the function to call with the transcription once it is done
        callback: null,

        // all retrieved speech-to-text results, one Array<Word> per answer
        results: [],

        // the current state of the transcription process
        state: BEFORE_STATE,

        // length of the line currently being written; updateTranscription
        // uses it to decide when to start a new line
        lineLength: 0
    });
}

/**
 * Starts the transcription process: moves to the "recording" state, tells the
 * audioRecorder to start and stores the current date as the start time, which
 * is needed later on for merging.
 *
 * @throws {Error} when the transcriber is not in the "before" state
 */
Transcriber.prototype.start = function start() {
    const { state } = this;

    if (state !== BEFORE_STATE) {
        const message
            = `The transcription can only start when it's in the "${
                BEFORE_STATE}" state. It's currently in the "${state}" state`;

        throw new Error(message);
    }

    this.state = RECORDING_STATE;
    this.audioRecorder.start();
    this.startTime = new Date();
};

/**
 * Method to stop the transcription process. It will tell the audioRecorder to
 * stop and send every recording result to the transcription service. Once all
 * answers have been received the results are merged and the callback is
 * called with the transcription.
 *
 * When there is nothing to send, the merge runs right away and the callback
 * receives an empty transcription before this method returns.
 *
 * @param {Function} callback a callback which will receive the transcription
 * @throws {Error} when the transcriber is not in the "recording" state
 */
Transcriber.prototype.stop = function stop(callback) {
    if (this.state !== RECORDING_STATE) {
        throw new Error(
            `The transcription can only stop when it's in the "${
                RECORDING_STATE}" state. It's currently in the "${
                this.state}" state`);
    }

    // stop the recording
    console.log('stopping recording and sending audio files');
    this.audioRecorder.stop();

    // take a snapshot of the results first, so that the number of pending
    // answers is known before the first request goes out
    const pendingResults = [];

    this.audioRecorder.getRecordingResults().forEach(recordingResult => {
        pendingResults.push(recordingResult);
    });

    // everything blobCallBack and maybeMerge depend on has to be in place
    // before sending: an answer which arrives immediately would otherwise be
    // counted wrongly and ignored because the state is still "recording"
    this.callback = callback;
    this.counter = pendingResults.length;
    this.state = TRANSCRIBING_STATE;

    // and send all recorded audio to the transcription service
    const callBack = blobCallBack.bind(null, this);

    pendingResults.forEach(recordingResult => {
        this.transcriptionService.send(recordingResult, callBack);
    });

    // when nothing was sent no answer will ever trigger the merge, so check
    // here; this does nothing while answers are still outstanding or when the
    // merge has already happened
    this.maybeMerge();
};

/**
 * Handles an answer from the transcription service. It calculates the offset
 * between the start of the transcription and the start of the recording and
 * adds it to the begin and end time of every Word object. The word array is
 * then stored in the transcriber, the counter of pending answers is decreased
 * and the transcriber is asked to merge if everything has been received.
 *
 * @param {Transcriber} transcriber the transcriber instance which sent the
 * recording
 * @param {RecordingResult} answer a RecordingResult object with a defined
 * wordArray; its startTime and name are read when the wordArray is not empty
 */
function blobCallBack(transcriber, answer) {
    const { wordArray } = answer;

    console.log(
        'retrieved an answer from the transcription service. The answer has an'
            + ` array of length: ${wordArray.length}`);

    if (wordArray.length > 0) {
        // Compare absolute timestamps. getUTCMilliseconds() only returns the
        // millisecond field (0-999) of a date, so using it would lose every
        // whole second of the difference.
        // The transcriber is expected to have started first; presume 0 if
        // the recording somehow started earlier.
        const offset = Math.max(
            0,
            answer.startTime.getTime() - transcriber.startTime.getTime());

        // shift every word and collect the words for the debug log
        let array = '[';

        wordArray.forEach(wordObject => {
            wordObject.begin += offset;
            wordObject.end += offset;
            array += `${wordObject.word},`;
        });
        array += ']';
        console.log(array);

        // give a name value to the Array object so that the merging can access
        // the name value without having to use the whole recordingResult object
        // in the algorithm
        wordArray.name = answer.name;
    }

    // then store the array and decrease the counter
    transcriber.results.push(wordArray);
    transcriber.counter--;
    console.log(`current counter: ${transcriber.counter}`);

    // and check if all results have been received.
    transcriber.maybeMerge();
}

/**
 * Starts the merging when the transcriber is in the "transcribing" state and
 * the counter has reached zero. Does nothing otherwise.
 */
Transcriber.prototype.maybeMerge = function() {
    if (this.state !== TRANSCRIBING_STATE) {
        return;
    }
    if (this.counter !== 0) {
        return;
    }

    this.merge();
};

/**
 * This method will merge all speech-to-text arrays together in one
 * readable transcription string. The arrays in this.results are emptied in
 * the process. When done, the state is set to "finished" and the stored
 * callback, if any, is called with the transcription.
 */
Transcriber.prototype.merge = function() {
    console.log(
        `starting merge process!\n The length of the array: ${
            this.results.length}`);
    this.transcription = '';

    // The merging algorithm looks at the Word objects at position 0 of every
    // array. It takes the one with the lowest begin time and keeps taking
    // words from that same array for as long as no other array has a first
    // word which begins earlier. Taken words are removed from their array.
    const arrays = this.results;

    // check if any arrays are already empty and remove them
    hasPopulatedArrays(arrays);

    // keep adding words to transcription until all arrays are exhausted
    while (hasPopulatedArrays(arrays)) {
        // first select the array whose first word begins earliest; on a tie
        // the array which comes first wins
        let lowestWordArray = arrays[0];

        arrays.forEach(wordArray => {
            if (wordArray[0].begin < lowestWordArray[0].begin) {
                lowestWordArray = wordArray;
            }
        });

        // put the word in the transcription; passing the name starts a new
        // line for this speaker
        this.updateTranscription(
            lowestWordArray.shift(), lowestWordArray.name);

        // keep going until a word in another array has a smaller time
        // or the array is empty
        while (lowestWordArray.length > 0) {
            const nextBegin = lowestWordArray[0].begin;
            const foundSmaller
                = arrays.some(wordArray => wordArray[0].begin < nextBegin);

            if (foundSmaller) {
                break;
            }

            // same speaker continues, so no name is passed
            this.updateTranscription(lowestWordArray.shift(), null);
        }
    }

    // set the state to finished and do the necessary left-over tasks
    this.state = FINISHED_STATE;
    if (this.callback) {
        this.callback(this.transcription);
    }
};

/**
 * Appends the word held by a Word object to the transcription. When a name is
 * given, a new line starting with that name is begun first. When the word
 * would make the current line longer than MAXIMUM_SENTENCE_LENGTH, it is put
 * on a new, indented line instead.
 *
 * @param {Word} word the Word object holding the word to append
 * @param {String|null} name the name of a new speaker. Null if not applicable
 */
Transcriber.prototype.updateTranscription = function(word, name) {
    const hasName = name !== null && name !== undefined;

    if (hasName) {
        this.transcription += `\n${name}:`;
        this.lineLength = name.length + 1; // +1 for the colon
    }

    const text = word.word;
    const lengthWithWord = this.lineLength + text.length;

    if (lengthWithWord > MAXIMUM_SENTENCE_LENGTH) {
        this.transcription += '\n    ';
        this.lineLength = 4; // because of the 4 spaces after the new line
    }

    this.transcription += ` ${text}`;
    this.lineLength += text.length + 1; // +1 for the space
};

/**
 * Removes every zero-element array from the given 2 dimensional array, in
 * place, and reports whether anything is left afterwards.
 *
 * @param {Array<Array>} twoDimensionalArray the array to check; it is
 * modified by this function
 * @returns {boolean} true if any non-zero arrays inside, otherwise false
 */
function hasPopulatedArrays(twoDimensionalArray) {
    // Walk from the back: removing an element shifts only the elements
    // behind it, which have been visited already. Walking forward would skip
    // the element following each removal, leaving empty arrays behind.
    for (let i = twoDimensionalArray.length - 1; i >= 0; i--) {
        if (twoDimensionalArray[i].length === 0) {
            twoDimensionalArray.splice(i, 1);
        }
    }

    return twoDimensionalArray.length > 0;
}

/**
 * Inserts a word into an array which is sorted from lowest to highest begin
 * time, keeping it sorted. A word whose begin time equals that of words
 * already present is placed after them. A word for which no position can be
 * found is appended at the end.
 *
 * @param {Array<Word>} array the sorted array to push to
 * @param {Word} word the word to push into the array
 */
function pushWordToSortedArray(array, word) {
    const { length } = array;

    // common case: nothing stored yet, or the word belongs at the very end
    if (length === 0 || array[length - 1].begin <= word.begin) {
        array.push(word);

        return;
    }

    // otherwise put it in front of the first word which begins later
    const position = array.findIndex(other => word.begin < other.begin);

    if (position === -1) {
        array.push(word); // fail safe
    } else {
        array.splice(position, 0, word);
    }
}

/**
 * Hands a track to the audioRecorder of this transcriber by calling its
 * addTrack method.
 *
 * @param {JitsiTrack} track the track to give to the audioRecorder
 */
Transcriber.prototype.addTrack = function(track) {
    const { audioRecorder } = this;

    audioRecorder.addTrack(track);
};

/**
 * Asks the audioRecorder of this transcriber to remove the given track by
 * calling its removeTrack method.
 *
 * @param track the track to remove from the audioRecorder
 */
Transcriber.prototype.removeTrack = function(track) {
    const { audioRecorder } = this;

    audioRecorder.removeTrack(track);
};

/**
 * Returns the created transcription once the transcriber has reached the
 * "finished" state.
 *
 * @returns {String} the transcription as a String
 * @throws {Error} when the transcriber is not in the "finished" state
 */
Transcriber.prototype.getTranscription = function() {
    const { state } = this;

    if (state === FINISHED_STATE) {
        return this.transcription;
    }

    const message
        = `The transcription can only be retrieved when it's in the "${
            FINISHED_STATE}" state. It's currently in the "${state}" state`;

    throw new Error(message);
};

/**
 * Gives the state the transcription process is currently in.
 *
 * @returns {String} the value of the state property
 */
Transcriber.prototype.getState = function() {
    const { state } = this;

    return state;
};

/**
 * Puts the transcriber back into the "before" state and clears the counter,
 * transcription, start time, callback, results and line length, such that
 * it's again possible to call the start method.
 */
Transcriber.prototype.reset = function() {
    Object.assign(this, {
        state: BEFORE_STATE,
        counter: null,
        transcription: null,
        startTime: null,
        callback: null,
        results: [],
        lineLength: 0
    });
};

export default Transcriber;