import dayjs from 'dayjs';
import duration from 'dayjs/plugin/duration';
import { EventEmitter } from 'events';
import deepcopy from 'deepcopy';
import { getChangedRangeFromDelta } from 'libs/quill-utils';
import { isNoise } from 'libs/is-noise';
import { StringAligner } from './StringAligner/stringaligner';
import { getAllMatchIndices, TIME_ANCHOR_SYMBOL, CAPTION_END_SYMBOL, } from './text-utils';
import { BEGIN, END } from './text-metadata';
// enables dayjs.duration(), which fromTrsx uses to parse phrase begin/end attributes.
dayjs.extend(duration);
/* time budget for one aligner comparison, in seconds (converted to a
   millisecond deadline where it is passed to the string aligner). */
export const ALIGNMENT_TIME_LIMIT = 10;
/* events emitted by DocumentAlignment; subscribe via addEventListener. */
export const ALIGNER_EVENTS = Object.freeze({
    ALIGNED: Symbol('aligned'),
});
// characters that terminate a segment: a newline or the caption end symbol.
const SEGMENT_END_REGEX = new RegExp(`[\\n${CAPTION_END_SYMBOL}]`, 'g');
// characters that separate words: space, non-breaking space or the time anchor symbol.
const WORD_BOUNDARY_REGEX = new RegExp(`[\u0020\u00a0${TIME_ANCHOR_SYMBOL}]`, 'g');
// how many text positions before / after a change are included in the range to align.
const MARGIN_BEFORE = 150;
const MARGIN_AFTER = 150;
/*
   Duration of one dummy word used to fill silent gaps between phrases.
   DUMMY_WORD_LENGTH needs to be a little faster (shorter) than the actual
   average word length, so that caption length is based on automatic caption
   speed and not on the last dummy word position.
*/
const DUMMY_WORD_LENGTH = 0.25;
// costs handed to StringAligner when it is constructed.
const INSERTION_PENALTY = 1;
const DELETION_PENALTY = 1;
const SUBSTITUTION_PENALTY = 1.5;
const INSERT_BETWEEEN_PARAGRAPHS_PENALTY = 0.9;
// chunk size handed to StringAligner when it is constructed.
const CHUNK_SIZE = 1500;
/* limits num to the interval [min, max]: the lower bound is applied first, then the upper one. */
const clamp = (num, min, max) => {
    const raisedToMin = Math.max(num, min);
    return Math.min(raisedToMin, max);
};
/* handles finding optimal timestamps for changed parts of the document.
   Utilizes the seqalign library to match the document with the original machine
   transcription and transfer the timestamps.
*/
export class DocumentAlignment {
    /* editorController is the object this class reads text, line formats and
       text metadata from, and through which it writes timestamps and the
       'unaligned' line format back. */
    constructor(editorController) {
        // if the transcription ends with noises, this will allow aligner to
        // align words added by user to the whole length of the recording.
        this.finalizePhrases = (recordingDuration) => {
            this.loadPhrase('_ ', recordingDuration, recordingDuration);
        };
        this.editorController = editorController;
        this.stringAligner = new StringAligner([], [], INSERTION_PENALTY, DELETION_PENALTY, SUBSTITUTION_PENALTY, INSERT_BETWEEEN_PARAGRAPHS_PENALTY, CHUNK_SIZE);
        this.lastWordEnd = 0; // end time of the last loaded word, noises excluded
        this.lastPhraseEnd = 0; // end time of the last loaded phrase, noises included
        this.emitter = new EventEmitter();
    }
    /* subscribes listener to one of ALIGNER_EVENTS. */
    addEventListener(event, listener) {
        this.emitter.on(event, listener);
    }
    /* aligns the whole document in one pass and then refreshes the 'unaligned'
     * paragraph marks. The marks are refreshed even when alignment throws;
     * the error is still propagated to the caller.
     */
    alignAll() {
        const from = 0;
        const to = this.editorController.getLength();
        try {
            this.alignRange(from, to, 0, Infinity);
        }
        finally {
            this.editorController.execTextChange({ runAligner: false, requestSave: false }, () => {
                this.highlightMissingTimestamps(from, to);
            });
        }
    }
    /* update the timestamps of the document. Selects the range of
     * the document that needs to be aligned, finds the optimal
     * timestamps and sets them to the document.
     * Does nothing when the delta yields no changed range.
     */
    alignOnDelta(changeDelta) {
        const range = this.getAlignmentRange(changeDelta);
        if (range === null) {
            return;
        }
        const [from, to] = range;
        this.alignRangeWithExpanding(from, to);
    }
    /* aligns the range indicated by requestedFrom and requestedTo. The range
     * that will actually be aligned may be larger to ensure that the new timestamps
     * are not flawed: whenever alignRange reports that a side was not clean, the
     * range grows by 200 positions on that side and the alignment is repeated.
     * After 20 attempts the whole document is aligned instead.
     */
    alignRangeWithExpanding(requestedFrom, requestedTo, recursionDepth = 0) {
        if (recursionDepth >= 20) {
            // prevents infinite cycle. If we cannot align locally cleanly, align all.
            global.logger.info('could not find clean local alignment range. Aligning all.');
            this.alignAll();
            return;
        }
        const length = this.editorController.getLength();
        if (requestedTo - requestedFrom === 0)
            return;
        const [from, to] = this.adjustAlignmentRange(requestedFrom, requestedTo);
        const [timeFrom, timeTo] = this.textRangeToTimeRange(from, to);
        const [expandLeft, expandRight] = this.alignRange(from, to, timeFrom, timeTo);
        let newFrom = from;
        let newTo = to;
        // a side can only grow while it has not reached the document boundary yet
        if (expandLeft && from > 0)
            newFrom = from - 200;
        if (expandRight && to < length)
            newTo = to + 200;
        if (newFrom < from || newTo > to) {
            this.alignRangeWithExpanding(newFrom, newTo, recursionDepth + 1);
            return;
        }
        if (recursionDepth > 10) {
            global.logger.warn('alignment recursion depth:', {
                depth: recursionDepth + 1,
                from: requestedFrom,
                to: requestedTo,
            });
        }
        this.editorController.execTextChange({ runAligner: false, requestSave: false }, () => {
            this.highlightMissingTimestamps(newFrom, newTo);
        });
    }
    /* marks paragraphs overlapping [fromIndex, toIndex] whose timestamps look
     * wrong (too much text for their duration) with the 'unaligned' line format,
     * and removes the mark from paragraphs that no longer look wrong.
     * Paragraphs in a non-transcript format are left untouched.
     */
    highlightMissingTimestamps(fromIndex, toIndex) {
        const { textMetadata } = this.editorController;
        const text = this.editorController.getText();
        const newlinesIndices = [-1]; // implicit newline before the first line
        for (let i = 0; i < text.length; i += 1) {
            if (text[i] === '\n')
                newlinesIndices.push(i);
        }
        for (let i = 1; i < newlinesIndices.length; i += 1) {
            const paragraphFrom = newlinesIndices[i - 1] + 1; // just after the previous newline.
            const paragraphTo = newlinesIndices[i]; // end at the newline
            const paragraphLength = paragraphTo - paragraphFrom;
            if (paragraphTo < fromIndex || paragraphFrom > toIndex) {
                continue;
            }
            const paragraphBegin = textMetadata.getBeginAtIndex(paragraphFrom);
            const paragraphEnd = textMetadata.getEndAtIndex(paragraphTo);
            const paragraphDuration = paragraphEnd - paragraphBegin;
            const paragraphFormat = this.editorController.getLineFormat(paragraphFrom);
            if (this.editorController.isNonTranscriptFormat(paragraphFormat)) {
                continue;
            }
            // this is a heuristic condition that detects if the paragraph is suspiciously fast.
            // It does not warn for very short paragraphs.
            if (paragraphLength > 150
                && paragraphLength / paragraphDuration > 40 // faster than 40 characters per second
            ) {
                if (paragraphFormat.unaligned === undefined) {
                    this.editorController.formatLine(paragraphFrom, 1, 'unaligned', true, 'api', false);
                }
            }
            else if (paragraphFormat.unaligned !== undefined) {
                // timestamps for the paragraph were fixed, remove formatting.
                this.editorController.formatLine(paragraphFrom, 1, 'unaligned', false, 'api', false);
            }
        }
    }
    /* Adds the phrase to the aligner. Fills the gap in
      time from the last phrase with dummy words if needed.
      Empty phrases and noises only move lastPhraseEnd; they add no words. */
    loadPhrase(text, begin, end) {
        this.lastPhraseEnd = end; // last phrase, including noises
        if (text === '' || isNoise(text))
            return;
        const delay = begin - this.lastWordEnd; // last phrase end, excluding noises
        // a zero or negative delay gives a count <= 0, so no dummy word is added
        // (dummyWordLength is then meaningless but never used).
        const dummyWordsCount = Math.floor(delay / DUMMY_WORD_LENGTH + 0.900);
        const dummyWordLength = delay / dummyWordsCount;
        for (let i = 0; i < dummyWordsCount; i += 1) {
            const dummyBegin = this.lastWordEnd + i * dummyWordLength;
            const dummyEnd = this.lastWordEnd + (i + 1) * dummyWordLength;
            this.stringAligner.addNewWord('_ ', dummyBegin, dummyEnd);
        }
        const doesTextEndWithSpace = text.endsWith(' ') || text.endsWith('\u00a0');
        // phrases on paragraph end do not have space on end. We need to add it
        // so that stringAligner does not join them with the next word.
        const textWithSpace = doesTextEndWithSpace ? text : `${text} `;
        this.splitToWords(textWithSpace, begin, end).forEach(({ wordText, wordBegin, wordEnd }) => {
            this.stringAligner.addNewWord(wordText, wordBegin, wordEnd);
            this.lastWordEnd = wordEnd;
        });
    }
    /* rebuilds the aligner from a trsx document (XML string): every <p> element
     * is loaded as a phrase, with its 'b' and 'e' attributes parsed as durations
     * giving begin and end in seconds.
     * Throws an Error when a phrase lacks the 'b' or 'e' attribute.
     */
    fromTrsx(trsx) {
        this.stringAligner = new StringAligner([], [], INSERTION_PENALTY, DELETION_PENALTY, SUBSTITUTION_PENALTY, INSERT_BETWEEEN_PARAGRAPHS_PENALTY, CHUNK_SIZE);
        // the aligner starts empty, so the running end times must start over as well;
        // otherwise a second load would compute gap filling from stale values.
        this.lastWordEnd = 0;
        this.lastPhraseEnd = 0;
        const xmlParser = new DOMParser();
        const xml = xmlParser.parseFromString(trsx, 'text/xml');
        const phrases = xml.getElementsByTagName('p');
        for (let j = 0; j < phrases.length; j += 1) {
            const phrase = phrases[j];
            const text = phrase.textContent != null ? phrase.textContent : '';
            const isoBegin = phrase.getAttribute('b');
            const isoEnd = phrase.getAttribute('e');
            if (isoBegin === null)
                throw new Error('missing phrase begin');
            if (isoEnd === null)
                throw new Error('missing phrase end');
            const begin = dayjs.duration(isoBegin).asSeconds();
            const end = dayjs.duration(isoEnd).asSeconds();
            // a phrase never ends before it begins
            this.loadPhrase(text, begin, Math.max(begin, end));
        }
        this.finalizePhrases(this.lastPhraseEnd);
    }
    /* find the range of the inserted text that needs to be aligned
       from quill delta.
       Alignment range will be the inserted text plus some part of
       the paragraph before and after it.
       Returns [from, to], or null when the delta has no changed range.
       The returned from may be negative; adjustAlignmentRange fixes that.
       */
    getAlignmentRange(delta) {
        const changedRange = getChangedRangeFromDelta(delta);
        if (changedRange === null) {
            return null;
        }
        const [changeFrom, changeTo] = changedRange;
        const from = changeFrom - MARGIN_BEFORE;
        const to = changeTo + MARGIN_AFTER;
        return [from, to];
    }
    /* snaps the range to whole words and to the document bounds.
     * The start moves just past the first space or newline inside the range
     * (or to 0 when the range starts at or before the document start), the end
     * is limited to the document length and then pulled back to just after the
     * last space or newline inside the range.
     * NOTE(review): the end correction assumes getText returns the text up to
     * the document end when the requested range overruns it - confirm.
     */
    adjustAlignmentRange(indexFrom, indexTo) {
        let from = indexFrom;
        let to = indexTo;
        const marginText = this.editorController.getText(from, to - from);
        if (from <= 0)
            from = 0;
        else {
            // fix range so that it starts with a beginning of a word
            const firstSpaceMatch = marginText.match(/[ \n]/);
            const firstSpace = firstSpaceMatch === null ? -1 : firstSpaceMatch.index;
            from += firstSpace + 1;
        }
        if (to > this.editorController.getLength()) {
            to = this.editorController.getLength();
        }
        // enforce that the last word is complete, including the whitespace after it.
        for (let i = 1; i <= marginText.length; i += 1) {
            const char = marginText[marginText.length - i];
            if (char === ' ' || char === '\n') {
                to -= i - 1;
                break;
            }
        }
        return [from, to];
    }
    /* finds the time window that belongs to the text range: it starts at the end
     * time of the character before the range (0 at the document start or when no
     * timestamp exists) and ends at the begin time at index to (Infinity when
     * the range reaches the document end).
     */
    textRangeToTimeRange(from, to) {
        let timeFrom = 0;
        if (from > 0) {
            timeFrom = this.editorController.textMetadata.getEndAtIndex(from - 1);
            if (timeFrom === Infinity) { // if there are no timestamps at all
                timeFrom = 0;
            }
        }
        let timeTo = this.editorController.textMetadata.getBeginAtIndex(to);
        if (to >= this.editorController.getLength()) {
            timeTo = Infinity;
        }
        return [timeFrom, timeTo];
    }
    // gets the range from the document and splits it to words.
    // Returns [wordSequence, wordEndIndices]; wordEndIndices[i] is the document
    // index just after wordSequence[i]. Segments in a non-transcript format
    // contribute no words.
    getWords(from, to) {
        const wordSequence = [];
        const wordEndIndices = [];
        const wholeText = this.editorController.getText(from, to - from);
        const segmentEndIndices = getAllMatchIndices(wholeText, SEGMENT_END_REGEX);
        // if the range spans over more than one block, we need to handle each
        // separately.
        const segmentStarts = [from, ...segmentEndIndices.map((index) => index + from + 1), to];
        for (let i = 1; i < segmentStarts.length; i += 1) {
            const segmentFrom = segmentStarts[i - 1];
            const segmentTo = segmentStarts[i];
            if (segmentTo === segmentFrom)
                continue; // skip empty segments
            const format = this.editorController.getLineFormat(segmentFrom);
            if (this.editorController.isNonTranscriptFormat(format))
                continue;
            const segmentText = this.editorController.getText(segmentFrom, segmentTo - segmentFrom);
            const words = segmentText.split(WORD_BOUNDARY_REGEX);
            // every separator removed by split is put back as a single space, so word
            // lengths still add up to the segment length.
            // new line after the last word is preserved. This is important for aligner
            for (let j = 0; j < words.length - 1; j += 1) {
                words[j] += ' ';
            }
            // if there are multiple whitespaces (or other separating characters) between words,
            // join them with the word on the left.
            let j = 1;
            while (j < words.length) {
                if (words[j] === ' ' || words[j] === CAPTION_END_SYMBOL || words[j] === '\n') {
                    words[j - 1] += words[j];
                    // the next word shifts into position j, so j must not advance here;
                    // advancing would skip every other separator in a longer run.
                    words.splice(j, 1);
                }
                else {
                    j += 1;
                }
            }
            let currentWordEnd = segmentFrom;
            words.forEach((word) => {
                wordSequence.push(word);
                currentWordEnd += word.length;
                wordEndIndices.push(currentWordEnd);
            });
        }
        return [wordSequence, wordEndIndices];
    }
    /* aligns the given range.
    The range is cut at the time anchors found inside it and each piece is aligned
    separately within the time window given by its neighbouring anchors.
    returns two values: [needsExpandingToTheLeft, needsExpandingToTheRight] indicating whether
    the timestamps were unchanged on either side. If the first or last timestamp changed, it indicates
    that alignment of a broader range is needed.
    Emits ALIGNER_EVENTS.ALIGNED with (from, to, timeFrom, timeTo) when done.
    */
    alignRange(from, to, timeFrom, timeTo) {
        let needsExpandingToTheRight = false;
        let needsExpandingToTheLeft = false;
        if (timeTo === timeFrom) {
            // no time to distribute the words over; ask for a wider range on both sides
            return [true, true];
        }
        const fixedFrom = from < 0 ? 0 : from;
        const timeAnchors = this.editorController.textMetadata.getMetadataInRange(fixedFrom, to, 'timeAnchor');
        // the range borders act as two extra anchors. The first one sits one position
        // before the range, because each piece starts one position after its anchor.
        const expandedTimeAnchors = [[fixedFrom - 1, timeFrom], ...timeAnchors, [to, timeTo]];
        for (let i = 1; i < expandedTimeAnchors.length; i += 1) {
            const [anchorIndexFrom, anchorTimeFrom] = expandedTimeAnchors[i - 1];
            const [anchorIndexTo, anchorTimeTo] = expandedTimeAnchors[i];
            const beginsWithAnchor = i > 1;
            if (beginsWithAnchor) {
                // special timestamp for the time anchor itself
                this.editorController.textMetadata.spliceTimestamps(anchorIndexFrom, anchorIndexFrom + 1, [[anchorIndexFrom, anchorTimeFrom, anchorTimeFrom]]);
            }
            const [wasCleanFromLeft, wasCleanFromRight] = this.alignRangeBetweenAnchors(anchorIndexFrom + 1, anchorIndexTo, anchorTimeFrom, anchorTimeTo, beginsWithAnchor);
            if (i === 1 && !wasCleanFromLeft) {
                needsExpandingToTheLeft = true;
            }
            if ( // we need to expand alignment range to the right
            i === expandedTimeAnchors.length - 1
                && !wasCleanFromRight) {
                needsExpandingToTheRight = true;
            }
        }
        this.emitter.emit(ALIGNER_EVENTS.ALIGNED, from, to, timeFrom, timeTo);
        return [needsExpandingToTheLeft, needsExpandingToTheRight];
    }
    /* aligns the words of the text range [from, to) against the loaded phrases
     * within the time window [timeFrom, timeTo] and stores the resulting
     * timestamps in the text metadata.
     * forceStart makes the first word begin exactly at timeFrom; it is used when
     * the range starts right after a time anchor.
     * Returns the [wasCleanFromLeft, wasCleanFromRight] pair reported by
     * spliceTimestamps.
     */
    alignRangeBetweenAnchors(from, to, timeFrom, timeTo, forceStart = false) {
        const [wordSequence, wordEndIndices] = this.getWords(from, to);
        const cleanedWordSequence = StringAligner.cleanWords(wordSequence).map((word) => word.replace(TIME_ANCHOR_SYMBOL, '').replace(CAPTION_END_SYMBOL, '\n'));
        // iterate backwards so that removing an entry does not shift the ones still to be checked
        for (let i = wordSequence.length - 1; i >= 0; i -= 1) {
            if (cleanedWordSequence[i] === '') {
                // remove empty phrases before aligning
                cleanedWordSequence.splice(i, 1);
                wordEndIndices.splice(i, 1);
            }
        }
        if (from <= 0)
            cleanedWordSequence.unshift('\n'); // informs aligner about paragraph start
        const matchIndices = this.stringAligner.compareSequence(cleanedWordSequence, timeFrom, timeTo, Date.now() + ALIGNMENT_TIME_LIMIT * 1000);
        if (from <= 0) {
            // removes timestamp for the \n that was artificially added
            matchIndices.shift();
            cleanedWordSequence.shift();
        }
        const newTimestamps = [];
        for (let i = 0; i < matchIndices.length; i += 1) {
            // unmatched words collapse to the start of the time window
            let begin = timeFrom;
            let end = timeFrom;
            if (matchIndices[i] !== -1) {
                // match index -1 means there is no match.
                [begin, end] = this.stringAligner.targetTimestamps[matchIndices[i]];
            }
            const clampedBegin = clamp(begin, timeFrom, timeTo);
            const clampedEnd = clamp(end, timeFrom, timeTo);
            const word = cleanedWordSequence[i];
            if (word !== '\n') {
                // the timestamp is stored at the index of the last character of the word
                newTimestamps.push([wordEndIndices[i] - 1, clampedBegin, clampedEnd]);
            }
        }
        const firstWord = wordEndIndices.length === 0
            ? ''
            : this.editorController.getText(from, wordEndIndices[0] - from);
        if (forceStart
            && !firstWord.startsWith(CAPTION_END_SYMBOL) // Time anchor is in previous caption.
            && !firstWord.substring(0, firstWord.length - 1).includes('\n') // Time anchor is on previous line.
            && newTimestamps.length > 0) {
            newTimestamps[0][BEGIN] = timeFrom; // force first word start to the time anchor
            if (newTimestamps.length > 1 && newTimestamps[0][END] > newTimestamps[1][BEGIN]) {
                // ensure that the first timestamp does not overlap the second
                newTimestamps[0][END] = newTimestamps[1][BEGIN];
            }
        }
        const fixedNewTimestamps = this.fixOverlappingTimestamps(newTimestamps);
        const roundedTimestamps = this.roundTimestamps(fixedNewTimestamps);
        const [wasCleanFromLeft, wasCleanFromRight] = this.editorController.textMetadata
            .spliceTimestamps(from, to, roundedTimestamps);
        return [wasCleanFromLeft, wasCleanFromRight];
    }
    /* returns a copy of the [index, begin, end] timestamps with begin and end
       rounded to three decimal places. */
    roundTimestamps(timestamps) {
        return timestamps.map(([index, begin, end]) => [
            index,
            Number(begin.toFixed(3)),
            Number(end.toFixed(3)),
        ]);
    }
    /*
      If there are consecutive timestamps that are identical, this
      fixes them so that timestamps never overlap: the shared interval is
      divided evenly among the words of the group.
      Returns a deep copy; the input is not modified.
    */
    fixOverlappingTimestamps(timestamps) {
        let groupStart = 0;
        let groupBegin = -1;
        let groupEnd = -1;
        const fixedTimestamps = deepcopy(timestamps);
        // one extra iteration (i === timestamps.length) closes the last group
        for (let i = 0; i <= timestamps.length; i += 1) {
            if (i === timestamps.length // handles the last group
                || timestamps[i][END] !== groupEnd // group boundary
                || timestamps[i][BEGIN] !== groupBegin // group boundary
            ) {
                // the group [groupStart, i) shares one interval; give each word an equal slice
                const wordLength = (groupEnd - groupBegin) / (i - groupStart);
                for (let j = 0; j < i - groupStart; j += 1) {
                    fixedTimestamps[groupStart + j][BEGIN] = groupBegin + j * wordLength;
                    fixedTimestamps[groupStart + j][END] = groupBegin + (j + 1) * wordLength;
                }
                if (i === timestamps.length)
                    break;
                groupBegin = timestamps[i][BEGIN];
                groupEnd = timestamps[i][END];
                groupStart = i;
            }
        }
        return fixedTimestamps;
    }
    /* splits text to words at the word boundary characters; each word keeps the
     * boundary character that follows it. The time interval [begin, end] is
     * divided among the words in proportion to their character positions.
     * Returns an array of { wordText, wordBegin, wordEnd }.
     */
    splitToWords(text, begin, end) {
        const wordBoundaries = [-1];
        let match = null;
        // the regex is shared and global, so exec continues from lastIndex;
        // make sure the scan starts at the beginning of the text.
        WORD_BOUNDARY_REGEX.lastIndex = 0;
        // eslint-disable-next-line no-cond-assign
        while ((match = WORD_BOUNDARY_REGEX.exec(text)) !== null) {
            wordBoundaries.push(match.index);
        }
        wordBoundaries.push(text.length - 1); // for phrases that do not end with space
        const output = [];
        for (let i = 1; i < wordBoundaries.length; i += 1) {
            const from = wordBoundaries[i - 1] + 1;
            const to = wordBoundaries[i] + 1;
            if (from === to)
                continue; // nothing between two boundaries
            const wordBegin = begin + (end - begin) * (from / text.length);
            const wordEnd = begin + (end - begin) * (to / text.length);
            output.push({
                wordText: text.substring(from, to),
                wordBegin,
                wordEnd,
            });
        }
        return output;
    }
}
export default DocumentAlignment;
