// lyrics-corrector.js
import stringSimilarity from 'string-similarity';

/**
 * Correct timecoded words using reference lyrics.
 *
 * Acts purely as a dispatcher: a truthy `options.stream` selects the
 * streaming pipeline, anything else selects the synchronous pipeline.
 *
 * @param {string} timecodedWords - Words with timing information
 * @param {string} lyrics - Reference lyrics supplying the correct word content
 * @param {Object} [options] - Additional options
 * @param {boolean} [options.stream=false] - Whether to stream results (for large inputs)
 * @param {function} [options.onProgress=null] - Progress callback; only forwarded when streaming
 * @returns {string|AsyncGenerator} - Corrected text, or an AsyncGenerator when streaming
 */
export function correctLyrics(timecodedWords, lyrics, options = {}) {
    const {
        stream: useStreaming = false,
        onProgress: progressCallback = null
    } = options;

    return useStreaming
        ? streamingProcess(timecodedWords, lyrics, progressCallback)
        : processSync(timecodedWords, lyrics);
}

/**
 * Streaming pipeline: corrects the input chunk by chunk and yields the
 * formatted result one line at a time.
 *
 * Each yielded value has the shape `{ choices: [{ delta: { content } }] }`,
 * where `content` is one formatted line followed by "\n". Blank lines are
 * never yielded.
 *
 * If `onProgress` is truthy it is called before every yield with
 * `{ progress, processed }`: `progress` is the fraction of chunks reached so
 * far (it only changes between chunks) and `processed` is all text emitted
 * so far.
 *
 * @param {string} timecodedWords - Raw timecoded input
 * @param {string} lyrics - Reference lyrics
 * @param {function|null} onProgress - Optional progress callback
 */
async function* streamingProcess(timecodedWords, lyrics, onProgress) {
    const chunks = splitIntoChunks(timecodedWords);
    const lyricsWords = prepareLyrics(lyrics);

    let emittedText = '';

    for (const [chunkIndex, chunk] of chunks.entries()) {
        // Each chunk runs through the full parse -> correct -> format pipeline on its own.
        const formattedChunk = formatOutput(
            correctWords(parseTimecodedText(chunk), lyricsWords)
        );

        const outputLines = formattedChunk
            .split('\n')
            .filter(candidate => candidate.trim() !== '');

        for (const outputLine of outputLines) {
            const content = `${outputLine}\n`;
            emittedText += content;

            if (onProgress) {
                onProgress({
                    progress: (chunkIndex + 1) / chunks.length,
                    processed: emittedText
                });
            }

            yield { choices: [{ delta: { content } }] };
        }
    }
}

/**
 * Synchronous pipeline for inputs small enough to handle in one pass:
 * parse the timecoded text, prepare the lyrics, correct the words and
 * format the result.
 *
 * @param {string} timecodedWords - Raw timecoded input
 * @param {string} lyrics - Reference lyrics
 * @returns {string} Formatted, corrected text
 */
function processSync(timecodedWords, lyrics) {
    // Arguments are evaluated left to right, so parsing still happens before lyric preparation.
    return formatOutput(
        correctWords(
            parseTimecodedText(timecodedWords),
            prepareLyrics(lyrics)
        )
    );
}

/**
 * Parse timecoded text into a flat list of word entries.
 *
 * Every `Word|StartTime|EndTime` token found on a line becomes one entry.
 * Times may be integers or decimals. Lines without any such token are
 * skipped and do not consume a line index.
 *
 * @param {string} text - Timecoded text, one or more tokens per line
 * @returns {Array<{word: string, startTime: number, endTime: number, line: number}>}
 *   Entries in input order; `line` is the 0-based index among lines that
 *   contained at least one token.
 */
function parseTimecodedText(text) {
    // Capture groups are read directly instead of re-splitting the match on "|",
    // so a word that itself contains "|" no longer shifts the time fields.
    // The decimal part is optional so integer timestamps are not dropped.
    const entryPattern = /(\S+)\|(\d+(?:\.\d+)?)\|(\d+(?:\.\d+)?)/g;

    const result = [];
    let currentLine = 0;

    for (const line of text.trim().split('\n')) {
        const entries = [...line.matchAll(entryPattern)];

        if (entries.length === 0) continue;

        for (const [, word, startTime, endTime] of entries) {
            result.push({
                word,
                startTime: parseFloat(startTime),
                endTime: parseFloat(endTime),
                line: currentLine
            });
        }

        currentLine++;
    }

    return result;
}

/**
 * Turn raw lyrics into a de-duplicated list of candidate words for matching.
 *
 * Punctuation is stripped and whitespace collapsed, then every word is
 * contributed in three casings (as written, lower-case, and first letter
 * upper-case with the rest lower-case). Duplicates are removed while keeping
 * first-seen order.
 *
 * @param {string} lyrics - Reference lyrics
 * @returns {string[]} Unique word variations
 */
function prepareLyrics(lyrics) {
    const withoutPunctuation = lyrics.replace(/[.,!?;:"'()\[\]{}]/g, '');
    const normalized = withoutPunctuation.replace(/\s+/g, ' ').trim();

    const tokens = normalized.split(/\s+/).filter(token => token.length > 0);

    // A Set drops repeats but remembers insertion order, so the result is stable.
    const casings = tokens.flatMap(token => [
        token,
        token.toLowerCase(),
        token.charAt(0).toUpperCase() + token.slice(1).toLowerCase()
    ]);

    return [...new Set(casings)];
}

/**
 * Correct parsed words against the lyric vocabulary and assign output lines.
 *
 * For every input word the best lyric match is looked up. If the match
 * differs from the transcription, the word is either split into two lyric
 * words (when `checkForSplitWords` returns more than one part) or replaced
 * by the match with the original's capitalization. Otherwise the word is
 * kept as is.
 *
 * A new output line is started before a word when `shouldAddLineBreak` says
 * so, or when the word starts more than 0.8 s after the previous word ended.
 * For a split word only the first part can start a new line.
 *
 * The input objects are never modified; every returned entry is a fresh
 * object carrying its own `line` value.
 *
 * @param {Array<{word: string, startTime: number, endTime: number, line: number}>} parsedWords
 *   Parsed transcription words
 * @param {string[]} lyricsWords - Candidate lyric words
 * @returns {Array<{word: string, startTime: number, endTime: number, line: number}>}
 *   Corrected words in order, with recomputed `line` indices
 */
function correctWords(parsedWords, lyricsWords) {
    // A silence longer than this (in seconds) between two words forces a new line.
    const pauseThresholdSeconds = 0.8;

    const corrected = [];
    let currentLine = 0;
    let wordsInCurrentLine = 0;
    let lastWord = null;

    // Places a copy of `entry` on the current line, opening a new line first
    // when allowed and required. Copying keeps caller-owned objects untouched.
    const placeWord = (entry, mayStartNewLine, followsPause) => {
        if (mayStartNewLine &&
            (shouldAddLineBreak(wordsInCurrentLine, entry.word, lastWord) || followsPause)) {
            currentLine++;
            wordsInCurrentLine = 0;
        }

        const placed = { ...entry, line: currentLine };
        corrected.push(placed);
        wordsInCurrentLine++;
        lastWord = placed;
    };

    for (const wordData of parsedWords) {
        const bestMatch = findBestMatch(wordData.word, lyricsWords);

        // Timing gap is measured against the previously placed word, before this one is added.
        const followsPause = lastWord !== null &&
            wordData.startTime - lastWord.endTime > pauseThresholdSeconds;

        if (!bestMatch || bestMatch === wordData.word) {
            // No better candidate: keep the transcription as is.
            placeWord(wordData, true, followsPause);
            continue;
        }

        const splitWords = checkForSplitWords(wordData, bestMatch, lyricsWords);

        if (splitWords.length > 1) {
            // Merged word: only its first part may trigger a line break.
            splitWords.forEach((part, index) => placeWord(part, index === 0, followsPause));
        } else {
            // Plain replacement, keeping the capitalization of the transcription.
            placeWord(
                { ...wordData, word: preserveCapitalization(bestMatch, wordData.word) },
                true,
                followsPause
            );
        }
    }

    return corrected;
}

/**
 * Pick the lyric word that best corresponds to a transcribed word.
 *
 * A case-insensitive exact match wins outright. Otherwise every candidate is
 * scored with `stringSimilarity.compareTwoStrings` on lower-cased text and
 * the highest-scoring one is returned, provided its score is above 0.3.
 * When several candidates share the top score the earliest one is used.
 *
 * @param {string} word - Transcribed word
 * @param {string[]} lyricsWords - Candidate lyric words
 * @returns {string} The chosen lyric word, or `word` itself when nothing scores above 0.3
 */
function findBestMatch(word, lyricsWords) {
    const needle = word.toLowerCase();

    const exactMatch = lyricsWords.find(candidate => candidate.toLowerCase() === needle);
    if (exactMatch) {
        return exactMatch;
    }

    // Single pass for the maximum; strict ">" keeps the first of equally good candidates.
    let closestWord;
    let closestScore = -Infinity;

    for (const candidate of lyricsWords) {
        const score = stringSimilarity.compareTwoStrings(needle, candidate.toLowerCase());
        if (score > closestScore) {
            closestScore = score;
            closestWord = candidate;
        }
    }

    // Deliberately low threshold: accept loose matches but reject very poor ones.
    return closestScore > 0.3 ? closestWord : word;
}

/**
 * Decide whether a transcribed word is really two lyric words run together.
 *
 * Words shorter than 6 characters are returned untouched. For longer words,
 * each lyric word is paired with up to the next three entries of
 * `lyricsWords`; the pair is joined directly, with a space, and with a
 * hyphen, and compared to the lower-cased transcription. The first pair
 * scoring above 0.7 wins, and the original time span is divided between the
 * two words in proportion to their lengths.
 *
 * @param {{word: string, startTime: number, endTime: number, line: number}} wordData
 *   Transcribed word
 * @param {string} bestMatch - Best single-word match, used when no split is found
 * @param {string[]} lyricsWords - Candidate lyric words
 * @returns {Array<{word: string, startTime: number, endTime: number, line: number}>}
 *   Two entries for a split; otherwise a single entry (the same `wordData`
 *   object for short words, or a copy carrying `bestMatch`)
 */
function checkForSplitWords(wordData, bestMatch, lyricsWords) {
    const mergedCandidate = wordData.word.toLowerCase();

    // Short words are assumed not to be merged.
    if (mergedCandidate.length < 6) return [wordData];

    const separators = ['', ' ', '-'];

    for (let first = 0; first < lyricsWords.length - 1; first++) {
        const head = lyricsWords[first].toLowerCase();
        const windowEnd = Math.min(first + 4, lyricsWords.length);

        for (let second = first + 1; second < windowEnd; second++) {
            const tail = lyricsWords[second].toLowerCase();

            const looksMerged = separators.some(separator =>
                stringSimilarity.compareTwoStrings(mergedCandidate, head + separator + tail) > 0.7
            );
            if (!looksMerged) continue;

            // Estimate where the first word ends from the relative word lengths.
            const totalDuration = wordData.endTime - wordData.startTime;
            const headShare = head.length / (head.length + tail.length);
            const splitPoint = wordData.startTime + (totalDuration * headShare);

            const headEntry = {
                word: preserveCapitalization(lyricsWords[first], wordData.word),
                startTime: wordData.startTime,
                endTime: splitPoint,
                line: wordData.line
            };
            const tailEntry = {
                word: preserveCapitalization(lyricsWords[second], wordData.word),
                startTime: splitPoint,
                endTime: wordData.endTime,
                line: wordData.line
            };

            return [headEntry, tailEntry];
        }
    }

    // No convincing split: fall back to the single-word match.
    return [{ ...wordData, word: bestMatch || wordData.word }];
}

/**
 * Decide whether a line break should be inserted before `word`.
 *
 * Rules, checked in order:
 *  1. `word` ends in `.`, `!` or `?`.
 *  2. `word` starts with an upper-case letter, is not one of the common
 *     capitalized words (I, You, The, ...), the line already holds more than
 *     one word, and the previous word did not end a sentence.
 *  3. The line holds more than 5 words and `word` ends in a comma or is a
 *     listed conjunction.
 *  4. The line already holds 8 or more words.
 *
 * Only a first character that actually has an upper/lower-case distinction
 * can count as capitalized, so digits, quotes and other caseless characters
 * never trigger rule 2, and an empty word is handled without throwing.
 *
 * @param {number} wordsInLine - Number of words already on the current line
 * @param {string} word - The word about to be placed
 * @param {{word: string}|null} lastWord - The previously placed word, if any
 * @returns {boolean} True when a new line should start before `word`
 */
function shouldAddLineBreak(wordsInLine, word, lastWord) {
    const endsSentence = /[.!?]$/;

    // NOTE(review): the break is placed *before* a word that itself ends a
    // sentence, so the sentence-final word opens the new line. Kept as is;
    // confirm that this is the intended layout.
    if (endsSentence.test(word)) {
        return true;
    }

    // Spread iterates by code point; "?? ''" covers the empty word.
    const firstChar = [...word][0] ?? '';
    const hasCase = firstChar.toUpperCase() !== firstChar.toLowerCase();
    const isCapitalized = hasCase && firstChar === firstChar.toUpperCase();
    const notCommonCapitalized = !/^(I|I'm|I'll|I've|I'd|You|We|They|He|She|It|The|A|An)$/i.test(word);

    if (wordsInLine > 1 && isCapitalized && notCommonCapitalized &&
        (!lastWord || !endsSentence.test(lastWord.word))) {
        return true;
    }

    // Commas and conjunctions only break lines that already hold more than 5 words.
    if (wordsInLine > 5 && (
        /,$/.test(word) ||
        /^(and|or|but|yet|so|because|och|eller|men|så|för)$/i.test(word)
    )) {
        return true;
    }

    // Hard cap on line length.
    return wordsInLine >= 8;
}

/**
 * Apply the capitalization style of the transcribed word to its correction.
 *
 *  - Original is all upper-case and has at least two cased letters: the
 *    correction is upper-cased.
 *  - Original starts with an upper-case letter: the correction gets an
 *    upper-case first character and a lower-case remainder.
 *  - Anything else: the correction is lower-cased.
 *
 * Only characters with an upper/lower-case distinction are considered, so a
 * caseless original ("4", "") no longer forces an ALL-CAPS correction, and a
 * single capital letter ("A", "I") is treated as capitalized rather than as
 * all caps.
 *
 * @param {string} correctWord - Replacement word taken from the lyrics
 * @param {string} originalWord - Word as it appeared in the transcription
 * @returns {string} `correctWord` re-cased to follow `originalWord`
 */
function preserveCapitalization(correctWord, originalWord) {
    const isCased = character => character.toUpperCase() !== character.toLowerCase();

    // Spread iterates by code point rather than by UTF-16 unit.
    const originalChars = [...originalWord];
    const casedCount = originalChars.filter(isCased).length;

    if (casedCount > 1 && originalWord === originalWord.toUpperCase()) {
        return correctWord.toUpperCase();
    }

    const firstChar = originalChars[0] ?? '';
    if (isCased(firstChar) && firstChar === firstChar.toUpperCase()) {
        return correctWord.charAt(0).toUpperCase() + correctWord.slice(1).toLowerCase();
    }

    return correctWord.toLowerCase();
}

/**
 * Serialize corrected words back into timecoded text.
 *
 * Each word is written as `word|start|end` with both times fixed to one
 * decimal place. Words are separated by a space, or by a newline when a
 * word's `line` is greater than the highest line seen so far. No separator
 * is written before the first word, so the output never starts with a blank
 * line even when the first word's `line` is above 0.
 *
 * @param {Array<{word: string, startTime: number, endTime: number, line: number}>} correctedWords
 *   Words in output order
 * @returns {string} Formatted text; the empty string for an empty list
 */
function formatOutput(correctedWords) {
    let result = '';
    let currentLine = 0;

    for (const { word, startTime, endTime, line } of correctedWords) {
        const startsNewLine = line > currentLine;
        if (startsNewLine) {
            currentLine = line;
        }

        // Separators go between tokens only, never in front of the first one.
        if (result.length > 0) {
            result += startsNewLine ? '\n' : ' ';
        }

        result += `${word}|${startTime.toFixed(1)}|${endTime.toFixed(1)}`;
    }

    return result;
}

/**
 * Split timecoded text into chunks of whole lines for streaming.
 *
 * Lines are accumulated until adding the next one would push the chunk past
 * `maxWordsPerChunk` words; that line then starts a new chunk. The word
 * count of a line is estimated as half its number of `|` characters. A
 * single line larger than the limit becomes a chunk of its own, and no
 * empty chunk is emitted in front of it. Joining the returned chunks with
 * "\n" reproduces the input.
 *
 * @param {string} text - Timecoded text
 * @param {number} [maxWordsPerChunk=100] - Soft upper bound on words per chunk
 * @returns {string[]} Chunks in input order
 */
function splitIntoChunks(text, maxWordsPerChunk = 100) {
    const chunks = [];
    let currentChunk = [];
    let wordCount = 0;

    for (const line of text.split('\n')) {
        const wordsInLine = (line.match(/\|/g) || []).length / 2;

        // Only flush when the current chunk actually holds words; otherwise an
        // oversized first line would leave an empty chunk behind.
        if (wordCount > 0 && wordCount + wordsInLine > maxWordsPerChunk) {
            chunks.push(currentChunk.join('\n'));
            currentChunk = [];
            wordCount = 0;
        }

        currentChunk.push(line);
        wordCount += wordsInLine;
    }

    if (currentChunk.length > 0) {
        chunks.push(currentChunk.join('\n'));
    }

    return chunks;
}

/**
 * Lower-level building blocks of the corrector, exported for advanced usage.
 */
export const utils = {
    parseTimecodedText: parseTimecodedText,
    prepareLyrics: prepareLyrics,
    findBestMatch: findBestMatch,
    checkForSplitWords: checkForSplitWords,
};