import { getSentences } from '@/utils/sentence-splitter';
import { distance } from 'fastest-levenshtein';

/**
 * Produces a canonical form of `text` for fuzzy comparison.
 *
 * Steps, in order:
 *  1. lower-case the input;
 *  2. delete every character that is not `a-z`, `0-9` or whitespace
 *     (punctuation is removed, not replaced by a space);
 *  3. collapse every run of whitespace (spaces, tabs, newlines) to one space;
 *  4. trim leading and trailing whitespace.
 *
 * @param text - Raw text to normalize.
 * @returns The normalized text; may be empty.
 */
export function normalizeText(text: string): string {
  return (
    text
      .toLowerCase()
      .replace(/[^a-z0-9\s]+/g, '')
      // The `g` flag is required: without it only the first whitespace run
      // would be collapsed and later runs would be left untouched.
      .replace(/\s+/g, ' ')
      .trim()
  );
}

/**
 * Extracts number-led tokens from `text`.
 *
 * A token is one or more consecutive groups of "digits followed by optional
 * lower-case letters" (e.g. `12345abc`, `98b76c5`). Tokens shorter than five
 * characters are dropped so that short values such as a bare year do not
 * count on their own.
 *
 * @param text - Text to scan.
 * @returns The matching tokens in order of appearance.
 */
export function getNumberParts(text: string): string[] {
  const minTokenLength = 5;
  const tokens: string[] = [];
  for (const match of text.matchAll(/([0-9]+[a-z]*)+/g)) {
    const token = match[0];
    if (token.length >= minTokenLength) {
      tokens.push(token);
    }
  }
  return tokens;
}

/**
 * Reports whether the two token lists share at least one identical entry.
 *
 * @param aNumbers - Tokens to look up.
 * @param bNumbers - Tokens to look up against.
 * @returns `true` if any element of `aNumbers` also occurs in `bNumbers`,
 *          otherwise `false` (including when either list is empty).
 */
export function checkNumberParts(aNumbers: string[], bNumbers: string[]): boolean {
  const known = new Set(bNumbers);
  return aNumbers.some((candidate) => known.has(candidate));
}

/**
 * Splits `chunk` into paragraphs (on `'\n'`) and flags each paragraph whose
 * text appears to be quoted or referenced by `answerText`.
 *
 * Both texts are split into sentences with `getSentences` and normalized with
 * `normalizeText`; sentences whose normalized form is 12 characters or
 * shorter are ignored on both sides. A paragraph is highlighted as soon as
 * one of its sentences matches any answer sentence, where "matches" means
 * either:
 *  - the two sentences share a number token (see `getNumberParts`), or
 *  - their Levenshtein distance is below `max(chunkSentenceLength / 10, 5)`.
 *
 * @param chunk - Source text, one paragraph per line.
 * @param answerText - Answer text to compare the chunk against.
 * @returns One entry per paragraph, in order, carrying the original
 *          (un-normalized) paragraph text and its highlight flag.
 */
export function getHighlightedText(
  chunk: string,
  answerText: string,
): Array<{ value: string; isHighlighted: boolean }> {
  // Normalize the answer once and precompute each sentence's number tokens,
  // so they are not re-extracted for every chunk sentence.
  const answerSentences = getSentences(answerText)
    .map((v) => normalizeText(v))
    .filter((v) => v.length > 12)
    .map((text) => ({ text, numberParts: getNumberParts(text) }));

  // True when `sentence` matches at least one answer sentence.
  const matchesAnswer = (sentence: string): boolean => {
    const normalized = normalizeText(sentence);
    if (normalized.length <= 12) {
      // Too short to compare meaningfully.
      return false;
    }

    const numberParts = getNumberParts(normalized);
    // Allowed edit distance scales with sentence length, with a floor of 5.
    const maxDistance = Math.max(normalized.length / 10, 5);

    return answerSentences.some(
      (answer) =>
        // The number-token check runs first; the edit distance is only
        // computed when it does not already match.
        (numberParts.length > 0 && checkNumberParts(numberParts, answer.numberParts)) ||
        distance(normalized, answer.text) < maxDistance,
    );
  };

  const result: Array<{ value: string; isHighlighted: boolean }> = [];
  for (const paragraph of chunk.split('\n')) {
    // `some` stops at the first matching sentence, so the rest of the
    // paragraph is not examined once it is known to be highlighted.
    const isHighlighted = getSentences(paragraph).some((sentence) => matchesAnswer(sentence));
    result.push({ value: paragraph, isHighlighted });
  }
  return result;
}
