import { Cleaner } from './Cleaner';
import { LanguageBase } from './lang/common/Base';
import { getLanguage } from './Language';
import { Processor } from './Processor';
import { TextSpan } from './utils';

/**
 * Splits text into sentences using the rules of a language module.
 *
 * Depending on the options, `segment` returns cleaned sentences, sentences
 * carrying their trailing whitespace from the input, or `TextSpan` objects
 * with character offsets into the input.
 */
export class Segmenter {
  /** Language definition returned by `getLanguage`; it may supply its own `Cleaner` / `Processor`. */
  private readonly languageModule: LanguageBase;
  /** When true, the input is run through the cleaner before sentence splitting. */
  private readonly clean: boolean;
  /** When true, `segment` returns `TextSpan` objects instead of plain strings. */
  private readonly charSpan: boolean;
  /** Unmodified input of the most recent `segment` call; spans are computed against it. */
  private originalText: string = '';

  /**
   * @param opts - Segmenter options.
   * @param opts.language - Language code passed to `getLanguage`. Defaults to `'en'`.
   * @param opts.clean - Clean the text before splitting. Defaults to `false`.
   * @param opts.charSpan - Return `TextSpan` objects with offsets. Defaults to `false`.
   * @throws Error when both `clean` and `charSpan` are enabled, because cleaning
   *   modifies the text that the offsets would refer to.
   */
  constructor(
    opts: {
      language?: string;
      clean?: boolean;
      charSpan?: boolean;
    } = {},
  ) {
    const { language = 'en', clean = false, charSpan = false } = opts;

    this.languageModule = getLanguage(language);
    this.clean = clean;
    this.charSpan = charSpan;

    if (this.clean && this.charSpan) {
      throw new Error('charSpan must be False if clean is True. Since `clean=True` will modify original text.');
    }
  }

  /**
   * Builds a cleaner for `text`, preferring the language module's own
   * `Cleaner` class when it provides one and falling back to the default.
   */
  private cleaner(text: string) {
    if (this.languageModule.Cleaner) {
      return new this.languageModule.Cleaner(text, this.languageModule);
    } else {
      return new Cleaner(text, this.languageModule);
    }
  }

  /**
   * Builds a processor for `text`, preferring the language module's own
   * `Processor` class when it provides one and falling back to the default.
   */
  private processor(text: string) {
    if (this.languageModule.Processor) {
      return new this.languageModule.Processor(text, this.languageModule, this.charSpan);
    } else {
      return new Processor(text, this.languageModule, this.charSpan);
    }
  }

  /**
   * Locates each sentence in `this.originalText` and returns it, together with
   * any whitespace that follows it, as a `TextSpan` with start/end offsets.
   *
   * For each sentence the first occurrence that ends beyond the previously
   * accepted span is used, so repeated sentences map to successive positions.
   * A sentence with no such occurrence contributes no span.
   *
   * @param sentences - Sentences to locate, in document order.
   * @returns The spans that could be located, in the order they were found.
   */
  private sentencesWithCharSpans(sentences: string[]): Array<TextSpan> {
    const sentSpans: Array<TextSpan> = [];
    let priorEndCharIdx = 0;

    for (const sent of sentences) {
      // Escape regex metacharacters so the sentence is matched literally.
      const escapedSent = sent.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const regex = new RegExp(`${escapedSent}\\s*`, 'g');
      let match: RegExpExecArray | null;

      while ((match = regex.exec(this.originalText)) !== null) {
        const matchStr = match[0];
        const matchStartIdx = match.index;
        const matchEndIdx = matchStartIdx + matchStr.length;

        if (matchEndIdx > priorEndCharIdx) {
          sentSpans.push(new TextSpan(matchStr, matchStartIdx, matchEndIdx));
          priorEndCharIdx = matchEndIdx;
          break;
        }

        // An empty sentence yields the pattern `\s*`, which can match the empty
        // string. `exec` does not move `lastIndex` past a zero-length match, so
        // step forward manually; otherwise this loop would never terminate.
        if (matchStr.length === 0) {
          regex.lastIndex += 1;
        }
      }
    }

    return sentSpans;
  }

  /**
   * Splits `text` into sentences.
   *
   * @param text - Text to segment.
   * @returns
   *   - `[]` when `text` is empty;
   *   - `TextSpan[]` when `charSpan` is enabled;
   *   - the processor's sentences as-is when `clean` is enabled;
   *   - otherwise the sentences as found in the input, each including the
   *     whitespace that follows it.
   */
  segment(text: string): string[] | Array<TextSpan> {
    if (!text) {
      return [];
    }

    this.originalText = text;

    if (this.clean) {
      text = this.cleaner(text).clean();
    }

    const postprocessedSents = this.processor(text).process();

    if (!this.charSpan && this.clean) {
      // Clean (destructive) mode: spans against the original text are not
      // needed, so skip the alignment pass entirely.
      return postprocessedSents;
    }

    const sentenceWithCharSpans = this.sentencesWithCharSpans(postprocessedSents);

    if (this.charSpan) {
      return sentenceWithCharSpans;
    }

    // Non-destructive mode: sentences keep their trailing whitespace.
    return sentenceWithCharSpans.map((textspan) => textspan.sent);
  }
}
