import { Text } from './utils';
import { PDF, HTML, CleanRules as cr } from './clean/rules';
import { LanguageBase } from './lang/common/Base';

/**
 * Runs a fixed sequence of cleanup passes over a piece of text.
 *
 * The instance is stateful: every pass reads {@link Cleaner.text} and
 * overwrites it with its own output, so the order in which passes run matters.
 * Most passes delegate to rule objects imported from `./clean/rules`; what each
 * rule matches is defined there, not in this class.
 */
export class Cleaner {
  /** Working copy of the text; each pass replaces it with its result. */
  text: string;
  /** Language definition supplied by the caller; stored but not read by this class. */
  lang: LanguageBase;

  /**
   * @param text - The text to clean.
   * @param lang - Language definition associated with the text.
   */
  constructor(text: string, lang: LanguageBase) {
    this.text = text;
    this.lang = lang;
  }

  /**
   * Runs every cleanup pass in order and returns the cleaned text.
   *
   * @returns The cleaned text, or the original value untouched when it is
   * empty (or otherwise falsy).
   */
  clean(): string {
    if (!this.text) {
      return this.text;
    }
    this.removeAllNewlines();
    this.replaceDoubleNewlines();
    this.replaceNewlines();
    this.replaceEscapedNewlines();
    this.text = new Text(this.text).apply(...HTML.All);
    this.replacePunctuationInBrackets();
    this.text = new Text(this.text).apply(cr.InlineFormattingRule);
    this.cleanQuotations();
    this.cleanTableOfContents();
    this.checkForNoSpaceInBetweenSentences();
    this.cleanConsecutiveCharacters();
    return this.text;
  }

  /** Runs the mid-sentence newline pass followed by the mid-word newline pass. */
  removeAllNewlines(): void {
    this.removeNewlineInMiddleOfSentence();
    this.removeNewlineInMiddleOfWord();
  }

  /**
   * Within each run of non-period characters, replaces matches of
   * `cr.NEWLINE_IN_MIDDLE_OF_SENTENCE_REGEX` with a single space.
   */
  removeNewlineInMiddleOfSentence(): void {
    // NOTE(review): whether one or all matches per run are replaced depends on
    // the flags of NEWLINE_IN_MIDDLE_OF_SENTENCE_REGEX, which is defined elsewhere.
    const replaceWithBlank = (match: string): string => {
      return match.replace(cr.NEWLINE_IN_MIDDLE_OF_SENTENCE_REGEX, ' ');
    };
    // The outer pattern splits the text at periods, so the inner regex never
    // sees a period-crossing context.
    this.text = this.text.replace(/(?:[^\.])*/g, replaceWithBlank);
  }

  /** Applies `cr.NewLineInMiddleOfWordRule` to the text. */
  removeNewlineInMiddleOfWord(): void {
    this.text = new Text(this.text).apply(cr.NewLineInMiddleOfWordRule);
  }

  /** Applies `cr.DoubleNewLineWithSpaceRule`, then `cr.DoubleNewLineRule`. */
  replaceDoubleNewlines(): void {
    this.text = new Text(this.text).apply(cr.DoubleNewLineWithSpaceRule, cr.DoubleNewLineRule);
  }

  /** Applies the single-newline rules (general rules first, then the `PDF` ones). */
  replaceNewlines(): void {
    this.text = new Text(this.text).apply(
      cr.NewLineFollowedByPeriodRule,
      cr.ReplaceNewlineWithCarriageReturnRule,
      cr.NewLineFollowedByBulletRule,
      PDF.NewLineInMiddleOfSentenceRule,
      PDF.NewLineInMiddleOfSentenceNoSpacesRule,
    );
  }

  /** Applies the escaped newline / carriage-return rules, including the "typo" variants. */
  replaceEscapedNewlines(): void {
    this.text = new Text(this.text).apply(
      cr.EscapedNewLineRule,
      cr.EscapedCarriageReturnRule,
      cr.TypoEscapedNewLineRule,
      cr.TypoEscapedCarriageReturnRule,
    );
  }

  /**
   * Replaces every `?` that appears inside a `[...]` span with the placeholder
   * `&ᓷ&`. Question marks outside square brackets are left alone.
   */
  replacePunctuationInBrackets(): void {
    // Replacing on a bracket span without `?` is a no-op, so no pre-check is needed.
    const replacePunct = (match: string): string => match.replace(/\?/g, '&ᓷ&');
    this.text = this.text.replace(/\[(?:[^\]])*\]/g, replacePunct);
  }

  /** Turns backticks into apostrophes, then applies the two quotation rules. */
  cleanQuotations(): void {
    this.text = this.text.replace(/`/g, "'");
    this.text = new Text(this.text).apply(cr.QuotationsFirstRule, cr.QuotationsSecondRule);
  }

  /**
   * Applies `cr.TableOfContentsRule` followed by the consecutive-period and
   * consecutive-forward-slash rules.
   */
  cleanTableOfContents(): void {
    this.text = new Text(this.text).apply(
      cr.TableOfContentsRule,
      cr.ConsecutivePeriodsRule,
      cr.ConsecutiveForwardSlashRule,
    );
  }

  /**
   * If `word` matches `regex` and contains none of `cr.URL_EMAIL_KEYWORDS`,
   * rewrites it with `rule` and substitutes the rewritten form for every
   * literal occurrence of `word` in `txt` (substring match, not whole-word).
   *
   * @param word - Candidate token to inspect.
   * @param txt - Text in which occurrences of `word` are replaced.
   * @param regex - Pattern deciding whether `word` needs rewriting.
   * @param rule - Rule handed to `Text.apply` to produce the rewritten word.
   * @returns `txt` with the substitutions applied, or `txt` unchanged when the
   * word does not match or looks like a URL / e-mail fragment.
   */
  searchForConnectedSentences(word: string, txt: string, regex: RegExp, rule: any): string {
    // `search` always scans from index 0 and leaves `lastIndex` alone, unlike
    // `test`, which on a global/sticky regex resumes from the previous call's
    // position and can therefore miss a match in the next word.
    if (word.search(regex) === -1) {
      return txt;
    }
    if (cr.URL_EMAIL_KEYWORDS.some((k) => word.includes(k))) {
      return txt;
    }
    const newWord = new Text(word).apply(rule);
    // Escape regex metacharacters so the word is matched literally.
    const literalWord = new RegExp(word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');
    // A replacer function inserts `newWord` verbatim; a replacement *string*
    // would expand sequences such as `$&` or `$1` contained in the word.
    return txt.replace(literalWord, () => newWord);
  }

  /**
   * Checks every space-separated token of the text against the two
   * "no space between sentences" patterns and rewrites the text accordingly.
   *
   * The token list is taken from the text as it is on entry; substitutions made
   * for earlier tokens are already present in `this.text` when later tokens
   * are processed.
   */
  checkForNoSpaceInBetweenSentences(): void {
    const words = this.text.split(' ');
    for (const word of words) {
      this.text = this.searchForConnectedSentences(
        word,
        this.text,
        cr.NO_SPACE_BETWEEN_SENTENCES_REGEX,
        cr.NoSpaceBetweenSentencesRule,
      );
      this.text = this.searchForConnectedSentences(
        word,
        this.text,
        cr.NO_SPACE_BETWEEN_SENTENCES_DIGIT_REGEX,
        cr.NoSpaceBetweenSentencesDigitRule,
      );
    }
  }

  /** Applies the consecutive-period and consecutive-forward-slash rules. */
  cleanConsecutiveCharacters(): void {
    this.text = new Text(this.text).apply(cr.ConsecutivePeriodsRule, cr.ConsecutiveForwardSlashRule);
  }
}
