import { Rule } from '../utils';

/**
 * Pattern/replacement pairs for normalising raw text (newlines, escaped
 * control sequences, filler punctuation, quotation marks).
 *
 * Every `*Rule` member is a `Rule` built from a regular expression and the
 * string that should replace each match. How and in which order the rules are
 * applied is decided by `Rule` and its callers, which are not part of this
 * file.
 *
 * NOTE(review): several rules replace with '\r'; presumably the carriage
 * return acts as a boundary placeholder for later processing -- confirm
 * against the code that consumes these rules.
 */
export class CleanRules {
  /** Newline immediately before a line made of only one or two ASCII letters -> single space. */
  static NewLineInMiddleOfWordRule = new Rule(/\n(?=[a-zA-Z]{1,2}\n)/, ' ');

  /** Newline, one space, newline -> carriage return. */
  static DoubleNewLineWithSpaceRule = new Rule(/\n \n/, '\r');

  /** Two consecutive newlines -> carriage return. */
  static DoubleNewLineRule = new Rule(/\n\n/, '\r');

  /** Newline followed by a period and then whitespace -> removed. */
  static NewLineFollowedByPeriodRule = new Rule(/\n(?=\.(\s|\n))/, '');

  /** Any newline -> carriage return. */
  static ReplaceNewlineWithCarriageReturnRule = new Rule(/\n/, '\r');

  /** Literal backslash + "n" (an escaped newline left in the text) -> real newline. */
  static EscapedNewLineRule = new Rule(/\\n/, '\n');

  /** Literal backslash + "r" -> real carriage return. */
  static EscapedCarriageReturnRule = new Rule(/\\r/, '\r');

  /** Literal backslash, a space, then "n" (mistyped escape) -> real newline. */
  static TypoEscapedNewLineRule = new Rule(/\\ n/, '\n');

  /** Literal backslash, a space, then "r" (mistyped escape) -> real carriage return. */
  static TypoEscapedCarriageReturnRule = new Rule(/\\ r/, '\r');

  /** Markers of the form `{b^>123<b^}`, raw or with `&gt;`/`&lt;` entities -> removed. */
  static InlineFormattingRule = new Rule(/{b\^&gt;\d*&lt;b\^}|{b\^>\d*<b\^}/, '');

  /** Four or more dots, optional whitespace, then a number or number range (e.g. "..... 12-14") -> carriage return. */
  static TableOfContentsRule = new Rule(/\.{4,}\s*\d+-*\d*/, '\r');

  /** Five or more consecutive periods -> single space. */
  static ConsecutivePeriodsRule = new Rule(/\.{5,}/, ' ');

  /** Three consecutive forward slashes -> removed. */
  static ConsecutiveForwardSlashRule = new Rule(/\/{3}/, '');

  /** A period squeezed between a lowercase letter and an uppercase letter ("end.Start"). */
  static readonly NO_SPACE_BETWEEN_SENTENCES_REGEX = /(?<=[a-z])\.(?=[A-Z])/;
  /** Rewrites the period matched above as ". " (period plus space). */
  static NoSpaceBetweenSentencesRule = new Rule(CleanRules.NO_SPACE_BETWEEN_SENTENCES_REGEX, '. ');

  /** A period squeezed between a digit and an uppercase letter ("2020.Then"). */
  static readonly NO_SPACE_BETWEEN_SENTENCES_DIGIT_REGEX = /(?<=\d)\.(?=[A-Z])/;
  /** Rewrites the period matched above as ". " (period plus space). */
  static NoSpaceBetweenSentencesDigitRule = new Rule(CleanRules.NO_SPACE_BETWEEN_SENTENCES_DIGIT_REGEX, '. ');

  /**
   * Substrings associated with URLs and e-mail addresses.
   * NOTE(review): not used in this file; presumably callers use it to skip
   * the no-space-between-sentences rules on such tokens -- confirm at the call site.
   */
  static readonly URL_EMAIL_KEYWORDS = ['@', 'http', '.com', 'net', 'www', '//'];

  /** Newline preceded by whitespace and followed by a lowercase letter or "(". */
  static readonly NEWLINE_IN_MIDDLE_OF_SENTENCE_REGEX = /(?<=\s)\n(?=([a-z]|\())/;

  /**
   * Newline directly before a bullet character -> carriage return.
   * The lookahead previously read `•'`; the stray apostrophe meant the rule
   * only matched a bullet immediately followed by a quote character.
   */
  static NewLineFollowedByBulletRule = new Rule(/\n(?=•)/, '\r');

  /** Two consecutive apostrophes -> one double quote (pattern carries the global flag). */
  static QuotationsFirstRule = new Rule(/''/g, '"');

  /** Two consecutive backticks -> one double quote (pattern carries the global flag). */
  static QuotationsSecondRule = new Rule(/``/g, '"');
}

/**
 * Rules that strip HTML markup from text, both literal tags and tags whose
 * angle brackets arrive entity-escaped as `&lt;` / `&gt;`.
 */
export class HTML {
  /** `<`, optional `/`, one or more non-`>` characters, `>` -> removed (global flag). */
  static HTMLTagRule = new Rule(/<\/?[^>]+>/g, '');

  /**
   * `&lt;`, optional `/`, then the shortest run of characters up to the first
   * `&gt;` -> removed.
   *
   * The earlier pattern used the character class `[^gt;]`, which excludes the
   * individual letters "g", "t" and ";" rather than the sequence "gt;", so
   * tags such as `&lt;strong&gt;` or `&lt;table&gt;` were never matched.
   *
   * NOTE(review): unlike HTMLTagRule this pattern has no `g` flag; left as-is
   * because `Rule`'s matching strategy is not visible here -- confirm.
   */
  static EscapedHTMLTagRule = new Rule(/&lt;\/?[\s\S]*?&gt;/, '');

  /** Both HTML rules, literal-tag rule first. */
  static All = [HTML.HTMLTagRule, HTML.EscapedHTMLTagRule];
}

/**
 * Rules for joining lines that were hard-wrapped in text extracted from PDFs.
 */
export class PDF {
  /**
   * Drops a newline that sits between "<non-newline char><whitespace>" and a
   * non-whitespace character; the whitespace already present keeps the words
   * apart, so nothing is inserted.
   */
  static NewLineInMiddleOfSentenceRule = new Rule(
    /(?<=[^\n]\s)\n(?=\S)/,
    '',
  );

  /**
   * Turns a newline that is directly followed by a lowercase ASCII letter
   * into a single space.
   */
  static NewLineInMiddleOfSentenceNoSpacesRule = new Rule(
    /\n(?=[a-z])/,
    ' ',
  );
}
