import md5 from 'md5'
import { engContractions } from '.'
import { stopWords } from './services/stopWords'

/**
 * Replaces occurrences of `toBeReplaced` in `text` with `value`.
 *
 * Replacement is repeated until no occurrence is left, so matches created by
 * an earlier replacement are replaced too (`replaceAll('aaa', 'aa', 'a')`
 * yields `'a'`).
 *
 * @param text - Text to process.
 * @param toBeReplaced - Literal substring to look for.
 * @param value - Literal replacement; `$` sequences are not interpreted.
 * @returns The text with the replacements applied.
 */
export const replaceAll = (text: string, toBeReplaced: string, value: string) => {
  // An empty needle is "included" in every string, so the loop below would never end.
  if (toBeReplaced === '') return text

  // When the replacement re-introduces the needle the loop below would never end;
  // do a single pass over the original occurrences instead.
  if (value.includes(toBeReplaced)) return text.split(toBeReplaced).join(value)

  let _text = text
  // A replacer function inserts `value` verbatim instead of expanding `$&`, `$$`, ...
  while (_text.includes(toBeReplaced)) _text = _text.replace(toBeReplaced, () => value)
  return _text
}

/**
 * Deletes every textual marker made of a literal backslash, `U000e00` and one
 * to five hexadecimal digits.
 *
 * @param inputString - Text that may contain such markers.
 * @returns The text without the markers.
 */
function removeUnicodeMarkers(inputString: string) {
  const markerPattern = /\\U000e00[0-9a-fA-F]{1,5}/g
  return inputString.replace(markerPattern, '')
}

/**
 * Removes accents, emojis and symbols from a sentence.
 *
 * The text is decomposed (NFKD) so accented letters become a base letter plus
 * a combining mark, the unwanted code units are deleted, carriage returns are
 * turned into a space and the result is recomposed (NFKC).
 *
 * Note that the deletion range U+00A4–U+F8FF covers far more than emojis, so
 * characters of non-Latin scripts inside it are removed as well.
 */
export const accentRemove = (sentence: string) => {
  // Emojis, symbols and everything else in the listed ranges
  // eslint-disable-next-line max-len
  const emojiPattern = /([\u2700-\u27BF]|[\u00A4-\uF8FF]|[\uE000-\uF8FF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDFFF]|[\u2011-\u26FF]|\uD83E[\uDD10-\uDDFF]|\uFE0F|\u00A1)/g
  // Combining diacritical marks left behind by the decomposition
  const accentPattern = /[\u0300-\u036f]/g

  const decomposed = sentence.normalize('NFKD')
  const withoutEmojis = decomposed.replace(emojiPattern, '')
  const withoutCarriageReturns = withoutEmojis.replace(/[\r]+/gm, ' ')

  return withoutCarriageReturns.replace(accentPattern, '').normalize('NFKC')
}

/**
 * Lower-cases the sentence and replaces every space-separated word that has an
 * entry in `engContractions` with that entry.
 *
 * @param sentence - Sentence to process.
 * @returns The lower-cased sentence with the mapped words replaced.
 */
const replaceEnglishContractions = (sentence: string): string => {
  const hasOwn = Object.prototype.hasOwnProperty

  return sentence
    .toLowerCase()
    .split(' ')
    .map((word) => {
      // Only own entries count: a plain lookup would also resolve inherited
      // members, turning words such as "constructor" into a stringified function.
      const expansion = hasOwn.call(engContractions, word) ? engContractions[word] : undefined
      // Falsy entries (e.g. an empty string) keep the original word.
      return expansion ? expansion : word
    })
    .join(' ')
}

/**
 * Collapses runs of spaces into a single space and reduces punctuation marks
 * (`?`, `!`, `,`, `.`) that repeat with one space in between (e.g. `! !`) to a
 * single mark.
 *
 * @param _sentence - Sentence to clean up.
 * @returns The sentence without repeated spaces or space-separated repeated marks.
 */
export const replaceOne = (_sentence: string) => {
  const collapseSpaces = (text: string) => text.replace(/ {2,}/g, ' ')

  let result = collapseSpaces(_sentence)
  for (const mark of ['?', '!', ',', '.']) {
    const spacedPair = `${mark} ${mark}`
    // Repeat so that chains such as "! ! !" shrink down to a single mark.
    while (result.includes(spacedPair)) result = result.replace(spacedPair, mark)
  }

  return collapseSpaces(result)
}

/**
 * Removes every `?`, `!`, `,` and `.` from the sentence.
 *
 * @param _sentence - Sentence to strip.
 * @returns The sentence without those punctuation marks.
 */
const removePontuationSentence = (_sentence: string) => {
  const marks = ['?', '!', ',', '.']
  return marks.reduce((stripped, mark) => replaceAll(stripped, mark, ''), _sentence)
}

/**
 * Surrounds each of the given characters with spaces so they stand apart from
 * the neighbouring text.
 *
 * A space is inserted before a listed character unless the previous character
 * of the input is a space, and after it unless the next character of the input
 * is a space. As a consequence a listed character at the very start gets a
 * leading space and one at the very end gets a trailing space.
 *
 * @param _sentence - Sentence to process.
 * @param characters - Characters to isolate; defaults to `?`, `!`, `,` and `.`.
 * @returns The sentence with the listed characters separated by spaces.
 */
export const separe = (_sentence: string, characters: string[] = ['?', '!', ',', '.']) => {
  const units = _sentence.split('')
  const pieces: string[] = []

  units.forEach((unit, index) => {
    const isListed = characters.includes(unit)
    // Neighbours are taken from the input, not from the text built so far.
    const previous = index === 0 ? '' : units[index - 1]
    const next = units[index + 1]

    if (isListed && previous !== ' ') pieces.push(' ')
    pieces.push(unit)
    if (isListed && next !== ' ') pieces.push(' ')
  })

  return pieces.join('')
}

/**
 * Removes from the sentence the words listed in `stopWords[workspaceId]`,
 * comparing case-insensitively.
 *
 * @param _sentence - Sentence to filter.
 * @param workspaceId - Key used to look up the stop-word list.
 * @returns The remaining words joined by single spaces. When no list exists
 *   for the workspace the sentence is returned unfiltered (only no-break
 *   spaces are normalised) instead of being emptied.
 */
const removeStopWordsSentence = (_sentence: string, workspaceId: string) => {
  // No-break spaces must become regular spaces so the sentence splits into words correctly
  const aSentence = _sentence.replace(/\u00A0/g, ' ').split(' ')

  const _stopWords = stopWords[workspaceId]
  // Nothing to filter: keep the words rather than returning an empty sentence.
  if (!_stopWords) return aSentence.join(' ')

  // Lower-case the list once instead of once per word comparison.
  const loweredStopWords = new Set(_stopWords.map((word) => word.toLocaleLowerCase()))

  return aSentence.filter((el) => !loweredStopWords.has(el.toLocaleLowerCase())).join(' ')
}

/**
 * Normalises a sentence and returns either the normalised text or its MD5 hash.
 *
 * Steps, in order: trim and space out punctuation, turn line breaks into
 * spaces, normalise `U000e00` markers to their backslash-prefixed form,
 * optionally drop stop words, optionally drop punctuation, optionally apply
 * the destructive clean-up (marker removal, lower-casing, accent removal),
 * collapse duplicated spaces/punctuation and replace English contractions.
 *
 * @param sentence - Raw sentence.
 * @param workspaceId - Workspace whose stop-word list is used.
 * @param destructive - When true, removes markers, lower-cases and strips accents.
 * @param removePontuation - When true, removes `?`, `!`, `,` and `.`.
 * @param returnMd5 - When true, returns the MD5 hash instead of the text.
 * @param removeStopWords - When true, removes the workspace stop words.
 * @returns The MD5 hash of the processed sentence, or the processed sentence itself.
 */
export const preProcess = (
  sentence: string,
  workspaceId: string,
  destructive: boolean,
  removePontuation: boolean = false,
  returnMd5: boolean = true,
  removeStopWords: boolean = false,
) => {
  /** Matches the marker prefix with or without its leading backslash */
  // eslint-disable-next-line no-useless-escape
  const markerPrefix = /\U000e00|\\U000e00/g

  /** Trim, separate punctuation, drop line breaks and normalise marker prefixes */
  let processed = separe(sentence.trim())
    .replace(/[\n]+/gm, ' ')
    .replace(markerPrefix, '\\U000e00')

  if (removeStopWords) processed = removeStopWordsSentence(processed, workspaceId)
  if (removePontuation) processed = removePontuationSentence(processed)

  /** Lower-case the text and strip markers and accents */
  if (destructive) {
    const withoutMarkers = removeUnicodeMarkers(processed)
    processed = separe(accentRemove(withoutMarkers.toLowerCase())).trim()
  }

  /** Collapse duplicated punctuation, then replace English contractions */
  processed = replaceEnglishContractions(replaceOne(processed))

  return returnMd5 ? md5(processed) : processed
}

/**
 * Pre-processes a sentence with the settings used for records: destructive
 * clean-up on, punctuation kept, stop words removed, MD5 hash returned.
 *
 * @param sentence - Raw sentence.
 * @param workspaceId - Workspace whose stop-word list is used.
 * @returns The value returned by `preProcess` for those settings.
 */
export const preProcessSentenceToRecord = (sentence: string, workspaceId: string) => {
  const destructive = true
  const removePontuation = false
  const returnMd5 = true
  const removeStopWords = true

  return preProcess(sentence, workspaceId, destructive, removePontuation, returnMd5, removeStopWords)
}
