import { PROD_MODE } from '@reverbdotcom/env';
import DOMPurify from 'dompurify';
import { decode, encode } from 'html-entities';

import { info, gauge, currentSession } from './elog';

// Payload attached to the `unsafe_href` log entry; assembled by `buildData`.
interface Data {
  // Result of `currentSession().referer()`.
  referer: string;
  // Result of `currentSession().url()`.
  url: string;
  // Result of `currentSession().pageViewID()`.
  page_view_id: string;
  // The raw `href` attribute value that triggered the log entry.
  href: string;
}

// This hook's logic mimics that of the Ruby gem we use in core's SanitizationHelper class:
// https://github.com/rgrove/sanitize/blob/v6.0.0/lib/sanitize/transformers/clean_element.rb#L140-L144
//
// It only REPORTS hrefs whose protocol is outside the allowlist (via `info` and `gauge`);
// it does not strip or rewrite the attribute.
if (DOMPurify.isSupported) {
  const HREF = 'href';
  const PROTOCOL_ALLOWLIST = ['http', 'https'];
  // Captures everything before the first ':' (or its numeric entity spellings `&#58` / `&#x3a`),
  // provided no '/' or '#' comes first -- so relative paths and fragments never match.
  // `^` is the JavaScript equivalent of Ruby's `\A` start-of-string anchor (JS has no `\A`,
  // and without the `m` flag `^` only matches at the very start of the input).
  const PROTOCOL_REGEX = /^\s*([^/#]*?)(?::|&#0*58|&#x0*3a)/i;
  const UNSAFE_HREF_LOG_TAG = 'unsafe_href';

  DOMPurify.addHook('afterSanitizeAttributes', (node) => {
    // `getAttribute` returns null when the attribute is absent.
    const href = node.getAttribute(HREF);
    if (href === null) { return; }

    // No match means the href has no explicit protocol (relative URL, fragment, ...): nothing to report.
    const match = PROTOCOL_REGEX.exec(href);
    if (!match) { return; }

    // Compare the captured protocol only (group 1), not the whole match, which includes the ':' delimiter.
    const protocol = (match[1] ?? '').toLowerCase();
    if (PROTOCOL_ALLOWLIST.includes(protocol)) { return; }

    info(UNSAFE_HREF_LOG_TAG, buildData(href));

    gauge({
      name: UNSAFE_HREF_LOG_TAG,
      value: 1,
    });
  });
}

/**
 * Assembles the payload that is logged alongside an unsafe href.
 *
 * @param href - the raw `href` attribute value being reported
 * @returns the current session's referer, url and page view id, together with `href`
 */
const buildData = (href: string): Data => {
  const activeSession = currentSession();

  // Properties are evaluated top to bottom, so the session getters run in the
  // order referer -> url -> pageViewID.
  return {
    referer: activeSession.referer(),
    url: activeSession.url(),
    page_view_id: activeSession.pageViewID(),
    href,
  };
};

export const Purify = {
  /**
   * Sanitizes `raw` with DOMPurify, forwarding `tags` as the DOMPurify config.
   *
   * When DOMPurify is not supported in the current environment, `raw` is returned
   * UNSANITIZED, except in `PROD_MODE`, where an error is thrown instead.
   *
   * @param raw - the markup to sanitize
   * @param tags - optional config object passed straight through to `DOMPurify.sanitize`
   * @returns the sanitized markup (or `raw` untouched outside `PROD_MODE` when unsupported)
   * @throws Error when DOMPurify is unsupported and `PROD_MODE` is set
   */
  sanitize(raw: string, tags?: object): string {
    if (DOMPurify.isSupported === false) {
      if (PROD_MODE) {
        throw new Error('Sanitize does not work in non DOM environments-- do not run this in production');
      }

      return raw;
    }

    return DOMPurify.sanitize(raw, tags);
  },

  /**
   * Translates a user-inputted html string into a decoded plaintext string, which can
   * be used as input in a normal textarea/contenteditable.
   *
   * This should NOT be used as part of any actual HTML render process in the DOM.
   *
   * ex. input:
   * '<p>hello i have &gt;3 apples and &lt;11 oranges</p><script>alert("bad!")</script><p>and they taste <strong>good</strong> brother.</p>'
   * ex. output:
   * 'hello i have >3 apples and <11 oranges\n\nand they taste good brother.'
   *
   * @param htmlInput - the html string to flatten
   * @returns the entity-decoded, trimmed plaintext
   * @throws Error when DOMPurify is unsupported in the current environment
   */
  htmlToPlaintextInput(htmlInput: string): string {
    if (DOMPurify.isSupported === false) {
      throw new Error('DOMPurify does not work in non DOM environments');
    }

    DOMPurify.addHook('uponSanitizeElement', function (node) {
      if (!node.textContent) { return; }
      if (['p', 'li'].includes(node.tagName?.toLowerCase())) {
        // split <p> and <li> content into separate lines
        node.textContent = `${node.textContent.trim()}\n\n`;
      }
    });

    let sanitized: string;
    try {
      sanitized = DOMPurify.sanitize(htmlInput, { ALLOWED_ATTR: [], ALLOWED_TAGS: [] });
    } finally {
      // The hook is registered on the shared DOMPurify instance, so it must be removed
      // even if sanitize throws; otherwise it would keep rewriting <p>/<li> text in
      // every later sanitize call.
      DOMPurify.removeHook('uponSanitizeElement');
    }

    return decode(sanitized).trim();
  },

  /**
   * Inverse of `htmlToPlaintextInput`: takes a user's plaintext input string and encodes
   * it into a simple html blob, where each non-empty line of text is coerced into a <p> node.
   *
   * ex. input:
   * 'hello i have >3 apples and <11 oranges\n\n\n\n\n\nand they taste good brother.'
   * ex. output:
   * '<p>hello i have &gt;3 apples and &lt;11 oranges</p><p>and they taste good brother.</p>'
   *
   * @param plaintextInput - the plaintext to convert, with lines separated by '\n'
   * @returns the concatenated <p> nodes; '' when there is no non-empty line
   * @throws Error when DOMPurify is unsupported in the current environment
   */
  plaintextToHtmlInput(plaintextInput: string): string {
    if (DOMPurify.isSupported === false) {
      throw new Error('DOMPurify does not work in non DOM environments');
    }

    return plaintextInput.split('\n').reduce((accum, currentLine) => {
      // Empty lines (e.g. from consecutive newlines) produce no paragraph.
      if (!currentLine) return accum;
      const sanitizedLine = DOMPurify.sanitize(encode(currentLine), { ALLOWED_ATTR: [], ALLOWED_TAGS: [] });
      // Likewise skip lines that sanitize down to nothing.
      if (!sanitizedLine) return accum;

      return `${accum}<p>${sanitizedLine}</p>`;
    }, '');
  },
};

export default Purify;
