"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.default = void 0;

var _url = require("url");

var _htmlparser = require("htmlparser2");

var _loaderUtils = require("loader-utils");

var _HtmlSourceError = _interopRequireDefault(require("../HtmlSourceError"));

var _utils = require("../utils");

/**
 * Normalises the result of `require()` so that `.default` can always be read.
 *
 * @param {*} obj Value returned by `require()`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a new object whose `default` property is `obj`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    default: obj
  };
}

/**
 * Tells whether `character` is exactly one of the five ASCII whitespace
 * characters: tab, line feed, form feed, carriage return or space.
 *
 * @param {*} character Value to test; compared with strict equality.
 * @returns {boolean} `true` only for one of the five characters above.
 */
function isASCIIWhitespace(character) {
  switch (character) {
    case '\u0009': // Horizontal tab
    case '\u000A': // New line
    case '\u000C': // Form feed
    case '\u000D': // Carriage return
    case '\u0020': // Space
      return true;

    default:
      return false;
  }
}
// (Don't use \s, to avoid matching non-breaking space)
// Patterns used by the srcset parser. Whitespace is spelled out as an explicit
// character class (space, tab, line feed, carriage return, form feed) rather
// than \s, which would also match characters such as the non-breaking space.

// One or more whitespace characters at the start of the string.
// eslint-disable-next-line no-control-regex
const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;

// One or more commas or whitespace characters at the start of the string.
// eslint-disable-next-line no-control-regex
const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;

// One or more non-whitespace characters at the start of the string.
// eslint-disable-next-line no-control-regex
const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;

// One or more commas at the end of the string.
const regexTrailingCommas = /[,]+$/;

// A string made up solely of digits (at least one).
const regexNonNegativeInteger = /^\d+$/;

// An optionally negative integer or decimal, with or without an exponent.
// Must include at least one digit.
// According to spec tests any decimal point must be followed by a digit.
// No leading plus sign is allowed.
// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;

/**
 * Parses the value of a `srcset` attribute into a list of image candidates.
 *
 * The numbered comments inside follow the steps of the srcset parsing
 * algorithm they were written against; `tokenize` and `parseDescriptors` are
 * hoisted helpers that share the mutable locals declared at the top.
 *
 * @param {string} input Raw `srcset` attribute value.
 * @returns {Array<Object>} One entry per accepted candidate. Every entry has
 *   `source: { value, startIndex }`, where `startIndex` is the offset of the
 *   URL within `input`. `width`, `density` and `height` (each shaped
 *   `{ value: number }`) are only set when the parsed number is truthy, so a
 *   density of `0` passes validation but is not recorded.
 * @throws {Error} When no candidate was collected by the time the end of
 *   `input` is reached, or as soon as a candidate has an invalid descriptor.
 */
function parseSrcset(input) {
  // 1. Let input be the value passed to this algorithm.
  const inputLength = input.length;
  let url;
  let descriptors;
  let currentDescriptor;
  let state;
  let c;

  // 2. Let position be a pointer into input, initially pointing at the start
  //    of the string.
  let position = 0;

  // Offset within input at which the URL of the current candidate starts.
  let startUrlPosition;

  /**
   * Matches regEx against the not-yet-consumed remainder of input. On a match
   * the pointer is advanced past the matched text, which is returned; without
   * a match the pointer is left alone and the result is undefined.
   */
  // eslint-disable-next-line consistent-return
  function collectCharacters(regEx) {
    let chars;
    const match = regEx.exec(input.substring(position));

    if (match) {
      [chars] = match;
      position += chars.length;
      return chars;
    }
  }

  // 3. Let candidates be an initially empty source set.
  const candidates = [];

  // 4. Splitting loop: Collect a sequence of characters that are space
  //    characters or U+002C COMMA characters. If any U+002C COMMA characters
  //    were collected, that is a parse error.
  // eslint-disable-next-line no-constant-condition
  while (true) {
    collectCharacters(regexLeadingCommasOrSpaces);

    // 5. If position is past the end of input, return candidates and abort these steps.
    if (position >= inputLength) {
      if (candidates.length === 0) {
        throw new Error('Must contain one or more image candidate strings');
      }

      // (we're done, this is the sole return path)
      return candidates;
    }

    // 6. Collect a sequence of characters that are not space characters,
    //    and let that be url.
    startUrlPosition = position;
    url = collectCharacters(regexLeadingNotSpaces);

    // 7. Let descriptors be a new empty list.
    descriptors = [];

    // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
    //    (1). Remove all trailing U+002C COMMA characters from url. If this removed
    //         more than one character, that is a parse error.
    if (url.slice(-1) === ',') {
      url = url.replace(regexTrailingCommas, '');

      // (Jump ahead to step 9 to skip tokenization and just push the candidate).
      parseDescriptors();
    } // Otherwise, follow these substeps:
    else {
        tokenize();
      } // 16. Return to the step labeled splitting loop.

  }
  /**
   * Tokenizes descriptor properties prior to parsing.
   * Reads characters from the shared position pointer, fills the shared
   * descriptors list and hands over to parseDescriptors() when a comma or the
   * end of input is reached.
   * Returns undefined.
   */


  function tokenize() {
    // 8.1. Descriptor tokenizer: Skip whitespace
    collectCharacters(regexLeadingSpaces);

    // 8.2. Let current descriptor be the empty string.
    currentDescriptor = '';

    // 8.3. Let state be in descriptor.
    state = 'in descriptor';

    // eslint-disable-next-line no-constant-condition
    while (true) {
      // 8.4. Let c be the character at position.
      //  charAt() yields '' once position is past the end, which is how the
      //  "EOF" branches below are detected.
      c = input.charAt(position);

      //  Do the following depending on the value of state.
      //  For the purpose of this step, "EOF" is a special character representing
      //  that position is past the end of input.
      // In descriptor
      if (state === 'in descriptor') {
        // Do the following, depending on the value of c:
        // Space character
        // If current descriptor is not empty, append current descriptor to
        // descriptors and let current descriptor be the empty string.
        // Set state to after descriptor.
        if (isASCIIWhitespace(c)) {
          if (currentDescriptor) {
            descriptors.push(currentDescriptor);
            currentDescriptor = '';
            state = 'after descriptor';
          }
        } // U+002C COMMA (,)
        // Advance position to the next character in input. If current descriptor
        // is not empty, append current descriptor to descriptors. Jump to the step
        // labeled descriptor parser.
        else if (c === ',') {
            position += 1;

            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
            }

            parseDescriptors();
            return;
          } // U+0028 LEFT PARENTHESIS (()
          // Append c to current descriptor. Set state to in parens.
          else if (c === '\u0028') {
              currentDescriptor += c;
              state = 'in parens';
            } // EOF
            // If current descriptor is not empty, append current descriptor to
            // descriptors. Jump to the step labeled descriptor parser.
            else if (c === '') {
                if (currentDescriptor) {
                  descriptors.push(currentDescriptor);
                }

                parseDescriptors();
                return; // Anything else
                // Append c to current descriptor.
              } else {
                currentDescriptor += c;
              }
      } // In parens
      else if (state === 'in parens') {
          // U+0029 RIGHT PARENTHESIS ())
          // Append c to current descriptor. Set state to in descriptor.
          if (c === ')') {
            currentDescriptor += c;
            state = 'in descriptor';
          } // EOF
          // Append current descriptor to descriptors. Jump to the step labeled
          // descriptor parser.
          else if (c === '') {
              descriptors.push(currentDescriptor);
              parseDescriptors();
              return;
            } // Anything else
            // Append c to current descriptor.
            else {
                currentDescriptor += c;
              }
        } // After descriptor
        else if (state === 'after descriptor') {
            // Do the following, depending on the value of c:
            if (isASCIIWhitespace(c)) {} // Space character: Stay in this state.
            // EOF: Jump to the step labeled descriptor parser.
            else if (c === '') {
                parseDescriptors();
                return;
              } // Anything else
              // Set state to in descriptor. Set position to the previous character in input.
              // (The decrement cancels the increment below, so the same
              // character is read again in the "in descriptor" state.)
              else {
                  state = 'in descriptor';
                  position -= 1;
                }
          } // Advance position to the next character in input.


      position += 1;
    }
  }
  /**
   * Adds descriptor properties to a candidate, pushes to the candidates array.
   * Reads the shared url, startUrlPosition and descriptors locals.
   * Throws an Error when any descriptor is invalid.
   * @return undefined
   */
  // Declared outside of the while loop so that it's only created once.


  function parseDescriptors() {
    // 9. Descriptor parser: Let error be no.
    let pError = false;

    // 10. Let width be absent.
    // 11. Let density be absent.
    // 12. Let future-compat-h be absent. (We're implementing it now as h)
    let w;
    let d;
    let h;
    let i;
    const candidate = {};
    let desc;
    let lastChar;
    let value;
    let intVal;
    let floatVal;

    // 13. For each descriptor in descriptors, run the appropriate set of steps
    // from the following list:
    for (i = 0; i < descriptors.length; i++) {
      desc = descriptors[i];
      // The final character is the unit (w, x or h); everything before it is
      // the numeric part. Both numeric conversions are computed up front and
      // only used by the branch whose pattern accepts `value`.
      lastChar = desc[desc.length - 1];
      value = desc.substring(0, desc.length - 1);
      intVal = parseInt(value, 10);
      floatVal = parseFloat(value);

      // If the descriptor consists of a valid non-negative integer followed by
      // a U+0077 LATIN SMALL LETTER W character
      if (regexNonNegativeInteger.test(value) && lastChar === 'w') {
        // If width and density are not both absent, then let error be yes.
        if (w || d) {
          pError = true;
        } // Apply the rules for parsing non-negative integers to the descriptor.
        // If the result is zero, let error be yes.
        // Otherwise, let width be the result.


        if (intVal === 0) {
          pError = true;
        } else {
          w = intVal;
        }
      } // If the descriptor consists of a valid floating-point number followed by
      // a U+0078 LATIN SMALL LETTER X character
      else if (regexFloatingPoint.test(value) && lastChar === 'x') {
          // If width, density and future-compat-h are not all absent, then let error
          // be yes.
          if (w || d || h) {
            pError = true;
          } // Apply the rules for parsing floating-point number values to the descriptor.
          // If the result is less than zero, let error be yes. Otherwise, let density
          // be the result.


          if (floatVal < 0) {
            pError = true;
          } else {
            d = floatVal;
          }
        } // If the descriptor consists of a valid non-negative integer followed by
        // a U+0068 LATIN SMALL LETTER H character
        else if (regexNonNegativeInteger.test(value) && lastChar === 'h') {
            // If height and density are not both absent, then let error be yes.
            if (h || d) {
              pError = true;
            } // Apply the rules for parsing non-negative integers to the descriptor.
            // If the result is zero, let error be yes. Otherwise, let future-compat-h
            // be the result.


            if (intVal === 0) {
              pError = true;
            } else {
              h = intVal;
            } // Anything else, Let error be yes.

          } else {
            pError = true;
          }
    } // 15. If error is still no, then append a new image source to candidates whose
    // URL is url, associated with a width width if not absent and a pixel
    // density density if not absent. Otherwise, there is a parse error.


    if (!pError) {
      candidate.source = {
        value: url,
        startIndex: startUrlPosition
      };

      if (w) {
        candidate.width = {
          value: w
        };
      }

      if (d) {
        candidate.density = {
          value: d
        };
      }

      if (h) {
        candidate.height = {
          value: h
        };
      }

      candidates.push(candidate);
    } else {
      // NOTE(review): the loop above does not stop at the first failure, so
      // `desc` is the last descriptor visited, which is not necessarily the
      // one that set pError.
      throw new Error(`Invalid srcset descriptor found in '${input}' at '${desc}'`);
    }
  }
}

/**
 * Strips ASCII whitespace from both ends of a single-URL attribute value.
 *
 * @param {string} input Raw attribute value.
 * @returns {{value: string, startIndex: number}} The trimmed text and the
 *   number of leading whitespace characters that were skipped, i.e. the
 *   offset of `value` within `input`.
 * @throws {Error} 'Must be non-empty' when `input` is falsy or consists of
 *   ASCII whitespace only.
 */
function parseSrc(input) {
  if (!input) {
    throw new Error('Must be non-empty');
  }

  let first = 0;
  let last = input.length;

  // Walk inwards from the left edge...
  while (first < last && isASCIIWhitespace(input.charAt(first))) {
    first += 1;
  }

  // ...and from the right edge, never crossing the left pointer.
  while (last > first && isASCIIWhitespace(input.charAt(last - 1))) {
    last -= 1;
  }

  if (first === last) {
    throw new Error('Must be non-empty');
  }

  return {
    value: input.substring(first, last),
    startIndex: first
  };
}

/**
 * Looks up an attribute value by name, ignoring the case of the name.
 *
 * When several keys differ only by case, the one that comes last in
 * `Object.keys(attributes)` order wins.
 *
 * @param {Object} attributes Map of attribute name to attribute value.
 * @param {string} name Attribute name to look for, in any case.
 * @returns {*} The matching value, or `undefined` when no key matches.
 */
function getAttributeValue(attributes, name) {
  const wanted = name.toLowerCase();
  let matchedKey;

  for (const key of Object.keys(attributes)) {
    if (key.toLowerCase() === wanted) {
      matchedKey = key;
    }
  }

  // Return early on a miss: indexing with an undefined key would coerce it to
  // the string "undefined" and read an attribute that happens to have that name.
  if (typeof matchedKey === 'undefined') {
    return undefined; // eslint-disable-line no-undefined
  }

  return attributes[matchedKey];
}

/**
 * Built-in list of tag/attribute pairs whose values are treated as sources.
 *
 * Each entry has:
 *   - `tag`:       element name the rule applies to,
 *   - `attribute`: attribute name holding the URL(s),
 *   - `type`:      'src' for a single URL, 'srcset' for a srcset candidate list,
 *   - `filter`:    optional predicate `(tag, attribute, attributes)`; the
 *                  rule is skipped when it returns false.
 */
const defaultAttributes = [{
  tag: 'audio',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'embed',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'img',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'img',
  attribute: 'srcset',
  type: 'srcset'
}, {
  tag: 'input',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'link',
  attribute: 'href',
  type: 'src',
  // Only <link> elements whose `rel` mentions "stylesheet" and whose `type`,
  // when given and non-empty, is text/css are handled.
  filter: (tag, attribute, attributes) => {
    if (!/stylesheet/i.test(getAttributeValue(attributes, 'rel'))) {
      return false;
    }

    // Read `type` through the case-insensitive lookup for the presence check
    // as well; checking `attributes.type` directly would let e.g.
    // TYPE="text/javascript" bypass the comparison below.
    const type = getAttributeValue(attributes, 'type');

    if (type && type.trim().toLowerCase() !== 'text/css') {
      return false;
    }

    return true;
  }
}, {
  tag: 'object',
  attribute: 'data',
  type: 'src'
}, {
  tag: 'script',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'source',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'source',
  attribute: 'srcset',
  type: 'srcset'
}, {
  tag: 'track',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'video',
  attribute: 'poster',
  type: 'src'
}, {
  tag: 'video',
  attribute: 'src',
  type: 'src'
}];

/**
 * Creates the plugin that locates source URLs in HTML attributes and swaps
 * them for replacer placeholders.
 *
 * @param {Object} options
 * @param {(boolean|Object)} [options.attributes] `undefined` or `true` selects
 *   `defaultAttributes`. Otherwise an object is expected, from which `list`
 *   (falls back to `defaultAttributes`), `urlFilter` and `root` are read.
 * @param {*} [options.resourcePath] Passed through unchanged to attribute
 *   filters and to the URL filter.
 * @returns {function(string, Object): string} `process(html, result)`:
 *   parses `html`, pushes `error`, `import` and `replacer` entries onto
 *   `result.messages`, and returns `html` with every accepted URL replaced by
 *   a `___HTML_LOADER_REPLACER_n___` placeholder.
 */
var _default = options => function process(html, result) {
  let attributeList;
  let maybeUrlFilter;
  let root;

  if (typeof options.attributes === 'undefined' || options.attributes === true) {
    attributeList = defaultAttributes;
  } else {
    attributeList = options.attributes.list || defaultAttributes;
    ({
      urlFilter: maybeUrlFilter,
      root
    } = options.attributes);
  }

  const {
    resourcePath
  } = options;
  const sources = [];
  const urlFilter = (0, _utils.getFilter)(maybeUrlFilter, value => (0, _loaderUtils.isUrlRequest)(value, root));

  // Finds the first rule matching tag and attribute (both case-insensitively;
  // a rule without `tag` matches any element) whose optional filter accepts
  // the element.
  const getAttribute = (tag, attribute, attributes) => attributeList.find(element => {
    const tagMatches = typeof element.tag === 'undefined' || element.tag.toLowerCase() === tag.toLowerCase();

    if (!tagMatches || element.attribute.toLowerCase() !== attribute.toLowerCase()) {
      return false;
    }

    return element.filter ? element.filter(tag, attribute, attributes, resourcePath) : true;
  });

  // Records a value-parsing failure for the tag currently being parsed.
  // Only invoked from parser callbacks, i.e. after `parser` is initialised.
  const reportBadValue = (tag, attribute, error) => {
    result.messages.push({
      type: 'error',
      // eslint-disable-next-line no-use-before-define
      value: new _HtmlSourceError.default(`Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`, parser.startIndex, parser.endIndex, html)
    });
  };

  // Percent-decodes a URL for use as an import request. A malformed escape
  // sequence (for instance "100%.png") makes decodeURIComponent throw a
  // URIError; in that case the value is used as written instead of aborting
  // the whole run.
  const decodeRequest = value => {
    try {
      return decodeURIComponent(value);
    } catch (error) {
      if (!(error instanceof URIError)) {
        throw error;
      }

      return value;
    }
  };

  const parser = new _htmlparser.Parser({
    // Per-tag bookkeeping: attribute name -> { startIndex, unquoted }.
    attributesMeta: {},

    onattribute(name, value) {
      // The tokenizer index is read when the attribute callback fires; the
      // value is taken to end right there, so its start is found by
      // subtracting its length.
      // eslint-disable-next-line no-underscore-dangle
      const endIndex = parser._tokenizer._index;
      const startIndex = endIndex - value.length;
      const unquoted = html[endIndex] !== '"' && html[endIndex] !== "'";
      this.attributesMeta[name] = {
        startIndex,
        unquoted
      };
    },

    onopentag(tag, attributes) {
      Object.keys(attributes).forEach(attribute => {
        const value = attributes[attribute];
        const {
          startIndex: valueStartIndex,
          unquoted
        } = this.attributesMeta[attribute];
        const foundAttribute = getAttribute(tag, attribute, attributes);

        if (!foundAttribute) {
          return;
        }

        // Normalise both attribute kinds to a list of { value, startIndex }.
        let parsedSources;

        try {
          parsedSources = foundAttribute.type === 'srcset' ? parseSrcset(value).map(candidate => candidate.source) : [parseSrc(value)];
        } catch (error) {
          reportBadValue(tag, attribute, error);
          return;
        }

        parsedSources.forEach(source => {
          if (!urlFilter(attribute, source.value, resourcePath)) {
            return;
          }

          sources.push({
            startIndex: valueStartIndex + source.startIndex,
            value: source.value,
            unquoted
          });
        });
      });
      this.attributesMeta = {};
    },

    onerror(error) {
      result.messages.push({
        type: 'error',
        value: error
      });
    }

  }, {
    decodeEntities: false,
    lowerCaseTags: false,
    lowerCaseAttributeNames: false,
    recognizeCDATA: true,
    recognizeSelfClosing: true
  });
  parser.write(html);
  parser.end();
  const importsMap = new Map();
  const replacersMap = new Map();

  // Running difference between positions in the original html and in the
  // rewritten html; sources are processed in the order they were collected.
  let offset = 0;

  for (const source of sources) {
    const {
      startIndex,
      unquoted
    } = source;

    // Length of the text to cut out of the html. Taken from the attribute
    // text itself, before source.value is rewritten without its hash.
    const valueLength = source.value.length;
    const URLObject = (0, _url.parse)(source.value);
    const {
      hash
    } = URLObject;

    if (hash) {
      // The hash is not part of the import request; it travels with the
      // replacer message instead.
      URLObject.hash = null;
      source.value = URLObject.format();
    }

    const importKey = (0, _loaderUtils.urlToRequest)(decodeRequest(source.value), root);
    let importName = importsMap.get(importKey);

    if (!importName) {
      importName = `___HTML_LOADER_IMPORT_${importsMap.size}___`;
      importsMap.set(importKey, importName);
      result.messages.push({
        type: 'import',
        value: {
          type: 'source',
          source: importKey,
          importName
        }
      });
    }

    // One replacer per distinct (request, quoting, hash) combination.
    const replacerKey = JSON.stringify({
      importKey,
      unquoted,
      hash
    });
    let replacerName = replacersMap.get(replacerKey);

    if (!replacerName) {
      replacerName = `___HTML_LOADER_REPLACER_${replacersMap.size}___`;
      replacersMap.set(replacerKey, replacerName);
      result.messages.push({
        type: 'replacer',
        value: {
          type: 'source',
          hash,
          importName,
          replacerName,
          unquoted
        }
      });
    }

    const cutStart = startIndex + offset; // eslint-disable-next-line no-param-reassign

    html = html.slice(0, cutStart) + replacerName + html.slice(cutStart + valueLength);
    offset += replacerName.length - valueLength;
  }

  return html;
};

exports.default = _default;