You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

633 lines
18 KiB

"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.default = void 0;
var _url = require("url");
var _htmlparser = require("htmlparser2");
var _loaderUtils = require("loader-utils");
var _HtmlSourceError = _interopRequireDefault(require("../HtmlSourceError"));
var _utils = require("../utils");
/**
 * Interop helper emitted by the compiler: makes a required module look like
 * an ES module namespace.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged with `__esModule`,
 *   otherwise a wrapper object exposing `obj` under the `default` key.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return {
    default: obj
  };
}
/**
 * Tells whether `character` is exactly one of the five ASCII whitespace
 * characters: tab, line feed, form feed, carriage return or space.
 *
 * The comparison is an exact match, so the empty string, longer strings and
 * non-string values are never whitespace.
 *
 * @param {string} character - The single character to classify.
 * @returns {boolean} `true` only for U+0009, U+000A, U+000C, U+000D, U+0020.
 */
function isASCIIWhitespace(character) {
  switch (character) {
    case '\u0009': // Horizontal tab
    case '\u000A': // New line
    case '\u000C': // Form feed
    case '\u000D': // Carriage return
    case '\u0020': // Space
      return true;
    default:
      return false;
  }
}
// (Don't use \s, to avoid matching non-breaking space)
// Anchored patterns used by the srcset parser. Each one is applied to the
// not-yet-consumed remainder of the input, hence the leading `^`.

// A leading run of spaces, tabs, line feeds, carriage returns or form feeds.
// eslint-disable-next-line no-control-regex
const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
// A leading run of commas and/or the whitespace characters listed above.
// eslint-disable-next-line no-control-regex
const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
// A leading run of characters that are none of the whitespace characters
// listed above (commas are allowed here).
// eslint-disable-next-line no-control-regex
const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
// One or more commas at the very end of a string.
const regexTrailingCommas = /[,]+$/;
// A string made only of decimal digits (at least one).
const regexNonNegativeInteger = /^\d+$/;
// (Positive or negative or unsigned integers or decimals, with or without exponents.
// Must include at least one digit.
// According to spec tests any decimal point must be followed by a digit.
// No leading plus sign is allowed.)
// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;
/**
 * Parses the value of a `srcset` attribute into its image candidates,
 * following the numbered steps of the HTML "parse a srcset attribute"
 * algorithm (step numbers are kept in the comments below).
 *
 * Unlike the specification, which skips a malformed candidate, this
 * implementation throws as soon as a candidate has an invalid descriptor.
 *
 * @param {string} input - Raw attribute value.
 * @returns {Array<Object>} One entry per candidate, in input order. Every
 *   entry has `source: { value, startIndex }`, where `startIndex` is the
 *   offset of the URL inside `input`. Optional `width`, `density` and
 *   `height` entries (each `{ value: number }`) are present when the matching
 *   `w`, `x` or `h` descriptor was given.
 * @throws {Error} When `input` holds no candidate at all, or when a
 *   candidate carries an invalid or conflicting descriptor.
 */
function parseSrcset(input) {
  // 1. Let input be the value passed to this algorithm.
  const inputLength = input.length;
  let url;
  let descriptors;
  let currentDescriptor;
  let state;
  let c;

  // 2. Let position be a pointer into input, initially pointing at the start
  // of the string.
  let position = 0;
  let startUrlPosition;

  // 3. Let candidates be an initially empty source set.
  const candidates = [];

  /**
   * Consumes the longest prefix of the remaining input matched by `regEx`
   * and advances `position` past it.
   * Returns the consumed text, or undefined when nothing matched.
   */
  // eslint-disable-next-line consistent-return
  function collectCharacters(regEx) {
    const match = regEx.exec(input.substring(position));

    if (match) {
      const [chars] = match;
      position += chars.length;
      return chars;
    }
  }

  /**
   * Tokenizes descriptor properties prior to parsing
   * Returns undefined.
   */
  function tokenize() {
    // 8.1. Descriptor tokenizer: Skip whitespace
    collectCharacters(regexLeadingSpaces);

    // 8.2. Let current descriptor be the empty string.
    currentDescriptor = '';

    // 8.3. Let state be in descriptor.
    state = 'in descriptor';

    // eslint-disable-next-line no-constant-condition
    while (true) {
      // 8.4. Let c be the character at position.
      // Do the following depending on the value of state.
      // For the purpose of this step, "EOF" is a special character representing
      // that position is past the end of input (charAt yields '' there).
      c = input.charAt(position);

      if (state === 'in descriptor') {
        if (isASCIIWhitespace(c)) {
          // Space character
          // If current descriptor is not empty, append current descriptor to
          // descriptors and let current descriptor be the empty string.
          // Set state to after descriptor.
          if (currentDescriptor) {
            descriptors.push(currentDescriptor);
            currentDescriptor = '';
            state = 'after descriptor';
          }
        } else if (c === ',') {
          // U+002C COMMA (,)
          // Advance position to the next character in input. If current descriptor
          // is not empty, append current descriptor to descriptors. Jump to the step
          // labeled descriptor parser.
          position += 1;

          if (currentDescriptor) {
            descriptors.push(currentDescriptor);
          }

          parseDescriptors();
          return;
        } else if (c === '\u0028') {
          // U+0028 LEFT PARENTHESIS (()
          // Append c to current descriptor. Set state to in parens.
          currentDescriptor += c;
          state = 'in parens';
        } else if (c === '') {
          // EOF
          // If current descriptor is not empty, append current descriptor to
          // descriptors. Jump to the step labeled descriptor parser.
          if (currentDescriptor) {
            descriptors.push(currentDescriptor);
          }

          parseDescriptors();
          return;
        } else {
          // Anything else
          // Append c to current descriptor.
          currentDescriptor += c;
        }
      } else if (state === 'in parens') {
        if (c === ')') {
          // U+0029 RIGHT PARENTHESIS ())
          // Append c to current descriptor. Set state to in descriptor.
          currentDescriptor += c;
          state = 'in descriptor';
        } else if (c === '') {
          // EOF
          // Append current descriptor to descriptors. Jump to the step labeled
          // descriptor parser.
          descriptors.push(currentDescriptor);
          parseDescriptors();
          return;
        } else {
          // Anything else
          // Append c to current descriptor.
          currentDescriptor += c;
        }
      } else if (state === 'after descriptor') {
        if (c === '') {
          // EOF: Jump to the step labeled descriptor parser.
          parseDescriptors();
          return;
        }

        // Space character: Stay in this state.
        if (!isASCIIWhitespace(c)) {
          // Anything else
          // Set state to in descriptor. Set position to the previous character in input.
          state = 'in descriptor';
          position -= 1;
        }
      }

      // Advance position to the next character in input.
      position += 1;
    }
  }

  /**
   * Adds descriptor properties to a candidate, pushes to the candidates array
   * @return undefined
   */
  function parseDescriptors() {
    // 9. Descriptor parser: Let error be no.
    let pError = false;
    // First descriptor that caused an error, so the message points at the
    // actual culprit rather than at whichever descriptor was visited last.
    let badDescriptor;

    // 10. Let width be absent.
    // 11. Let density be absent.
    // 12. Let future-compat-h be absent. (We're implementing it now as h)
    let w;
    let d;
    let h;

    // "Absent" is tracked with undefined rather than truthiness, because a
    // density of 0 is a legal value and must still count as present.
    const isPresent = (descriptorValue) => typeof descriptorValue !== 'undefined';
    const markError = (descriptor) => {
      if (!pError) {
        pError = true;
        badDescriptor = descriptor;
      }
    };

    // 13. For each descriptor in descriptors, run the appropriate set of steps
    // from the following list:
    for (const desc of descriptors) {
      const lastChar = desc[desc.length - 1];
      const value = desc.substring(0, desc.length - 1);

      if (regexNonNegativeInteger.test(value) && lastChar === 'w') {
        // If the descriptor consists of a valid non-negative integer followed by
        // a U+0077 LATIN SMALL LETTER W character
        // If width and density are not both absent, then let error be yes.
        if (isPresent(w) || isPresent(d)) {
          markError(desc);
        }

        // Apply the rules for parsing non-negative integers to the descriptor.
        // If the result is zero, let error be yes.
        // Otherwise, let width be the result.
        const intVal = parseInt(value, 10);

        if (intVal === 0) {
          markError(desc);
        } else {
          w = intVal;
        }
      } else if (regexFloatingPoint.test(value) && lastChar === 'x') {
        // If the descriptor consists of a valid floating-point number followed by
        // a U+0078 LATIN SMALL LETTER X character
        // If width, density and future-compat-h are not all absent, then let error
        // be yes.
        if (isPresent(w) || isPresent(d) || isPresent(h)) {
          markError(desc);
        }

        // Apply the rules for parsing floating-point number values to the descriptor.
        // If the result is less than zero, let error be yes. Otherwise, let density
        // be the result.
        const floatVal = parseFloat(value);

        if (floatVal < 0) {
          markError(desc);
        } else {
          d = floatVal;
        }
      } else if (regexNonNegativeInteger.test(value) && lastChar === 'h') {
        // If the descriptor consists of a valid non-negative integer followed by
        // a U+0068 LATIN SMALL LETTER H character
        // If height and density are not both absent, then let error be yes.
        if (isPresent(h) || isPresent(d)) {
          markError(desc);
        }

        // Apply the rules for parsing non-negative integers to the descriptor.
        // If the result is zero, let error be yes. Otherwise, let future-compat-h
        // be the result.
        const intVal = parseInt(value, 10);

        if (intVal === 0) {
          markError(desc);
        } else {
          h = intVal;
        }
      } else {
        // Anything else, Let error be yes.
        markError(desc);
      }
    }

    // 15. If error is still no, then append a new image source to candidates whose
    // URL is url, associated with a width width if not absent and a pixel
    // density density if not absent. Otherwise, there is a parse error.
    if (pError) {
      throw new Error(`Invalid srcset descriptor found in '${input}' at '${badDescriptor}'`);
    }

    const candidate = {
      source: {
        value: url,
        startIndex: startUrlPosition
      }
    };

    if (isPresent(w)) {
      candidate.width = {
        value: w
      };
    }

    if (isPresent(d)) {
      candidate.density = {
        value: d
      };
    }

    if (isPresent(h)) {
      candidate.height = {
        value: h
      };
    }

    candidates.push(candidate);
  }

  // 4. Splitting loop: Collect a sequence of characters that are space
  // characters or U+002C COMMA characters. If any U+002C COMMA characters
  // were collected, that is a parse error.
  // eslint-disable-next-line no-constant-condition
  while (true) {
    collectCharacters(regexLeadingCommasOrSpaces);

    // 5. If position is past the end of input, return candidates and abort these steps.
    if (position >= inputLength) {
      if (candidates.length === 0) {
        throw new Error('Must contain one or more image candidate strings');
      }

      // (we're done, this is the sole return path)
      return candidates;
    }

    // 6. Collect a sequence of characters that are not space characters,
    // and let that be url.
    startUrlPosition = position;
    url = collectCharacters(regexLeadingNotSpaces);

    // 7. Let descriptors be a new empty list.
    descriptors = [];

    // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
    // (1). Remove all trailing U+002C COMMA characters from url. If this removed
    // more than one character, that is a parse error.
    if (url.slice(-1) === ',') {
      url = url.replace(regexTrailingCommas, '');

      // (Jump ahead to step 9 to skip tokenization and just push the candidate).
      parseDescriptors();
    } else {
      // Otherwise, follow these substeps:
      tokenize();
    }

    // 16. Return to the step labeled splitting loop.
  }
}
/**
 * Strips leading and trailing ASCII whitespace from a single-URL attribute
 * value and reports where the remaining text starts.
 *
 * @param {string} input - Raw attribute value.
 * @returns {{value: string, startIndex: number}} The trimmed value and the
 *   number of leading whitespace characters that were skipped.
 * @throws {Error} 'Must be non-empty' when `input` is falsy or consists of
 *   whitespace only.
 */
function parseSrc(input) {
  if (!input) {
    throw new Error('Must be non-empty');
  }

  // Walk inwards from both ends instead of re-slicing the string each step.
  let head = 0;
  let tail = input.length;

  while (head < tail && isASCIIWhitespace(input.charAt(head))) {
    head += 1;
  }

  while (tail > head && isASCIIWhitespace(input.charAt(tail - 1))) {
    tail -= 1;
  }

  const trimmed = input.substring(head, tail);

  if (!trimmed) {
    throw new Error('Must be non-empty');
  }

  return {
    value: trimmed,
    startIndex: head
  };
}
/**
 * Looks up an attribute value by name, ignoring the letter case of both the
 * requested name and the stored keys.
 *
 * When several keys differ only by case, the one that comes last in
 * `Object.keys(attributes)` wins.
 *
 * @param {Object} attributes - Attribute name/value pairs of an element.
 * @param {string} name - Attribute name to look for, in any case.
 * @returns {*} The stored value, or undefined when no key matches.
 */
function getAttributeValue(attributes, name) {
  const wanted = name.toLowerCase();
  let found;

  // Compare keys directly instead of going through an intermediate lookup
  // object: a miss must yield undefined, not whatever happens to be stored
  // under the key "undefined" or inherited from Object.prototype.
  for (const key of Object.keys(attributes)) {
    if (key.toLowerCase() === wanted) {
      found = attributes[key];
    }
  }

  return found;
}
/**
 * Tag/attribute pairs that are treated as resource references when the
 * `attributes` option does not supply its own list.
 *
 * Each entry has:
 * - `tag`: element name the rule applies to,
 * - `attribute`: attribute holding the reference,
 * - `type`: 'src' for a single URL, 'srcset' for a candidate list,
 * - `filter` (optional): predicate called with (tag, attribute, attributes);
 *   the rule is skipped when it returns false.
 */
const defaultAttributes = [{
  tag: 'audio',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'embed',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'img',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'img',
  attribute: 'srcset',
  type: 'srcset'
}, {
  tag: 'input',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'link',
  attribute: 'href',
  type: 'src',
  // Only stylesheet links are handled, and only when their type (if any)
  // is text/css.
  filter: (tag, attribute, attributes) => {
    if (!/stylesheet/i.test(getAttributeValue(attributes, 'rel'))) {
      return false;
    }

    // Read `type` through the same case-insensitive lookup as `rel`, so that
    // TYPE="..." or Type="..." is checked too.
    const linkType = getAttributeValue(attributes, 'type');

    if (linkType && linkType.trim().toLowerCase() !== 'text/css') {
      return false;
    }

    return true;
  }
}, {
  tag: 'object',
  attribute: 'data',
  type: 'src'
}, {
  tag: 'script',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'source',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'source',
  attribute: 'srcset',
  type: 'srcset'
}, {
  tag: 'track',
  attribute: 'src',
  type: 'src'
}, {
  tag: 'video',
  attribute: 'poster',
  type: 'src'
}, {
  tag: 'video',
  attribute: 'src',
  type: 'src'
}];
/**
 * Plugin factory: given the loader options, returns a `process` function that
 * finds resource references in an HTML string and swaps each of them for a
 * placeholder name.
 *
 * Options read here:
 * - `options.attributes`: when undefined or `true`, the default attribute list
 *   is used; otherwise `list` (falling back to the default list), `urlFilter`
 *   and `root` are taken from it.
 * - `options.resourcePath`: passed through to attribute and URL filters.
 *
 * `process(html, result)`:
 * - pushes onto `result.messages` one `import` message per distinct request,
 *   one `replacer` message per distinct (request, quoting, hash) combination,
 *   and an `error` message for every attribute value that cannot be parsed or
 *   for errors reported by the HTML parser;
 * - returns `html` with every accepted reference replaced by its
 *   `___HTML_LOADER_REPLACER_n___` name.
 *
 * @param {Object} options - Loader options (see above).
 * @returns {function(string, Object): string} The `process` function.
 */
var _default = options => function process(html, result) {
  let attributeList;
  let maybeUrlFilter;
  let root;

  if (typeof options.attributes === 'undefined' || options.attributes === true) {
    attributeList = defaultAttributes;
  } else {
    attributeList = options.attributes.list || defaultAttributes;
    ({
      urlFilter: maybeUrlFilter,
      root
    } = options.attributes);
  }

  // References found while parsing: { startIndex, value, unquoted }.
  const sources = [];
  const urlFilter = (0, _utils.getFilter)(maybeUrlFilter, value => (0, _loaderUtils.isUrlRequest)(value, root));

  // Finds the first rule matching this tag/attribute pair (case-insensitive).
  // A rule without `tag` applies to every element; a rule with `filter` only
  // applies when the filter returns a truthy value.
  const getAttribute = (tag, attribute, attributes, resourcePath) => {
    return attributeList.find(element => (typeof element.tag === 'undefined' || typeof element.tag !== 'undefined' && element.tag.toLowerCase() === tag.toLowerCase()) && element.attribute.toLowerCase() === attribute.toLowerCase() && (element.filter ? element.filter(tag, attribute, attributes, resourcePath) : true));
  };

  const {
    resourcePath
  } = options;

  // Reports an attribute value that could not be parsed. Only called from the
  // parser callbacks, i.e. after `parser` below has been initialised.
  const reportBadValue = (attribute, tag, error) => {
    result.messages.push({
      type: 'error',
      value: new _HtmlSourceError.default(`Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`, parser.startIndex, parser.endIndex, html)
    });
  };

  // Records one parsed reference unless the URL filter rejects it.
  const recordSource = (attribute, parsed, valueStartIndex, unquoted) => {
    if (!urlFilter(attribute, parsed.value, resourcePath)) {
      return;
    }

    sources.push({
      startIndex: valueStartIndex + parsed.startIndex,
      value: parsed.value,
      unquoted
    });
  };

  const parser = new _htmlparser.Parser({
    // Per-tag scratch space: attribute name -> { startIndex, unquoted }.
    attributesMeta: {},

    onattribute(name, value) {
      // NOTE(review): relies on the tokenizer's private `_index`, which this
      // code treats as the offset just past the attribute value — confirm
      // when upgrading the HTML parser.
      // eslint-disable-next-line no-underscore-dangle
      const endIndex = parser._tokenizer._index;
      const startIndex = endIndex - value.length;
      // The value is considered unquoted when it is not followed by a quote.
      const unquoted = html[endIndex] !== '"' && html[endIndex] !== "'";
      this.attributesMeta[name] = {
        startIndex,
        unquoted
      };
    },

    onopentag(tag, attributes) {
      Object.keys(attributes).forEach(attribute => {
        const value = attributes[attribute];
        const {
          startIndex: valueStartIndex,
          unquoted
        } = this.attributesMeta[attribute];
        const foundAttribute = getAttribute(tag, attribute, attributes, resourcePath);

        if (!foundAttribute) {
          return;
        }

        const {
          type
        } = foundAttribute;

        if (type === 'srcset') {
          let sourceSet;

          try {
            sourceSet = parseSrcset(value);
          } catch (error) {
            reportBadValue(attribute, tag, error);
            return;
          }

          sourceSet.forEach(sourceItem => {
            recordSource(attribute, sourceItem.source, valueStartIndex, unquoted);
          });
          return;
        }

        let source;

        try {
          source = parseSrc(value);
        } catch (error) {
          reportBadValue(attribute, tag, error);
          return;
        }

        recordSource(attribute, source, valueStartIndex, unquoted);
      });
      // Start the next tag with a clean slate.
      this.attributesMeta = {};
    },

    onerror(error) {
      result.messages.push({
        type: 'error',
        value: error
      });
    }

  }, {
    decodeEntities: false,
    lowerCaseTags: false,
    lowerCaseAttributeNames: false,
    recognizeCDATA: true,
    recognizeSelfClosing: true
  });
  parser.write(html);
  parser.end();

  // request -> import name, and (request, quoting, hash) -> replacer name.
  const importsMap = new Map();
  const replacersMap = new Map();
  // Running difference in length between the rewritten and the original
  // html; the recorded start indexes refer to the original string.
  let offset = 0;

  for (const source of sources) {
    const {
      startIndex,
      unquoted
    } = source;
    let {
      value
    } = source;
    const URLObject = (0, _url.parse)(value);
    const {
      hash
    } = URLObject;

    if (hash) {
      // The fragment is not part of the request; it travels with the
      // replacer message instead.
      URLObject.hash = null;
      source.value = URLObject.format();
      value = value.slice(0, value.length - hash.length);
    }

    let decodedValue;

    try {
      decodedValue = decodeURIComponent(source.value);
    } catch (error) {
      // Malformed percent-encoding (for instance a literal "%" in a file
      // name) makes decodeURIComponent throw. Fall back to the raw value
      // rather than aborting the whole document.
      decodedValue = source.value;
    }

    const importKey = (0, _loaderUtils.urlToRequest)(decodedValue, root);
    let importName = importsMap.get(importKey);

    if (!importName) {
      importName = `___HTML_LOADER_IMPORT_${importsMap.size}___`;
      importsMap.set(importKey, importName);
      result.messages.push({
        type: 'import',
        value: {
          type: 'source',
          source: importKey,
          importName
        }
      });
    }

    const replacerKey = JSON.stringify({
      importKey,
      unquoted,
      hash
    });
    let replacerName = replacersMap.get(replacerKey);

    if (!replacerName) {
      replacerName = `___HTML_LOADER_REPLACER_${replacersMap.size}___`;
      replacersMap.set(replacerKey, replacerName);
      result.messages.push({
        type: 'replacer',
        value: {
          type: 'source',
          hash,
          importName,
          replacerName,
          unquoted
        }
      });
    }

    // Length of the reference as written in the html, fragment included.
    const valueLength = hash ? value.length + hash.length : value.length;
    const replaceFrom = startIndex + offset;

    // eslint-disable-next-line no-param-reassign
    html = html.slice(0, replaceFrom) + replacerName + html.slice(replaceFrom + valueLength);
    offset += replacerName.length - valueLength;
  }

  return html;
};
exports.default = _default;