site/node_modules/css-tree/lib/lexer/match.js
2024-10-14 08:09:33 +02:00

631 lines
19 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { MATCH, MISMATCH, DISALLOW_EMPTY } from './match-graph.js';
import * as TYPE from '../tokenizer/types.js';
const { hasOwnProperty } = Object.prototype;
const STUB = 0;
const TOKEN = 1;
const OPEN_SYNTAX = 2;
const CLOSE_SYNTAX = 3;
const EXIT_REASON_MATCH = 'Match';
const EXIT_REASON_MISMATCH = 'Mismatch';
const EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)';
const ITERATION_LIMIT = 15000;
export let totalIterationCount = 0;
function reverseList(list) {
let prev = null;
let next = null;
let item = list;
while (item !== null) {
next = item.prev;
item.prev = prev;
prev = item;
item = next;
}
return prev;
}
function areStringsEqualCaseInsensitive(testStr, referenceStr) {
if (testStr.length !== referenceStr.length) {
return false;
}
for (let i = 0; i < testStr.length; i++) {
const referenceCode = referenceStr.charCodeAt(i);
let testCode = testStr.charCodeAt(i);
// testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z).
if (testCode >= 0x0041 && testCode <= 0x005A) {
testCode = testCode | 32;
}
if (testCode !== referenceCode) {
return false;
}
}
return true;
}
function isContextEdgeDelim(token) {
if (token.type !== TYPE.Delim) {
return false;
}
// Fix matching for unicode-range: U+30??, U+FF00-FF9F
// Probably we need to check out previous match instead
return token.value !== '?';
}
function isCommaContextStart(token) {
if (token === null) {
return true;
}
return (
token.type === TYPE.Comma ||
token.type === TYPE.Function ||
token.type === TYPE.LeftParenthesis ||
token.type === TYPE.LeftSquareBracket ||
token.type === TYPE.LeftCurlyBracket ||
isContextEdgeDelim(token)
);
}
function isCommaContextEnd(token) {
if (token === null) {
return true;
}
return (
token.type === TYPE.RightParenthesis ||
token.type === TYPE.RightSquareBracket ||
token.type === TYPE.RightCurlyBracket ||
(token.type === TYPE.Delim && token.value === '/')
);
}
function internalMatch(tokens, state, syntaxes) {
function moveToNextToken() {
do {
tokenIndex++;
token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
} while (token !== null && (token.type === TYPE.WhiteSpace || token.type === TYPE.Comment));
}
function getNextToken(offset) {
const nextIndex = tokenIndex + offset;
return nextIndex < tokens.length ? tokens[nextIndex] : null;
}
function stateSnapshotFromSyntax(nextState, prev) {
return {
nextState,
matchStack,
syntaxStack,
thenStack,
tokenIndex,
prev
};
}
function pushThenStack(nextState) {
thenStack = {
nextState,
matchStack,
syntaxStack,
prev: thenStack
};
}
function pushElseStack(nextState) {
elseStack = stateSnapshotFromSyntax(nextState, elseStack);
}
function addTokenToMatch() {
matchStack = {
type: TOKEN,
syntax: state.syntax,
token,
prev: matchStack
};
moveToNextToken();
syntaxStash = null;
if (tokenIndex > longestMatch) {
longestMatch = tokenIndex;
}
}
function openSyntax() {
syntaxStack = {
syntax: state.syntax,
opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null,
prev: syntaxStack
};
matchStack = {
type: OPEN_SYNTAX,
syntax: state.syntax,
token: matchStack.token,
prev: matchStack
};
}
function closeSyntax() {
if (matchStack.type === OPEN_SYNTAX) {
matchStack = matchStack.prev;
} else {
matchStack = {
type: CLOSE_SYNTAX,
syntax: syntaxStack.syntax,
token: matchStack.token,
prev: matchStack
};
}
syntaxStack = syntaxStack.prev;
}
let syntaxStack = null;
let thenStack = null;
let elseStack = null;
// null stashing allowed, nothing stashed
// false stashing disabled, nothing stashed
// anithing else fail stashable syntaxes, some syntax stashed
let syntaxStash = null;
let iterationCount = 0; // count iterations and prevent infinite loop
let exitReason = null;
let token = null;
let tokenIndex = -1;
let longestMatch = 0;
let matchStack = {
type: STUB,
syntax: null,
token: null,
prev: null
};
moveToNextToken();
while (exitReason === null && ++iterationCount < ITERATION_LIMIT) {
// function mapList(list, fn) {
// const result = [];
// while (list) {
// result.unshift(fn(list));
// list = list.prev;
// }
// return result;
// }
// console.log('--\n',
// '#' + iterationCount,
// require('util').inspect({
// match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null),
// token: token && token.value,
// tokenIndex,
// syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '')
// }, { depth: null })
// );
switch (state.type) {
case 'Match':
if (thenStack === null) {
// turn to MISMATCH when some tokens left unmatched
if (token !== null) {
// doesn't mismatch if just one token left and it's an IE hack
if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) {
state = MISMATCH;
break;
}
}
// break the main loop, return a result - MATCH
exitReason = EXIT_REASON_MATCH;
break;
}
// go to next syntax (`then` branch)
state = thenStack.nextState;
// check match is not empty
if (state === DISALLOW_EMPTY) {
if (thenStack.matchStack === matchStack) {
state = MISMATCH;
break;
} else {
state = MATCH;
}
}
// close syntax if needed
while (thenStack.syntaxStack !== syntaxStack) {
closeSyntax();
}
// pop stack
thenStack = thenStack.prev;
break;
case 'Mismatch':
// when some syntax is stashed
if (syntaxStash !== null && syntaxStash !== false) {
// there is no else branches or a branch reduce match stack
if (elseStack === null || tokenIndex > elseStack.tokenIndex) {
// restore state from the stash
elseStack = syntaxStash;
syntaxStash = false; // disable stashing
}
} else if (elseStack === null) {
// no else branches -> break the main loop
// return a result - MISMATCH
exitReason = EXIT_REASON_MISMATCH;
break;
}
// go to next syntax (`else` branch)
state = elseStack.nextState;
// restore all the rest stack states
thenStack = elseStack.thenStack;
syntaxStack = elseStack.syntaxStack;
matchStack = elseStack.matchStack;
tokenIndex = elseStack.tokenIndex;
token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
// pop stack
elseStack = elseStack.prev;
break;
case 'MatchGraph':
state = state.match;
break;
case 'If':
// IMPORTANT: else stack push must go first,
// since it stores the state of thenStack before changes
if (state.else !== MISMATCH) {
pushElseStack(state.else);
}
if (state.then !== MATCH) {
pushThenStack(state.then);
}
state = state.match;
break;
case 'MatchOnce':
state = {
type: 'MatchOnceBuffer',
syntax: state,
index: 0,
mask: 0
};
break;
case 'MatchOnceBuffer': {
const terms = state.syntax.terms;
if (state.index === terms.length) {
// no matches at all or it's required all terms to be matched
if (state.mask === 0 || state.syntax.all) {
state = MISMATCH;
break;
}
// a partial match is ok
state = MATCH;
break;
}
// all terms are matched
if (state.mask === (1 << terms.length) - 1) {
state = MATCH;
break;
}
for (; state.index < terms.length; state.index++) {
const matchFlag = 1 << state.index;
if ((state.mask & matchFlag) === 0) {
// IMPORTANT: else stack push must go first,
// since it stores the state of thenStack before changes
pushElseStack(state);
pushThenStack({
type: 'AddMatchOnce',
syntax: state.syntax,
mask: state.mask | matchFlag
});
// match
state = terms[state.index++];
break;
}
}
break;
}
case 'AddMatchOnce':
state = {
type: 'MatchOnceBuffer',
syntax: state.syntax,
index: 0,
mask: state.mask
};
break;
case 'Enum':
if (token !== null) {
let name = token.value.toLowerCase();
// drop \0 and \9 hack from keyword name
if (name.indexOf('\\') !== -1) {
name = name.replace(/\\[09].*$/, '');
}
if (hasOwnProperty.call(state.map, name)) {
state = state.map[name];
break;
}
}
state = MISMATCH;
break;
case 'Generic': {
const opts = syntaxStack !== null ? syntaxStack.opts : null;
const lastTokenIndex = tokenIndex + Math.floor(state.fn(token, getNextToken, opts));
if (!isNaN(lastTokenIndex) && lastTokenIndex > tokenIndex) {
while (tokenIndex < lastTokenIndex) {
addTokenToMatch();
}
state = MATCH;
} else {
state = MISMATCH;
}
break;
}
case 'Type':
case 'Property': {
const syntaxDict = state.type === 'Type' ? 'types' : 'properties';
const dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null;
if (!dictSyntax || !dictSyntax.match) {
throw new Error(
'Bad syntax reference: ' +
(state.type === 'Type'
? '<' + state.name + '>'
: '<\'' + state.name + '\'>')
);
}
// stash a syntax for types with low priority
if (syntaxStash !== false && token !== null && state.type === 'Type') {
const lowPriorityMatching =
// https://drafts.csswg.org/css-values-4/#custom-idents
// When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production
// can only claim the keyword if no other unfulfilled production can claim it.
(state.name === 'custom-ident' && token.type === TYPE.Ident) ||
// https://drafts.csswg.org/css-values-4/#lengths
// ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height),
// it must parse as a <number>
(state.name === 'length' && token.value === '0');
if (lowPriorityMatching) {
if (syntaxStash === null) {
syntaxStash = stateSnapshotFromSyntax(state, elseStack);
}
state = MISMATCH;
break;
}
}
openSyntax();
state = dictSyntax.match;
break;
}
case 'Keyword': {
const name = state.name;
if (token !== null) {
let keywordName = token.value;
// drop \0 and \9 hack from keyword name
if (keywordName.indexOf('\\') !== -1) {
keywordName = keywordName.replace(/\\[09].*$/, '');
}
if (areStringsEqualCaseInsensitive(keywordName, name)) {
addTokenToMatch();
state = MATCH;
break;
}
}
state = MISMATCH;
break;
}
case 'AtKeyword':
case 'Function':
if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) {
addTokenToMatch();
state = MATCH;
break;
}
state = MISMATCH;
break;
case 'Token':
if (token !== null && token.value === state.value) {
addTokenToMatch();
state = MATCH;
break;
}
state = MISMATCH;
break;
case 'Comma':
if (token !== null && token.type === TYPE.Comma) {
if (isCommaContextStart(matchStack.token)) {
state = MISMATCH;
} else {
addTokenToMatch();
state = isCommaContextEnd(token) ? MISMATCH : MATCH;
}
} else {
state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH;
}
break;
case 'String':
let string = '';
let lastTokenIndex = tokenIndex;
for (; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) {
string += tokens[lastTokenIndex].value;
}
if (areStringsEqualCaseInsensitive(string, state.value)) {
while (tokenIndex < lastTokenIndex) {
addTokenToMatch();
}
state = MATCH;
} else {
state = MISMATCH;
}
break;
default:
throw new Error('Unknown node type: ' + state.type);
}
}
totalIterationCount += iterationCount;
switch (exitReason) {
case null:
console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations');
exitReason = EXIT_REASON_ITERATION_LIMIT;
matchStack = null;
break;
case EXIT_REASON_MATCH:
while (syntaxStack !== null) {
closeSyntax();
}
break;
default:
matchStack = null;
}
return {
tokens,
reason: exitReason,
iterations: iterationCount,
match: matchStack,
longestMatch
};
}
export function matchAsList(tokens, matchGraph, syntaxes) {
const matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
if (matchResult.match !== null) {
let item = reverseList(matchResult.match).prev;
matchResult.match = [];
while (item !== null) {
switch (item.type) {
case OPEN_SYNTAX:
case CLOSE_SYNTAX:
matchResult.match.push({
type: item.type,
syntax: item.syntax
});
break;
default:
matchResult.match.push({
token: item.token.value,
node: item.token.node
});
break;
}
item = item.prev;
}
}
return matchResult;
}
export function matchAsTree(tokens, matchGraph, syntaxes) {
const matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
if (matchResult.match === null) {
return matchResult;
}
let item = matchResult.match;
let host = matchResult.match = {
syntax: matchGraph.syntax || null,
match: []
};
const hostStack = [host];
// revert a list and start with 2nd item since 1st is a stub item
item = reverseList(item).prev;
// build a tree
while (item !== null) {
switch (item.type) {
case OPEN_SYNTAX:
host.match.push(host = {
syntax: item.syntax,
match: []
});
hostStack.push(host);
break;
case CLOSE_SYNTAX:
hostStack.pop();
host = hostStack[hostStack.length - 1];
break;
default:
host.match.push({
syntax: item.syntax || null,
token: item.token.value,
node: item.token.node
});
}
item = item.prev;
}
return matchResult;
}