584 lines
14 KiB
JavaScript
584 lines
14 KiB
JavaScript
import { Tokenizer } from './tokenizer.js';
|
|
|
|
const TAB = 9;
|
|
const N = 10;
|
|
const F = 12;
|
|
const R = 13;
|
|
const SPACE = 32;
|
|
const EXCLAMATIONMARK = 33; // !
|
|
const NUMBERSIGN = 35; // #
|
|
const AMPERSAND = 38; // &
|
|
const APOSTROPHE = 39; // '
|
|
const LEFTPARENTHESIS = 40; // (
|
|
const RIGHTPARENTHESIS = 41; // )
|
|
const ASTERISK = 42; // *
|
|
const PLUSSIGN = 43; // +
|
|
const COMMA = 44; // ,
|
|
const HYPERMINUS = 45; // -
|
|
const LESSTHANSIGN = 60; // <
|
|
const GREATERTHANSIGN = 62; // >
|
|
const QUESTIONMARK = 63; // ?
|
|
const COMMERCIALAT = 64; // @
|
|
const LEFTSQUAREBRACKET = 91; // [
|
|
const RIGHTSQUAREBRACKET = 93; // ]
|
|
const LEFTCURLYBRACKET = 123; // {
|
|
const VERTICALLINE = 124; // |
|
|
const RIGHTCURLYBRACKET = 125; // }
|
|
const INFINITY = 8734; // ∞
|
|
const NAME_CHAR = new Uint8Array(128).map((_, idx) =>
|
|
/[a-zA-Z0-9\-]/.test(String.fromCharCode(idx)) ? 1 : 0
|
|
);
|
|
const COMBINATOR_PRECEDENCE = {
|
|
' ': 1,
|
|
'&&': 2,
|
|
'||': 3,
|
|
'|': 4
|
|
};
|
|
|
|
function scanSpaces(tokenizer) {
|
|
return tokenizer.substringToPos(
|
|
tokenizer.findWsEnd(tokenizer.pos)
|
|
);
|
|
}
|
|
|
|
function scanWord(tokenizer) {
|
|
let end = tokenizer.pos;
|
|
|
|
for (; end < tokenizer.str.length; end++) {
|
|
const code = tokenizer.str.charCodeAt(end);
|
|
if (code >= 128 || NAME_CHAR[code] === 0) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (tokenizer.pos === end) {
|
|
tokenizer.error('Expect a keyword');
|
|
}
|
|
|
|
return tokenizer.substringToPos(end);
|
|
}
|
|
|
|
function scanNumber(tokenizer) {
|
|
let end = tokenizer.pos;
|
|
|
|
for (; end < tokenizer.str.length; end++) {
|
|
const code = tokenizer.str.charCodeAt(end);
|
|
if (code < 48 || code > 57) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (tokenizer.pos === end) {
|
|
tokenizer.error('Expect a number');
|
|
}
|
|
|
|
return tokenizer.substringToPos(end);
|
|
}
|
|
|
|
function scanString(tokenizer) {
|
|
const end = tokenizer.str.indexOf('\'', tokenizer.pos + 1);
|
|
|
|
if (end === -1) {
|
|
tokenizer.pos = tokenizer.str.length;
|
|
tokenizer.error('Expect an apostrophe');
|
|
}
|
|
|
|
return tokenizer.substringToPos(end + 1);
|
|
}
|
|
|
|
function readMultiplierRange(tokenizer) {
|
|
let min = null;
|
|
let max = null;
|
|
|
|
tokenizer.eat(LEFTCURLYBRACKET);
|
|
|
|
min = scanNumber(tokenizer);
|
|
|
|
if (tokenizer.charCode() === COMMA) {
|
|
tokenizer.pos++;
|
|
if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
|
|
max = scanNumber(tokenizer);
|
|
}
|
|
} else {
|
|
max = min;
|
|
}
|
|
|
|
tokenizer.eat(RIGHTCURLYBRACKET);
|
|
|
|
return {
|
|
min: Number(min),
|
|
max: max ? Number(max) : 0
|
|
};
|
|
}
|
|
|
|
function readMultiplier(tokenizer) {
|
|
let range = null;
|
|
let comma = false;
|
|
|
|
switch (tokenizer.charCode()) {
|
|
case ASTERISK:
|
|
tokenizer.pos++;
|
|
|
|
range = {
|
|
min: 0,
|
|
max: 0
|
|
};
|
|
|
|
break;
|
|
|
|
case PLUSSIGN:
|
|
tokenizer.pos++;
|
|
|
|
range = {
|
|
min: 1,
|
|
max: 0
|
|
};
|
|
|
|
break;
|
|
|
|
case QUESTIONMARK:
|
|
tokenizer.pos++;
|
|
|
|
range = {
|
|
min: 0,
|
|
max: 1
|
|
};
|
|
|
|
break;
|
|
|
|
case NUMBERSIGN:
|
|
tokenizer.pos++;
|
|
|
|
comma = true;
|
|
|
|
if (tokenizer.charCode() === LEFTCURLYBRACKET) {
|
|
range = readMultiplierRange(tokenizer);
|
|
} else if (tokenizer.charCode() === QUESTIONMARK) {
|
|
// https://www.w3.org/TR/css-values-4/#component-multipliers
|
|
// > the # and ? multipliers may be stacked as #?
|
|
// In this case just treat "#?" as a single multiplier
|
|
// { min: 0, max: 0, comma: true }
|
|
tokenizer.pos++;
|
|
range = {
|
|
min: 0,
|
|
max: 0
|
|
};
|
|
} else {
|
|
range = {
|
|
min: 1,
|
|
max: 0
|
|
};
|
|
}
|
|
|
|
break;
|
|
|
|
case LEFTCURLYBRACKET:
|
|
range = readMultiplierRange(tokenizer);
|
|
break;
|
|
|
|
default:
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
type: 'Multiplier',
|
|
comma,
|
|
min: range.min,
|
|
max: range.max,
|
|
term: null
|
|
};
|
|
}
|
|
|
|
function maybeMultiplied(tokenizer, node) {
|
|
const multiplier = readMultiplier(tokenizer);
|
|
|
|
if (multiplier !== null) {
|
|
multiplier.term = node;
|
|
|
|
// https://www.w3.org/TR/css-values-4/#component-multipliers
|
|
// > The + and # multipliers may be stacked as +#;
|
|
// Represent "+#" as nested multipliers:
|
|
// { ...<multiplier #>,
|
|
// term: {
|
|
// ...<multipler +>,
|
|
// term: node
|
|
// }
|
|
// }
|
|
if (tokenizer.charCode() === NUMBERSIGN &&
|
|
tokenizer.charCodeAt(tokenizer.pos - 1) === PLUSSIGN) {
|
|
return maybeMultiplied(tokenizer, multiplier);
|
|
}
|
|
|
|
return multiplier;
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
function maybeToken(tokenizer) {
|
|
const ch = tokenizer.peek();
|
|
|
|
if (ch === '') {
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
type: 'Token',
|
|
value: ch
|
|
};
|
|
}
|
|
|
|
function readProperty(tokenizer) {
|
|
let name;
|
|
|
|
tokenizer.eat(LESSTHANSIGN);
|
|
tokenizer.eat(APOSTROPHE);
|
|
|
|
name = scanWord(tokenizer);
|
|
|
|
tokenizer.eat(APOSTROPHE);
|
|
tokenizer.eat(GREATERTHANSIGN);
|
|
|
|
return maybeMultiplied(tokenizer, {
|
|
type: 'Property',
|
|
name
|
|
});
|
|
}
|
|
|
|
// https://drafts.csswg.org/css-values-3/#numeric-ranges
|
|
// 4.1. Range Restrictions and Range Definition Notation
|
|
//
|
|
// Range restrictions can be annotated in the numeric type notation using CSS bracketed
|
|
// range notation—[min,max]—within the angle brackets, after the identifying keyword,
|
|
// indicating a closed range between (and including) min and max.
|
|
// For example, <integer [0, 10]> indicates an integer between 0 and 10, inclusive.
|
|
function readTypeRange(tokenizer) {
|
|
// use null for Infinity to make AST format JSON serializable/deserializable
|
|
let min = null; // -Infinity
|
|
let max = null; // Infinity
|
|
let sign = 1;
|
|
|
|
tokenizer.eat(LEFTSQUAREBRACKET);
|
|
|
|
if (tokenizer.charCode() === HYPERMINUS) {
|
|
tokenizer.peek();
|
|
sign = -1;
|
|
}
|
|
|
|
if (sign == -1 && tokenizer.charCode() === INFINITY) {
|
|
tokenizer.peek();
|
|
} else {
|
|
min = sign * Number(scanNumber(tokenizer));
|
|
|
|
if (NAME_CHAR[tokenizer.charCode()] !== 0) {
|
|
min += scanWord(tokenizer);
|
|
}
|
|
}
|
|
|
|
scanSpaces(tokenizer);
|
|
tokenizer.eat(COMMA);
|
|
scanSpaces(tokenizer);
|
|
|
|
if (tokenizer.charCode() === INFINITY) {
|
|
tokenizer.peek();
|
|
} else {
|
|
sign = 1;
|
|
|
|
if (tokenizer.charCode() === HYPERMINUS) {
|
|
tokenizer.peek();
|
|
sign = -1;
|
|
}
|
|
|
|
max = sign * Number(scanNumber(tokenizer));
|
|
|
|
if (NAME_CHAR[tokenizer.charCode()] !== 0) {
|
|
max += scanWord(tokenizer);
|
|
}
|
|
}
|
|
|
|
tokenizer.eat(RIGHTSQUAREBRACKET);
|
|
|
|
return {
|
|
type: 'Range',
|
|
min,
|
|
max
|
|
};
|
|
}
|
|
|
|
function readType(tokenizer) {
|
|
let name;
|
|
let opts = null;
|
|
|
|
tokenizer.eat(LESSTHANSIGN);
|
|
name = scanWord(tokenizer);
|
|
|
|
if (tokenizer.charCode() === LEFTPARENTHESIS &&
|
|
tokenizer.nextCharCode() === RIGHTPARENTHESIS) {
|
|
tokenizer.pos += 2;
|
|
name += '()';
|
|
}
|
|
|
|
if (tokenizer.charCodeAt(tokenizer.findWsEnd(tokenizer.pos)) === LEFTSQUAREBRACKET) {
|
|
scanSpaces(tokenizer);
|
|
opts = readTypeRange(tokenizer);
|
|
}
|
|
|
|
tokenizer.eat(GREATERTHANSIGN);
|
|
|
|
return maybeMultiplied(tokenizer, {
|
|
type: 'Type',
|
|
name,
|
|
opts
|
|
});
|
|
}
|
|
|
|
function readKeywordOrFunction(tokenizer) {
|
|
const name = scanWord(tokenizer);
|
|
|
|
if (tokenizer.charCode() === LEFTPARENTHESIS) {
|
|
tokenizer.pos++;
|
|
|
|
return {
|
|
type: 'Function',
|
|
name
|
|
};
|
|
}
|
|
|
|
return maybeMultiplied(tokenizer, {
|
|
type: 'Keyword',
|
|
name
|
|
});
|
|
}
|
|
|
|
function regroupTerms(terms, combinators) {
|
|
function createGroup(terms, combinator) {
|
|
return {
|
|
type: 'Group',
|
|
terms,
|
|
combinator,
|
|
disallowEmpty: false,
|
|
explicit: false
|
|
};
|
|
}
|
|
|
|
let combinator;
|
|
|
|
combinators = Object.keys(combinators)
|
|
.sort((a, b) => COMBINATOR_PRECEDENCE[a] - COMBINATOR_PRECEDENCE[b]);
|
|
|
|
while (combinators.length > 0) {
|
|
combinator = combinators.shift();
|
|
|
|
let i = 0;
|
|
let subgroupStart = 0;
|
|
|
|
for (; i < terms.length; i++) {
|
|
const term = terms[i];
|
|
|
|
if (term.type === 'Combinator') {
|
|
if (term.value === combinator) {
|
|
if (subgroupStart === -1) {
|
|
subgroupStart = i - 1;
|
|
}
|
|
terms.splice(i, 1);
|
|
i--;
|
|
} else {
|
|
if (subgroupStart !== -1 && i - subgroupStart > 1) {
|
|
terms.splice(
|
|
subgroupStart,
|
|
i - subgroupStart,
|
|
createGroup(terms.slice(subgroupStart, i), combinator)
|
|
);
|
|
i = subgroupStart + 1;
|
|
}
|
|
subgroupStart = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (subgroupStart !== -1 && combinators.length) {
|
|
terms.splice(
|
|
subgroupStart,
|
|
i - subgroupStart,
|
|
createGroup(terms.slice(subgroupStart, i), combinator)
|
|
);
|
|
}
|
|
}
|
|
|
|
return combinator;
|
|
}
|
|
|
|
function readImplicitGroup(tokenizer) {
|
|
const terms = [];
|
|
const combinators = {};
|
|
let token;
|
|
let prevToken = null;
|
|
let prevTokenPos = tokenizer.pos;
|
|
|
|
while (token = peek(tokenizer)) {
|
|
if (token.type !== 'Spaces') {
|
|
if (token.type === 'Combinator') {
|
|
// check for combinator in group beginning and double combinator sequence
|
|
if (prevToken === null || prevToken.type === 'Combinator') {
|
|
tokenizer.pos = prevTokenPos;
|
|
tokenizer.error('Unexpected combinator');
|
|
}
|
|
|
|
combinators[token.value] = true;
|
|
} else if (prevToken !== null && prevToken.type !== 'Combinator') {
|
|
combinators[' '] = true; // a b
|
|
terms.push({
|
|
type: 'Combinator',
|
|
value: ' '
|
|
});
|
|
}
|
|
|
|
terms.push(token);
|
|
prevToken = token;
|
|
prevTokenPos = tokenizer.pos;
|
|
}
|
|
}
|
|
|
|
// check for combinator in group ending
|
|
if (prevToken !== null && prevToken.type === 'Combinator') {
|
|
tokenizer.pos -= prevTokenPos;
|
|
tokenizer.error('Unexpected combinator');
|
|
}
|
|
|
|
return {
|
|
type: 'Group',
|
|
terms,
|
|
combinator: regroupTerms(terms, combinators) || ' ',
|
|
disallowEmpty: false,
|
|
explicit: false
|
|
};
|
|
}
|
|
|
|
function readGroup(tokenizer) {
|
|
let result;
|
|
|
|
tokenizer.eat(LEFTSQUAREBRACKET);
|
|
result = readImplicitGroup(tokenizer);
|
|
tokenizer.eat(RIGHTSQUAREBRACKET);
|
|
|
|
result.explicit = true;
|
|
|
|
if (tokenizer.charCode() === EXCLAMATIONMARK) {
|
|
tokenizer.pos++;
|
|
result.disallowEmpty = true;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
function peek(tokenizer) {
|
|
let code = tokenizer.charCode();
|
|
|
|
if (code < 128 && NAME_CHAR[code] === 1) {
|
|
return readKeywordOrFunction(tokenizer);
|
|
}
|
|
|
|
switch (code) {
|
|
case RIGHTSQUAREBRACKET:
|
|
// don't eat, stop scan a group
|
|
break;
|
|
|
|
case LEFTSQUAREBRACKET:
|
|
return maybeMultiplied(tokenizer, readGroup(tokenizer));
|
|
|
|
case LESSTHANSIGN:
|
|
return tokenizer.nextCharCode() === APOSTROPHE
|
|
? readProperty(tokenizer)
|
|
: readType(tokenizer);
|
|
|
|
case VERTICALLINE:
|
|
return {
|
|
type: 'Combinator',
|
|
value: tokenizer.substringToPos(
|
|
tokenizer.pos + (tokenizer.nextCharCode() === VERTICALLINE ? 2 : 1)
|
|
)
|
|
};
|
|
|
|
case AMPERSAND:
|
|
tokenizer.pos++;
|
|
tokenizer.eat(AMPERSAND);
|
|
|
|
return {
|
|
type: 'Combinator',
|
|
value: '&&'
|
|
};
|
|
|
|
case COMMA:
|
|
tokenizer.pos++;
|
|
return {
|
|
type: 'Comma'
|
|
};
|
|
|
|
case APOSTROPHE:
|
|
return maybeMultiplied(tokenizer, {
|
|
type: 'String',
|
|
value: scanString(tokenizer)
|
|
});
|
|
|
|
case SPACE:
|
|
case TAB:
|
|
case N:
|
|
case R:
|
|
case F:
|
|
return {
|
|
type: 'Spaces',
|
|
value: scanSpaces(tokenizer)
|
|
};
|
|
|
|
case COMMERCIALAT:
|
|
code = tokenizer.nextCharCode();
|
|
|
|
if (code < 128 && NAME_CHAR[code] === 1) {
|
|
tokenizer.pos++;
|
|
return {
|
|
type: 'AtKeyword',
|
|
name: scanWord(tokenizer)
|
|
};
|
|
}
|
|
|
|
return maybeToken(tokenizer);
|
|
|
|
case ASTERISK:
|
|
case PLUSSIGN:
|
|
case QUESTIONMARK:
|
|
case NUMBERSIGN:
|
|
case EXCLAMATIONMARK:
|
|
// prohibited tokens (used as a multiplier start)
|
|
break;
|
|
|
|
case LEFTCURLYBRACKET:
|
|
// LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
|
|
// check next char isn't a number, because it's likely a disjoined multiplier
|
|
code = tokenizer.nextCharCode();
|
|
|
|
if (code < 48 || code > 57) {
|
|
return maybeToken(tokenizer);
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
return maybeToken(tokenizer);
|
|
}
|
|
}
|
|
|
|
export function parse(source) {
|
|
const tokenizer = new Tokenizer(source);
|
|
const result = readImplicitGroup(tokenizer);
|
|
|
|
if (tokenizer.pos !== source.length) {
|
|
tokenizer.error('Unexpected input');
|
|
}
|
|
|
|
// reduce redundant groups with single group term
|
|
if (result.terms.length === 1 && result.terms[0].type === 'Group') {
|
|
return result.terms[0];
|
|
}
|
|
|
|
return result;
|
|
};
|