// parse a single path portion
import { parseClass } from './brace-expressions.js';
|
|
import { unescape } from './unescape.js';
|
|
// The set of characters that introduce an extglob when followed by `(`.
const types = new Set(['!', '?', '+', '*', '@']);
// True when `c` is one of the extglob operator characters above.
const isExtglobType = (c) => types.has(c);
// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';
// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. ( is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
// Characters that keep their backslash when they appear escaped in a glob
// (the Set is built by iterating the string, one entry per character).
const reSpecials = new Set('().*{}+?[]^$\\!');
// Backslash-escape every character of `s` that is special in a RegExp source.
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark + '+?';
|
|
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
export class AST {
|
|
type;
|
|
#root;
|
|
#hasMagic;
|
|
#uflag = false;
|
|
#parts = [];
|
|
#parent;
|
|
#parentIndex;
|
|
#negs;
|
|
#filledNegs = false;
|
|
#options;
|
|
#toString;
|
|
// set to true if it's an extglob with no children
|
|
// (which really means one child of '')
|
|
#emptyExt = false;
|
|
constructor(type, parent, options = {}) {
|
|
this.type = type;
|
|
// extglobs are inherently magical
|
|
if (type)
|
|
this.#hasMagic = true;
|
|
this.#parent = parent;
|
|
this.#root = this.#parent ? this.#parent.#root : this;
|
|
this.#options = this.#root === this ? options : this.#root.#options;
|
|
this.#negs = this.#root === this ? [] : this.#root.#negs;
|
|
if (type === '!' && !this.#root.#filledNegs)
|
|
this.#negs.push(this);
|
|
this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
|
|
}
|
|
get hasMagic() {
|
|
/* c8 ignore start */
|
|
if (this.#hasMagic !== undefined)
|
|
return this.#hasMagic;
|
|
/* c8 ignore stop */
|
|
for (const p of this.#parts) {
|
|
if (typeof p === 'string')
|
|
continue;
|
|
if (p.type || p.hasMagic)
|
|
return (this.#hasMagic = true);
|
|
}
|
|
// note: will be undefined until we generate the regexp src and find out
|
|
return this.#hasMagic;
|
|
}
|
|
// reconstructs the pattern
|
|
toString() {
|
|
if (this.#toString !== undefined)
|
|
return this.#toString;
|
|
if (!this.type) {
|
|
return (this.#toString = this.#parts.map(p => String(p)).join(''));
|
|
}
|
|
else {
|
|
return (this.#toString =
|
|
this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
|
|
}
|
|
}
|
|
#fillNegs() {
|
|
/* c8 ignore start */
|
|
if (this !== this.#root)
|
|
throw new Error('should only call on root');
|
|
if (this.#filledNegs)
|
|
return this;
|
|
/* c8 ignore stop */
|
|
// call toString() once to fill this out
|
|
this.toString();
|
|
this.#filledNegs = true;
|
|
let n;
|
|
while ((n = this.#negs.pop())) {
|
|
if (n.type !== '!')
|
|
continue;
|
|
// walk up the tree, appending everthing that comes AFTER parentIndex
|
|
let p = n;
|
|
let pp = p.#parent;
|
|
while (pp) {
|
|
for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
|
|
for (const part of n.#parts) {
|
|
/* c8 ignore start */
|
|
if (typeof part === 'string') {
|
|
throw new Error('string part in extglob AST??');
|
|
}
|
|
/* c8 ignore stop */
|
|
part.copyIn(pp.#parts[i]);
|
|
}
|
|
}
|
|
p = pp;
|
|
pp = p.#parent;
|
|
}
|
|
}
|
|
return this;
|
|
}
|
|
push(...parts) {
|
|
for (const p of parts) {
|
|
if (p === '')
|
|
continue;
|
|
/* c8 ignore start */
|
|
if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
|
|
throw new Error('invalid part: ' + p);
|
|
}
|
|
/* c8 ignore stop */
|
|
this.#parts.push(p);
|
|
}
|
|
}
|
|
toJSON() {
|
|
const ret = this.type === null
|
|
? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
|
|
: [this.type, ...this.#parts.map(p => p.toJSON())];
|
|
if (this.isStart() && !this.type)
|
|
ret.unshift([]);
|
|
if (this.isEnd() &&
|
|
(this === this.#root ||
|
|
(this.#root.#filledNegs && this.#parent?.type === '!'))) {
|
|
ret.push({});
|
|
}
|
|
return ret;
|
|
}
|
|
isStart() {
|
|
if (this.#root === this)
|
|
return true;
|
|
// if (this.type) return !!this.#parent?.isStart()
|
|
if (!this.#parent?.isStart())
|
|
return false;
|
|
if (this.#parentIndex === 0)
|
|
return true;
|
|
// if everything AHEAD of this is a negation, then it's still the "start"
|
|
const p = this.#parent;
|
|
for (let i = 0; i < this.#parentIndex; i++) {
|
|
const pp = p.#parts[i];
|
|
if (!(pp instanceof AST && pp.type === '!')) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
isEnd() {
|
|
if (this.#root === this)
|
|
return true;
|
|
if (this.#parent?.type === '!')
|
|
return true;
|
|
if (!this.#parent?.isEnd())
|
|
return false;
|
|
if (!this.type)
|
|
return this.#parent?.isEnd();
|
|
// if not root, it'll always have a parent
|
|
/* c8 ignore start */
|
|
const pl = this.#parent ? this.#parent.#parts.length : 0;
|
|
/* c8 ignore stop */
|
|
return this.#parentIndex === pl - 1;
|
|
}
|
|
copyIn(part) {
|
|
if (typeof part === 'string')
|
|
this.push(part);
|
|
else
|
|
this.push(part.clone(this));
|
|
}
|
|
clone(parent) {
|
|
const c = new AST(this.type, parent);
|
|
for (const p of this.#parts) {
|
|
c.copyIn(p);
|
|
}
|
|
return c;
|
|
}
|
|
static #parseAST(str, ast, pos, opt) {
|
|
let escaping = false;
|
|
let inBrace = false;
|
|
let braceStart = -1;
|
|
let braceNeg = false;
|
|
if (ast.type === null) {
|
|
// outside of a extglob, append until we find a start
|
|
let i = pos;
|
|
let acc = '';
|
|
while (i < str.length) {
|
|
const c = str.charAt(i++);
|
|
// still accumulate escapes at this point, but we do ignore
|
|
// starts that are escaped
|
|
if (escaping || c === '\\') {
|
|
escaping = !escaping;
|
|
acc += c;
|
|
continue;
|
|
}
|
|
if (inBrace) {
|
|
if (i === braceStart + 1) {
|
|
if (c === '^' || c === '!') {
|
|
braceNeg = true;
|
|
}
|
|
}
|
|
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
|
|
inBrace = false;
|
|
}
|
|
acc += c;
|
|
continue;
|
|
}
|
|
else if (c === '[') {
|
|
inBrace = true;
|
|
braceStart = i;
|
|
braceNeg = false;
|
|
acc += c;
|
|
continue;
|
|
}
|
|
if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
|
|
ast.push(acc);
|
|
acc = '';
|
|
const ext = new AST(c, ast);
|
|
i = AST.#parseAST(str, ext, i, opt);
|
|
ast.push(ext);
|
|
continue;
|
|
}
|
|
acc += c;
|
|
}
|
|
ast.push(acc);
|
|
return i;
|
|
}
|
|
// some kind of extglob, pos is at the (
|
|
// find the next | or )
|
|
let i = pos + 1;
|
|
let part = new AST(null, ast);
|
|
const parts = [];
|
|
let acc = '';
|
|
while (i < str.length) {
|
|
const c = str.charAt(i++);
|
|
// still accumulate escapes at this point, but we do ignore
|
|
// starts that are escaped
|
|
if (escaping || c === '\\') {
|
|
escaping = !escaping;
|
|
acc += c;
|
|
continue;
|
|
}
|
|
if (inBrace) {
|
|
if (i === braceStart + 1) {
|
|
if (c === '^' || c === '!') {
|
|
braceNeg = true;
|
|
}
|
|
}
|
|
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
|
|
inBrace = false;
|
|
}
|
|
acc += c;
|
|
continue;
|
|
}
|
|
else if (c === '[') {
|
|
inBrace = true;
|
|
braceStart = i;
|
|
braceNeg = false;
|
|
acc += c;
|
|
continue;
|
|
}
|
|
if (isExtglobType(c) && str.charAt(i) === '(') {
|
|
part.push(acc);
|
|
acc = '';
|
|
const ext = new AST(c, part);
|
|
part.push(ext);
|
|
i = AST.#parseAST(str, ext, i, opt);
|
|
continue;
|
|
}
|
|
if (c === '|') {
|
|
part.push(acc);
|
|
acc = '';
|
|
parts.push(part);
|
|
part = new AST(null, ast);
|
|
continue;
|
|
}
|
|
if (c === ')') {
|
|
if (acc === '' && ast.#parts.length === 0) {
|
|
ast.#emptyExt = true;
|
|
}
|
|
part.push(acc);
|
|
acc = '';
|
|
ast.push(...parts, part);
|
|
return i;
|
|
}
|
|
acc += c;
|
|
}
|
|
// unfinished extglob
|
|
// if we got here, it was a malformed extglob! not an extglob, but
|
|
// maybe something else in there.
|
|
ast.type = null;
|
|
ast.#hasMagic = undefined;
|
|
ast.#parts = [str.substring(pos - 1)];
|
|
return i;
|
|
}
|
|
static fromGlob(pattern, options = {}) {
|
|
const ast = new AST(null, undefined, options);
|
|
AST.#parseAST(pattern, ast, 0, options);
|
|
return ast;
|
|
}
|
|
// returns the regular expression if there's magic, or the unescaped
|
|
// string if not.
|
|
toMMPattern() {
|
|
// should only be called on root
|
|
/* c8 ignore start */
|
|
if (this !== this.#root)
|
|
return this.#root.toMMPattern();
|
|
/* c8 ignore stop */
|
|
const glob = this.toString();
|
|
const [re, body, hasMagic, uflag] = this.toRegExpSource();
|
|
// if we're in nocase mode, and not nocaseMagicOnly, then we do
|
|
// still need a regular expression if we have to case-insensitively
|
|
// match capital/lowercase characters.
|
|
const anyMagic = hasMagic ||
|
|
this.#hasMagic ||
|
|
(this.#options.nocase &&
|
|
!this.#options.nocaseMagicOnly &&
|
|
glob.toUpperCase() !== glob.toLowerCase());
|
|
if (!anyMagic) {
|
|
return body;
|
|
}
|
|
const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
|
|
return Object.assign(new RegExp(`^${re}$`, flags), {
|
|
_src: re,
|
|
_glob: glob,
|
|
});
|
|
}
|
|
// returns the string match, the regexp source, whether there's magic
|
|
// in the regexp (so a regular expression is required) and whether or
|
|
// not the uflag is needed for the regular expression (for posix classes)
|
|
// TODO: instead of injecting the start/end at this point, just return
|
|
// the BODY of the regexp, along with the start/end portions suitable
|
|
// for binding the start/end in either a joined full-path makeRe context
|
|
// (where we bind to (^|/), or a standalone matchPart context (where
|
|
// we bind to ^, and not /). Otherwise slashes get duped!
|
|
//
|
|
// In part-matching mode, the start is:
|
|
// - if not isStart: nothing
|
|
// - if traversal possible, but not allowed: ^(?!\.\.?$)
|
|
// - if dots allowed or not possible: ^
|
|
// - if dots possible and not allowed: ^(?!\.)
|
|
// end is:
|
|
// - if not isEnd(): nothing
|
|
// - else: $
|
|
//
|
|
// In full-path matching mode, we put the slash at the START of the
|
|
// pattern, so start is:
|
|
// - if first pattern: same as part-matching mode
|
|
// - if not isStart(): nothing
|
|
// - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
|
|
// - if dots allowed or not possible: /
|
|
// - if dots possible and not allowed: /(?!\.)
|
|
// end is:
|
|
// - if last pattern, same as part-matching mode
|
|
// - else nothing
|
|
//
|
|
// Always put the (?:$|/) on negated tails, though, because that has to be
|
|
// there to bind the end of the negated pattern portion, and it's easier to
|
|
// just stick it in now rather than try to inject it later in the middle of
|
|
// the pattern.
|
|
//
|
|
// We can just always return the same end, and leave it up to the caller
|
|
// to know whether it's going to be used joined or in parts.
|
|
// And, if the start is adjusted slightly, can do the same there:
|
|
// - if not isStart: nothing
|
|
// - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
|
|
// - if dots allowed or not possible: (?:/|^)
|
|
// - if dots possible and not allowed: (?:/|^)(?!\.)
|
|
//
|
|
// But it's better to have a simpler binding without a conditional, for
|
|
// performance, so probably better to return both start options.
|
|
//
|
|
// Then the caller just ignores the end if it's not the first pattern,
|
|
// and the start always gets applied.
|
|
//
|
|
// But that's always going to be $ if it's the ending pattern, or nothing,
|
|
// so the caller can just attach $ at the end of the pattern when building.
|
|
//
|
|
// So the todo is:
|
|
// - better detect what kind of start is needed
|
|
// - return both flavors of starting pattern
|
|
// - attach $ at the end of the pattern when creating the actual RegExp
|
|
//
|
|
// Ah, but wait, no, that all only applies to the root when the first pattern
|
|
// is not an extglob. If the first pattern IS an extglob, then we need all
|
|
// that dot prevention biz to live in the extglob portions, because eg
|
|
// +(*|.x*) can match .xy but not .yx.
|
|
//
|
|
// So, return the two flavors if it's #root and the first child is not an
|
|
// AST, otherwise leave it to the child AST to handle it, and there,
|
|
// use the (?:^|/) style of start binding.
|
|
//
|
|
// Even simplified further:
|
|
// - Since the start for a join is eg /(?!\.) and the start for a part
|
|
// is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
|
|
// or start or whatever) and prepend ^ or / at the Regexp construction.
|
|
toRegExpSource(allowDot) {
|
|
const dot = allowDot ?? !!this.#options.dot;
|
|
if (this.#root === this)
|
|
this.#fillNegs();
|
|
if (!this.type) {
|
|
const noEmpty = this.isStart() && this.isEnd();
|
|
const src = this.#parts
|
|
.map(p => {
|
|
const [re, _, hasMagic, uflag] = typeof p === 'string'
|
|
? AST.#parseGlob(p, this.#hasMagic, noEmpty)
|
|
: p.toRegExpSource(allowDot);
|
|
this.#hasMagic = this.#hasMagic || hasMagic;
|
|
this.#uflag = this.#uflag || uflag;
|
|
return re;
|
|
})
|
|
.join('');
|
|
let start = '';
|
|
if (this.isStart()) {
|
|
if (typeof this.#parts[0] === 'string') {
|
|
// this is the string that will match the start of the pattern,
|
|
// so we need to protect against dots and such.
|
|
// '.' and '..' cannot match unless the pattern is that exactly,
|
|
// even if it starts with . or dot:true is set.
|
|
const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
|
|
if (!dotTravAllowed) {
|
|
const aps = addPatternStart;
|
|
// check if we have a possibility of matching . or ..,
|
|
// and prevent that.
|
|
const needNoTrav =
|
|
// dots are allowed, and the pattern starts with [ or .
|
|
(dot && aps.has(src.charAt(0))) ||
|
|
// the pattern starts with \., and then [ or .
|
|
(src.startsWith('\\.') && aps.has(src.charAt(2))) ||
|
|
// the pattern starts with \.\., and then [ or .
|
|
(src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
|
|
// no need to prevent dots if it can't match a dot, or if a
|
|
// sub-pattern will be preventing it anyway.
|
|
const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
|
|
start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
|
|
}
|
|
}
|
|
}
|
|
// append the "end of path portion" pattern to negation tails
|
|
let end = '';
|
|
if (this.isEnd() &&
|
|
this.#root.#filledNegs &&
|
|
this.#parent?.type === '!') {
|
|
end = '(?:$|\\/)';
|
|
}
|
|
const final = start + src + end;
|
|
return [
|
|
final,
|
|
unescape(src),
|
|
(this.#hasMagic = !!this.#hasMagic),
|
|
this.#uflag,
|
|
];
|
|
}
|
|
// We need to calculate the body *twice* if it's a repeat pattern
|
|
// at the start, once in nodot mode, then again in dot mode, so a
|
|
// pattern like *(?) can match 'x.y'
|
|
const repeated = this.type === '*' || this.type === '+';
|
|
// some kind of extglob
|
|
const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
|
|
let body = this.#partsToRegExp(dot);
|
|
if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
|
|
// invalid extglob, has to at least be *something* present, if it's
|
|
// the entire path portion.
|
|
const s = this.toString();
|
|
this.#parts = [s];
|
|
this.type = null;
|
|
this.#hasMagic = undefined;
|
|
return [s, unescape(this.toString()), false, false];
|
|
}
|
|
// XXX abstract out this map method
|
|
let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot
|
|
? ''
|
|
: this.#partsToRegExp(true);
|
|
if (bodyDotAllowed === body) {
|
|
bodyDotAllowed = '';
|
|
}
|
|
if (bodyDotAllowed) {
|
|
body = `(?:${body})(?:${bodyDotAllowed})*?`;
|
|
}
|
|
// an empty !() is exactly equivalent to a starNoEmpty
|
|
let final = '';
|
|
if (this.type === '!' && this.#emptyExt) {
|
|
final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
|
|
}
|
|
else {
|
|
const close = this.type === '!'
|
|
? // !() must match something,but !(x) can match ''
|
|
'))' +
|
|
(this.isStart() && !dot && !allowDot ? startNoDot : '') +
|
|
star +
|
|
')'
|
|
: this.type === '@'
|
|
? ')'
|
|
: this.type === '?'
|
|
? ')?'
|
|
: this.type === '+' && bodyDotAllowed
|
|
? ')'
|
|
: this.type === '*' && bodyDotAllowed
|
|
? `)?`
|
|
: `)${this.type}`;
|
|
final = start + body + close;
|
|
}
|
|
return [
|
|
final,
|
|
unescape(body),
|
|
(this.#hasMagic = !!this.#hasMagic),
|
|
this.#uflag,
|
|
];
|
|
}
|
|
#partsToRegExp(dot) {
|
|
return this.#parts
|
|
.map(p => {
|
|
// extglob ASTs should only contain parent ASTs
|
|
/* c8 ignore start */
|
|
if (typeof p === 'string') {
|
|
throw new Error('string type in extglob ast??');
|
|
}
|
|
/* c8 ignore stop */
|
|
// can ignore hasMagic, because extglobs are already always magic
|
|
const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
|
|
this.#uflag = this.#uflag || uflag;
|
|
return re;
|
|
})
|
|
.filter(p => !(this.isStart() && this.isEnd()) || !!p)
|
|
.join('|');
|
|
}
|
|
static #parseGlob(glob, hasMagic, noEmpty = false) {
|
|
let escaping = false;
|
|
let re = '';
|
|
let uflag = false;
|
|
for (let i = 0; i < glob.length; i++) {
|
|
const c = glob.charAt(i);
|
|
if (escaping) {
|
|
escaping = false;
|
|
re += (reSpecials.has(c) ? '\\' : '') + c;
|
|
continue;
|
|
}
|
|
if (c === '\\') {
|
|
if (i === glob.length - 1) {
|
|
re += '\\\\';
|
|
}
|
|
else {
|
|
escaping = true;
|
|
}
|
|
continue;
|
|
}
|
|
if (c === '[') {
|
|
const [src, needUflag, consumed, magic] = parseClass(glob, i);
|
|
if (consumed) {
|
|
re += src;
|
|
uflag = uflag || needUflag;
|
|
i += consumed - 1;
|
|
hasMagic = hasMagic || magic;
|
|
continue;
|
|
}
|
|
}
|
|
if (c === '*') {
|
|
if (noEmpty && glob === '*')
|
|
re += starNoEmpty;
|
|
else
|
|
re += star;
|
|
hasMagic = true;
|
|
continue;
|
|
}
|
|
if (c === '?') {
|
|
re += qmark;
|
|
hasMagic = true;
|
|
continue;
|
|
}
|
|
re += regExpEscape(c);
|
|
}
|
|
return [re, unescape(glob), !!hasMagic, uflag];
|
|
}
|
|
}
|
|
//# sourceMappingURL=ast.js.map
|