2024-10-14 08:09:33 +02:00

514 lines
14 KiB

/*************************************************************
 *
 *  Copyright (c) 2017-2022 The MathJax Consortium
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
/**
 * @fileoverview The TexParser. Implements the basic parsing functionality and
 *     administers the global stack and tree objects.
 *
 * @author Volker Sorge
 */
import ParseUtil from './ParseUtil.js';
import {HandlerType} from './MapHandler.js';
import Stack from './Stack.js';
import StackItemFactory from './StackItemFactory.js';
import {Tags} from './Tags.js';
import TexError from './TexError.js';
import {MmlNode, AbstractMmlNode} from '../../core/MmlTree/MmlNode.js';
import {ParseInput, ParseResult} from './Types.js';
import ParseOptions from './ParseOptions.js';
import {StackItem, EnvList} from './StackItem.js';
import {Symbol} from './Symbol.js';
import {OptionList} from '../../util/Options.js';
/**
 * The main Tex Parser class.
 */
export default class TexParser {
/**
 * Counter for recursive macros.
 * @type {number}
 */
public macroCount: number = 0;

/**
 * The stack for items and created nodes.
 * @type {Stack}
 */
public stack: Stack;

/**
 * Current position in the string that is parsed.
 * @type {number}
 */
public i: number = 0;

/**
 * The last command sequence
 * @type {string}
 */
public currentCS: string = '';
/**
 * @constructor
 * @param {string} string The string to parse.
 * @param {EnvList} env The initial environment representing the current parse
 *     state of the overall expression translation. An own `isInner` entry, if
 *     present, is read and then removed from the object that was passed in.
 * @param {ParseOptions} configuration A parser configuration.
 */
constructor(private _string: string, env: EnvList, public configuration: ParseOptions) {
  // The copy below tolerates a missing env, so the `isInner` probe must not
  // dereference env unconditionally either.
  const inner = !!env && Object.prototype.hasOwnProperty.call(env, 'isInner');
  let isInner = true;
  let ENV: EnvList;
  if (env) {
    if (inner) {
      isInner = env['isInner'] as boolean;
      delete env['isInner'];
    }
    // Shallow copy of the own enumerable entries; the stack receives the
    // copy rather than the caller's object.
    ENV = {};
    for (const id of Object.keys(env)) {
      ENV[id] = env[id];
    }
  }
  // Without an explicit `isInner` entry the stack is created with `true`.
  this.stack = new Stack(this.itemFactory, ENV, isInner);
  // NOTE(review): nothing visible here starts parsing (Parse() is not
  // called); confirm against upstream whether statements following the
  // stack creation were lost from this file.
}
/**
 * @return {OptionList} The configuration options.
 */
get options(): OptionList {
  return this.configuration.options;
}
/**
 * @return {StackItemFactory} The factory for stack items.
 */
get itemFactory(): StackItemFactory {
  return this.configuration.itemFactory;
}
/**
 * @return {Tags} The tags style of this configuration.
 */
get tags(): Tags {
  return this.configuration.tags;
}
/**
 * Sets the string that should be parsed.
 * The current position `i` is left unchanged.
 * @param {string} str The new string to parse.
 */
set string(str: string) {
  this._string = str;
}
/**
 * @return {string} The string that is currently parsed.
 */
get string(): string {
  return this._string;
}
/**
 * Parses the input with the specified kind of map.
 * Delegates to the handler registered under `kind` in the configuration.
 * @param {HandlerType} kind Configuration name.
 * @param {ParseInput} input Input to be parsed.
 * @return {ParseResult} The output of the parsing function.
 */
public parse(kind: HandlerType, input: ParseInput): ParseResult {
  const handler = this.configuration.handlers.get(kind);
  return handler.parse(input);
}
/**
 * Maps a symbol to its "parse value" if it exists.
 * Delegates to the handler registered under `kind` in the configuration.
 * @param {HandlerType} kind Configuration name.
 * @param {string} symbol The symbol to parse.
 * @return {any} A boolean, Character, or Macro.
 */
public lookup(kind: HandlerType, symbol: string): any {
  const handler = this.configuration.handlers.get(kind);
  return handler.lookup(symbol);
}
/**
 * Checks if a symbol is contained in one of the symbol mappings of the
 * specified kind.
 * @param {HandlerType} kind Configuration name.
 * @param {string} symbol The symbol to parse.
 * @return {boolean} True if the symbol is contained in the given types of
 *     symbol mapping.
 */
public contains(kind: HandlerType, symbol: string): boolean {
  const handler = this.configuration.handlers.get(kind);
  return handler.contains(symbol);
}
/**
 * Builds one `<kind>: <handler>` line per registered handler kind, each
 * terminated by a newline.
 * @override
 */
public toString(): string {
  let str = '';
  for (const config of Array.from(this.configuration.handlers.keys())) {
    // The handler is stringified implicitly by the concatenation.
    str += config + ': ' +
      this.configuration.handlers.get(config as HandlerType) + '\n';
  }
  return str;
}
/**
 * Parses the current input string.
 * Reads one code point at a time, advances the position past it, and hands
 * it to the 'character' handlers together with this parser.
 */
public Parse() {
  let c: string;
  while (this.i < this.string.length) {
    c = this.getCodePoint();
    // Advance by the code point's UTF-16 length (2 for astral characters)
    // before dispatching, so handlers see the position after the character.
    this.i += c.length;
    this.parse('character', [this, c]);
  }
}
/**
 * Pushes a new item onto the stack. The item can also be a Mml node,
 * but if the mml item is an inferred row, push its children instead.
 * @param {StackItem|MmlNode} arg The new item.
 */
public Push(arg: StackItem | MmlNode) {
  // NOTE(review): both branch bodies were missing from the file and are
  // reconstructed from the description above; this assumes MmlNode exposes
  // `childNodes` and Stack exposes `Push()` -- confirm.
  if (arg instanceof AbstractMmlNode && arg.isInferred) {
    this.PushAll(arg.childNodes);
  } else {
    this.stack.Push(arg);
  }
}
/**
 * Pushes a list of new items onto the stack.
 * @param {StackItem|MmlNode[]} args The new items.
 */
public PushAll(args: (StackItem | MmlNode)[]) {
  for (const arg of args) {
    // NOTE(review): the loop body was missing from the file; this assumes
    // Stack exposes `Push()` -- confirm.
    this.stack.Push(arg);
  }
}
/**
 * @return {MmlNode} The internal Mathml structure: the first node of the
 *     top stack item, or null when that item is not of kind 'mml'.
 */
public mml(): MmlNode {
  if (!this.stack.Top().isKind('mml')) {
    return null;
  }
  let node = this.stack.Top().First;
  // NOTE(review): the intermediate local suggests a statement between these
  // two lines may have been lost from the file -- confirm against upstream.
  return node;
}
/**
 * String handling routines
 */
/**
 * Convert delimiter to character.
 * @param {string} c The delimiter name.
 * @return {string} The corresponding character, or null when the
 *     'delimiter' lookup yields nothing.
 */
public convertDelimiter(c: string): string {
  const symbol = this.lookup('delimiter', c) as Symbol;
  return symbol ? symbol.char : null;
}
/**
 * Does not advance the position.
 * @return {string} Get the next unicode character in the string, or the
 *     empty string when the position is at or past the end.
 */
public getCodePoint(): string {
  const code = this.string.codePointAt(this.i);
  // codePointAt yields undefined for an out-of-range index.
  return code === undefined ? '' : String.fromCodePoint(code);
}
/**
 * @return {boolean} True if the next character to parse is a space.
 */
public nextIsSpace(): boolean {
  // charAt returns '' past the end, which never matches \s.
  return !!this.string.charAt(this.i).match(/\s/);
}
/**
 * Skips over whitespace and peeks at the character that follows; that
 * character itself is not consumed.
 * @return {string} Get the next non-space character.
 */
public GetNext(): string {
  // The increment is required: with an empty body this loop never
  // terminates once a whitespace character is reached.
  while (this.nextIsSpace()) {
    this.i++;
  }
  return this.getCodePoint();
}
/**
 * Expects the position to be just after the backslash. Consumes either a run
 * of ASCII letters plus one optional trailing space, a high surrogate with
 * the code unit following it, or any single character.
 * @return {string} Get and return a control-sequence name
 */
public GetCS(): string {
  let CS = this.string.slice(this.i).match(/^(([a-z]+) ?|[\uD800-\uDBFF].|.)/i);
  if (CS) {
    this.i += CS[0].length;
    // Group 2 is the letter run without the swallowed space; group 1 covers
    // the single-character alternatives.
    return CS[2] || CS[1];
  } else {
    // No match means end of input or a line terminator (`.` excludes those).
    // NOTE(review): the increment below was not present in the file as
    // found; it is restored so the unmatched unit is consumed like every
    // other control sequence -- confirm against upstream.
    this.i++;
    return ' ';
  }
}
/**
 * Get and return a TeX argument (either a single character or control
 * sequence, or the contents of the next set of braces).
 * @param {string} name Name of the current control sequence.
 * @param {boolean} noneOK? True if no argument is OK.
 * @return {string} The next argument, or null when none is present and
 *     noneOK is set.
 * @throws {TexError} MissingArgFor, ExtraCloseMissingOpen or
 *     MissingCloseBrace.
 */
public GetArgument(_name: string, noneOK?: boolean): string {
  switch (this.GetNext()) {
  case '':
    if (!noneOK) {
      // @test MissingArgFor
      throw new TexError('MissingArgFor', 'Missing argument for %1', this.currentCS);
    }
    return null;
  case '}':
    if (!noneOK) {
      // @test ExtraCloseMissingOpen
      throw new TexError('ExtraCloseMissingOpen',
                         'Extra close brace or missing open brace');
    }
    return null;
  case '\\':
    // GetNext() only peeks, so step past the backslash first; GetCS() reads
    // the name that follows it (as GetDelimiter and GetUpTo also arrange).
    this.i++;
    return '\\' + this.GetCS();
  case '{': {
    // j marks the first character inside the braces.
    let j = ++this.i, parens = 1;
    while (this.i < this.string.length) {
      switch (this.string.charAt(this.i++)) {
      case '\\': this.i++; break;   // skip the escaped character
      case '{': parens++; break;
      case '}':
        if (--parens === 0) {
          // Exclude the closing brace that was just consumed.
          return this.string.slice(j, this.i - 1);
        }
        break;
      }
    }
    // @test MissingCloseBrace
    throw new TexError('MissingCloseBrace', 'Missing close brace');
  }
  }
  // Anything else is a single-character argument.
  const c = this.getCodePoint();
  this.i += c.length;
  return c;
}
/**
 * Get an optional LaTeX argument in brackets.
 * A ']' only terminates the argument at brace depth zero.
 * @param {string} name Name of the current control sequence.
 * @param {string} def? The default value for the optional argument.
 * @return {string} The optional argument, or def when the next non-space
 *     character is not '['.
 * @throws {TexError} ExtraCloseLooking or MissingCloseBracket.
 */
public GetBrackets(_name: string, def?: string): string {
  if (this.GetNext() !== '[') {
    return def;
  }
  // j marks the first character after the opening bracket.
  let j = ++this.i, parens = 0;
  while (this.i < this.string.length) {
    switch (this.string.charAt(this.i++)) {
    case '{': parens++; break;
    case '\\': this.i++; break;   // skip the escaped character
    case '}':
      if (parens-- <= 0) {
        // @test ExtraCloseLooking1
        throw new TexError('ExtraCloseLooking',
                           'Extra close brace while looking for %1', '\']\'');
      }
      // Must not fall through: a '}' that brings the depth back to zero
      // would otherwise be treated as the closing ']'.
      break;
    case ']':
      if (parens === 0) {
        return this.string.slice(j, this.i - 1);
      }
      break;
    }
  }
  // @test MissingCloseBracket
  throw new TexError('MissingCloseBracket',
                     'Could not find closing \']\' for argument to %1', this.currentCS);
}
/**
 * Get the name of a delimiter (check it in the delimiter list).
 * @param {string} name Name of the current control sequence.
 * @param {boolean} braceOK? Are braces around the delimiter OK.
 * @return {string} The delimiter name.
 * @throws {TexError} MissingOrUnrecognizedDelim when no known delimiter
 *     follows.
 */
public GetDelimiter(name: string, braceOK?: boolean): string {
  let c = this.GetNext(); this.i += c.length;
  if (this.i <= this.string.length) {
    if (c === '\\') {
      c += this.GetCS();
    } else if (c === '{' && braceOK) {
      // The '{' was already consumed above; step back so GetArgument() sees
      // the brace and returns the whole group instead of its first character.
      this.i--;
      c = this.GetArgument(name).trim();
    }
    if (this.contains('delimiter', c)) {
      return this.convertDelimiter(c);
    }
  }
  // @test MissingOrUnrecognizedDelim1, MissingOrUnrecognizedDelim2
  throw new TexError('MissingOrUnrecognizedDelim',
                     'Missing or unrecognized delimiter for %1', this.currentCS);
}
/**
 * Get a dimension (including its units).
 * Accepts either a braced argument or a dimension written directly at the
 * current position.
 * @param {string} name Name of the current control sequence.
 * @return {string} The dimension string.
 * @throws {TexError} MissingDimOrUnits when no dimension can be matched.
 */
public GetDimen(name: string): string {
  if (this.GetNext() === '{') {
    let dimen = this.GetArgument(name);
    let [value, unit] = ParseUtil.matchDimen(dimen);
    if (value) {
      // @test Raise In Line, Lower 2, (Raise|Lower) Negative
      return value + unit;
    }
  } else {
    // @test Above, Raise, Lower, Modulo, Above With Delims
    let dimen = this.string.slice(this.i);
    let [value, unit, length] = ParseUtil.matchDimen(dimen, true);
    if (value) {
      // Consume only the part of the input that matchDimen reports.
      this.i += length;
      return value + unit;
    }
  }
  // @test MissingDimOrUnits
  throw new TexError('MissingDimOrUnits',
                     'Missing dimension or its units for %1', this.currentCS);
}
/**
 * Get everything up to the given control sequence (token)
 * The token is only recognised at brace depth zero and is consumed.
 * @param {string} name Name of the current control sequence.
 * @param {string} token The element until where to parse.
 * @return {string} The text between the current position and the given token.
 * @throws {TexError} ExtraCloseLooking or TokenNotFoundForCommand.
 */
public GetUpTo(_name: string, token: string): string {
  // Skip leading whitespace; the increment keeps this loop from spinning.
  while (this.nextIsSpace()) {
    this.i++;
  }
  let j = this.i;
  let parens = 0;
  while (this.i < this.string.length) {
    // k remembers where the current token starts so it can be excluded.
    let k = this.i;
    let c = this.GetNext(); this.i += c.length;
    switch (c) {
    case '\\': c += this.GetCS(); break;
    case '{': parens++; break;
    case '}':
      if (parens === 0) {
        // @test ExtraCloseLooking2
        throw new TexError('ExtraCloseLooking',
                           'Extra close brace while looking for %1', token);
      }
      // Leave the group; without this the depth never returns to zero and
      // the token cannot match after any braced group.
      parens--;
      break;
    }
    if (parens === 0 && c === token) {
      return this.string.slice(j, k);
    }
  }
  // @test TokenNotFoundForCommand
  throw new TexError('TokenNotFoundForCommand',
                     'Could not find %1 for %2', token, this.currentCS);
}
/**
 * Parse the arguments of a control sequence in a new parser instance.
 * The new parser receives this parser's stack environment and configuration.
 * @param {string} name Name of the current control sequence.
 * @return {MmlNode} The parsed node.
 */
public ParseArg(name: string): MmlNode {
  // The tail of this expression was truncated in the file; mml() is the
  // parser method that yields the MmlNode required by the return type.
  return new TexParser(this.GetArgument(name), this.stack.env,
                       this.configuration).mml();
}
/**
 * Parses a given string up to a given token in a new parser instance.
 * The new parser receives this parser's stack environment and configuration.
 * @param {string} name Name of the current control sequence.
 * @param {string} token A Token at which to end parsing.
 * @return {MmlNode} The parsed node.
 */
public ParseUpTo(name: string, token: string): MmlNode {
  // The tail of this expression was truncated in the file; mml() is the
  // parser method that yields the MmlNode required by the return type.
  return new TexParser(this.GetUpTo(name, token), this.stack.env,
                       this.configuration).mml();
}
/**
 * Get a delimiter or empty argument
 * @param {string} name Name of the current control sequence.
 * @return {string} The delimiter, or null when the argument is empty after
 *     trimming.
 * @throws {TexError} MissingOrUnrecognizedDelim when the argument is not a
 *     known delimiter.
 */
public GetDelimiterArg(name: string): string {
  let c = ParseUtil.trimSpaces(this.GetArgument(name));
  if (c === '') {
    return null;
  }
  if (this.contains('delimiter', c)) {
    return c;
  }
  // @test MissingOrUnrecognizedDelim
  throw new TexError('MissingOrUnrecognizedDelim',
                     'Missing or unrecognized delimiter for %1', this.currentCS);
}
/**
 * Consumes the star when one is present.
 * @return {boolean} True if a star follows the control sequence name.
 */
public GetStar(): boolean {
  let star = (this.GetNext() === '*');
  if (star) {
    // GetNext() only peeks; step past the star so it is not parsed again.
    this.i++;
  }
  return star;
}
/**
 * Convenience method to create nodes with the node factory of the current
 * configuration.
 * @param {string} kind The kind of node to create.
 * @param {any[]} rest The remaining arguments for the creation method.
 * @return {MmlNode} The newly created node.
 */
public create(kind: string, ...rest: any[]): MmlNode {
  // The rest-parameter and spread tokens were missing from the file, leaving
  // an unnamed parameter and an unterminated call; both are restored here.
  return this.configuration.nodeFactory.create(kind, ...rest);
}