// File-viewer metadata: 528 lines · No EOL · 16 KiB · JavaScript
/**
 * Babel helper: lists the own keys of `e` — enumerable string keys first,
 * then symbol keys.
 *
 * @param {object} e - Object whose keys are collected.
 * @param {boolean} [r] - When truthy, non-enumerable symbols are left out.
 * @returns {Array<string|symbol>} Freshly allocated array of keys.
 */
function ownKeys(e, r) {
  const keys = Object.keys(e);
  if (!Object.getOwnPropertySymbols) {
    return keys;
  }
  let symbols = Object.getOwnPropertySymbols(e);
  if (r) {
    symbols = symbols.filter((symbol) => Object.getOwnPropertyDescriptor(e, symbol).enumerable);
  }
  keys.push(...symbols);
  return keys;
}
|
|
/**
 * Babel helper emulating object spread: copies every extra argument onto `e`.
 *
 * Arguments at odd positions are copied key by key through `_defineProperty`
 * (enumerable own keys only); arguments at even positions are copied with
 * their full property descriptors. `null`/`undefined` sources count as `{}`.
 *
 * @param {object} e - Target object; mutated and returned.
 * @returns {object} The same `e`.
 */
function _objectSpread(e) {
  for (let index = 1; index < arguments.length; index++) {
    const source = arguments[index] != null ? arguments[index] : {};
    if (index % 2) {
      ownKeys(Object(source), true).forEach((key) => {
        _defineProperty(e, key, source[key]);
      });
    } else if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(e, Object.getOwnPropertyDescriptors(source));
    } else {
      ownKeys(Object(source)).forEach((key) => {
        Object.defineProperty(e, key, Object.getOwnPropertyDescriptor(source, key));
      });
    }
  }
  return e;
}
|
|
/**
 * Babel helper: sets `obj[key] = value`, normalising the key first.
 *
 * If the key is already reachable on `obj` (own or inherited) the property is
 * (re)defined as a plain enumerable/configurable/writable data property;
 * otherwise a normal assignment is used.
 *
 * @param {object} obj - Target object; mutated and returned.
 * @param {*} key - Converted with `_toPropertyKey`.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _defineProperty(obj, key, value) {
  const propertyKey = _toPropertyKey(key);
  if (!(propertyKey in obj)) {
    obj[propertyKey] = value;
    return obj;
  }
  Object.defineProperty(obj, propertyKey, {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  });
  return obj;
}
|
|
/**
 * Babel helper: converts an arbitrary value into a property key.
 *
 * @param {*} t - Candidate key.
 * @returns {string|symbol} The symbol itself when the primitive form of `t`
 *   is a symbol, otherwise its string form.
 */
function _toPropertyKey(t) {
  const primitive = _toPrimitive(t, 'string');
  if (typeof primitive === 'symbol') {
    return primitive;
  }
  return String(primitive);
}
|
|
/**
 * Babel helper: converts `t` to a primitive, following the same steps as the
 * language's own ToPrimitive operation.
 *
 * @param {*} t - Value to convert; primitives (and `null`) are returned as-is.
 * @param {string} [r] - Conversion hint; `"string"` selects `String(t)`,
 *   anything else `Number(t)` when the object has no `Symbol.toPrimitive`.
 * @returns {*} A primitive value.
 * @throws {TypeError} When `t[Symbol.toPrimitive]` returns an object.
 */
function _toPrimitive(t, r) {
  if (typeof t !== 'object' || !t) {
    return t;
  }
  const exoticToPrimitive = t[Symbol.toPrimitive];
  if (exoticToPrimitive === undefined) {
    const convert = r === 'string' ? String : Number;
    return convert(t);
  }
  const result = exoticToPrimitive.call(t, r || 'default');
  if (typeof result !== 'object') {
    return result;
  }
  throw new TypeError("@@toPrimitive must return a primitive value.");
}
|
|
import { util } from '@citation-js/core';
|
|
import moo from 'moo';
|
|
import config from '../config.js';
|
|
import * as constants from './constants.js';
|
|
import { orderNamePieces, formatNameParts, getStringCase } from './name.js';
|
|
/**
 * Lexer rules shared by every state of the value lexer.
 *
 * The keys are token types; `push` names the lexer state entered after the
 * token. Each state adds its own `text` rule (and possibly more) around
 * these shared rules.
 */
const text = {
  // `\begin` opens an environment and switches to the `environment` state.
  commandBegin: {
    match: '\\begin',
    push: 'environment'
  },
  // A backslash followed by letters or by any single character, plus trailing
  // spaces. The token value is the bare command name (no backslash, no spaces).
  command: {
    match: /\\(?:[a-zA-Z]+|.) */,
    value: s => s.slice(1).trim()
  },
  lbrace: {
    match: '{',
    push: 'bracedLiteral'
  },
  mathShift: {
    match: '$',
    push: 'mathLiteral'
  },
  // Runs of whitespace collapse to one space; `~` becomes a no-break space.
  whitespace: {
    match: /[\s]+|~/,
    lineBreaks: true,
    value(token) {
      return token === '~' ? '\xa0' : ' ';
    }
  }
};
|
|
/**
 * Stateful lexer for field values. Each key is a lexer state; `parse` and
 * `parseAnnotation` pick the starting state.
 *
 * Every state is assembled as: state-specific rules that must come first,
 * then the shared `text` rules, then overrides/additions ending with the
 * state's catch-all `text` pattern. `Object.assign` keeps that key order
 * (an overridden key such as `mathShift` keeps its original position).
 */
const lexer = moo.states({
  // Plain values.
  stringLiteral: Object.assign({}, text, {
    text: /[^{$}\s~\\]+/
  }),
  // Name lists: "and" (any letter case) separates names; commas, hyphens and
  // equals signs are tokens of their own.
  namesLiteral: Object.assign({
    and: /\s+[aA][nN][dD]\s+/,
    comma: ',',
    hyphen: '-',
    equals: '='
  }, text, {
    text: /[^{$}\s~\\,=-]+/
  }),
  // Generic lists separated by a lower-case "and".
  listLiteral: Object.assign({
    and: /\s+and\s+/
  }, text, {
    text: /[^{$}\s~\\]+/
  }),
  // Comma-separated values.
  separatedLiteral: Object.assign({
    comma: ','
  }, text, {
    text: /[^{$}\s~\\,]+/
  }),
  // Annotation syntax (see the `Annotation` grammar rule).
  annotation: Object.assign({}, text, {
    colon: ':',
    equals: '=',
    comma: ',',
    semicolon: ';',
    quote: '"',
    itemCount: /\d+/,
    text: /[^{$}\s~\\":;,=]+/
  }),
  // Inside `{...}`; the closing brace returns to the previous state.
  bracedLiteral: Object.assign({}, text, {
    rbrace: {
      match: '}',
      pop: true
    },
    text: /[^{$}\s~\\]+/
  }),
  // Inside `$...$`; here `$` closes the math run instead of opening one.
  mathLiteral: Object.assign({}, text, {
    mathShift: {
      match: '$',
      pop: true
    },
    script: /[\^_]/,
    text: /[^{$}\s~\\^_]+/
  }),
  // Between `\begin` and `\end`.
  environment: Object.assign({
    commandEnd: {
      match: '\\end',
      pop: true
    }
  }, text, {
    text: /[^{$}\s~\\]+/
  })
});
|
|
/**
 * Returns `string` unchanged after reading its first character.
 *
 * NOTE(review): going by the name, the otherwise unused index read is
 * presumably there to make the engine flatten a string built by repeated
 * `+=` — confirm before removing it.
 *
 * @param {string} string - Value to pass through.
 * @returns {string} The same value.
 */
function flattenConsString(string) {
  // Intentional expression statement; the result is discarded.
  string[0];
  return string;
}
|
|
/**
 * Wraps `text` in the markup registered for `format`.
 *
 * @param {string} text - Text to wrap. Falsy text (such as `''`) is returned
 *   as-is, so empty content never produces empty markup.
 * @param {string} format - Key into `constants.formatting`; unknown keys leave
 *   `text` untouched.
 * @returns {string} The wrapped or original text.
 */
function applyFormatting(text, format) {
  if (!(format in constants.formatting)) {
    return text;
  }
  // The entry's `join` places `text` between its elements
  // (presumably an [open, close] pair — confirm in constants.js).
  return text && constants.formatting[format].join(text);
}
|
|
/**
 * Tests whether `key` is an OWN key of a lookup table.
 *
 * Command names come straight from the parsed input, so the `in` operator
 * would also accept inherited names such as `constructor` or `toString`.
 *
 * @param {object} table - Lookup table from `constants`.
 * @param {string} key - Candidate key.
 * @returns {boolean} True only for own properties.
 */
function hasOwnKey(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key);
}

/**
 * Grammar turning the token stream of one field value into its parsed form.
 *
 * The first argument maps rule names to rule functions (run with the grammar
 * as `this`); the second is the initial parser state:
 *   - `sentenceCase`: whether `Text` lower-cases words (title fields).
 *   - `partlyLowercase`: set once a lower-case word is seen while sentence
 *     casing is off; used by `BracketString` to decide on case protection.
 *   - `afterPunctuation`: whether the previous text token ended in `?!.:`
 *     (starts out true, so the very first word keeps its capital).
 *
 * NOTE(review): `consumeToken(type, true)` is used wherever a token may be
 * absent, so the second argument presumably marks the token as optional —
 * confirm against `util.Grammar`.
 */
export const valueGrammar = new util.Grammar({
  /** Plain value: concatenates `Text` results until the end of input. */
  String() {
    let output = '';
    while (!this.matchEndOfFile()) {
      output += this.consumeRule('Text');
    }
    return flattenConsString(output);
  },

  /** List of names separated by `and` tokens; returns an array of `Name` results. */
  StringNames() {
    const list = [];
    while (true) {
      this.consumeToken('whitespace', true);
      list.push(this.consumeRule('Name'));
      this.consumeToken('whitespace', true);
      if (this.matchEndOfFile()) {
        return list;
      } else {
        this.consumeToken('and');
      }
    }
  },

  /** One name: comma-separated pieces, handed to `orderNamePieces`. */
  Name() {
    const pieces = [];
    while (true) {
      pieces.push(this.consumeRule('NamePiece'));
      if (this.matchEndOfFile() || this.matchToken('and')) {
        return orderNamePieces(pieces);
      } else {
        this.consumeToken('comma');
        this.consumeToken('whitespace', true);
      }
    }
  },

  /**
   * One comma-delimited piece of a name: an array of `NameToken` results.
   * A token carrying a `label` (the `key=value` form) ends the piece at once
   * and is returned alone, its label prefixed with the parts read so far.
   */
  NamePiece() {
    const parts = [];
    while (true) {
      const part = this.consumeRule('NameToken');
      if (part.label) {
        part.label = formatNameParts([...parts, {
          value: part.label
        }]);
        return [part];
      }
      parts.push(part);
      if (this.matchEndOfFile() || this.matchToken('and') || this.matchToken('comma')) {
        return parts;
      } else {
        // Skip the hyphens/whitespace separating this part from the next.
        while (this.matchToken('hyphen') || this.matchToken('whitespace')) {
          this.consumeToken();
        }
      }
    }
  },

  /**
   * One word of a name. Returns `{ value, upperCase }`, with `hyphenated: true`
   * when a hyphen follows, or `{ value, label }` for the `label=value` form.
   */
  NameToken() {
    let upperCase = null;
    let value = '';
    while (true) {
      if (upperCase === null && this.matchToken('text')) {
        // Case is taken from plain text tokens for as long as it is undecided.
        const text = this.consumeToken().value;
        value += text;
        upperCase = getStringCase(text);
      } else if (this.matchEndOfFile() || this.matchToken('and') || this.matchToken('comma') || this.matchToken('whitespace')) {
        return {
          value,
          upperCase
        };
      } else if (this.matchToken('hyphen')) {
        return {
          value,
          upperCase,
          hyphenated: true
        };
      } else if (this.matchToken('equals')) {
        // `label=value`: what was read so far is the label; the rest of the
        // piece is the value. Nested labels are folded back into the label.
        this.consumeToken('equals');
        const text = this.consumeRule('NamePiece');
        if (text[0].label) {
          value += '=' + text[0].label;
        }
        return {
          value: formatNameParts(text),
          label: value
        };
      } else {
        value += this.consumeRule('Text');
      }
    }
  },

  /**
   * `and`-separated list. Returns the single item itself when there is exactly
   * one, otherwise the array of items (empty array for empty input).
   */
  StringList() {
    const list = [];
    while (!this.matchEndOfFile()) {
      let output = '';
      while (!this.matchEndOfFile() && !this.matchToken('and')) {
        output += this.consumeRule('Text');
      }
      list.push(flattenConsString(output));
      this.consumeToken('and', true);
    }
    return list.length === 1 ? list[0] : list;
  },

  /** Comma-separated list; every item is trimmed. Always returns an array. */
  StringSeparated() {
    const list = [];
    while (!this.matchEndOfFile()) {
      let output = '';
      while (!this.matchEndOfFile() && !this.matchToken('comma')) {
        output += this.consumeRule('Text');
      }
      list.push(output.trim());
      this.consumeToken('comma', true);
      this.consumeToken('whitespace', true);
    }
    return list;
  },

  /** Verbatim value: the raw source text of every token, uninterpreted. */
  StringVerbatim() {
    let output = '';
    while (!this.matchEndOfFile()) {
      output += this.consumeToken().text;
    }
    return flattenConsString(output);
  },

  /**
   * URI value: verbatim text, percent-encoded unless it already contains
   * escapes (that is, unless decoding changes it). Text that cannot be
   * decoded is encoded as well.
   */
  StringUri() {
    const uri = this.consumeRule('StringVerbatim');
    try {
      if (decodeURI(uri) === uri) {
        return encodeURI(uri);
      } else {
        return uri;
      }
    } catch (e) {
      // decodeURI throws on malformed escape sequences.
      return encodeURI(uri);
    }
  },

  /** Title value: like `String`, with sentence-casing switched on in `Text`. */
  StringTitleCase() {
    this.state.sentenceCase = true;
    let output = '';
    while (!this.matchEndOfFile()) {
      output += this.consumeRule('Text');
    }
    return flattenConsString(output);
  },

  /**
   * Semicolon-separated annotations, collected by scope:
   * `field` (one value), `item[index]` and `part[index][partName]`.
   */
  Annotations() {
    const annotations = {};
    while (true) {
      const {
        scope,
        item,
        part,
        value
      } = this.consumeRule('Annotation');
      if (scope === 'part') {
        if (!annotations.part) {
          annotations.part = [];
        }
        if (!annotations.part[item]) {
          annotations.part[item] = {};
        }
        annotations.part[item][part] = value;
      } else if (scope === 'item') {
        if (!annotations.item) {
          annotations.item = [];
        }
        annotations.item[item] = value;
      } else {
        annotations.field = value;
      }
      if (this.matchEndOfFile()) {
        break;
      } else {
        this.consumeToken('semicolon');
        this.consumeRule('_');
      }
    }
    return annotations;
  },

  /**
   * One annotation: `[item[:part]]=value`, where the value is either a quoted
   * literal (string result) or a comma-separated list (array result).
   * Returns `{ scope, item?, part?, value }`; `item` is zero-based.
   */
  Annotation() {
    const annotation = {};
    if (this.matchToken('itemCount')) {
      // Read the digits from the token value and parse them as decimal.
      annotation.item = Number.parseInt(this.consumeToken('itemCount').value, 10) - 1;
      if (this.matchToken('colon')) {
        this.consumeToken('colon');
        // Store the part name as a string (it is used as an object key).
        annotation.part = this.consumeToken('text').value;
        annotation.scope = 'part';
      } else {
        annotation.scope = 'item';
      }
    } else {
      annotation.scope = 'field';
    }
    this.consumeToken('equals');
    this.consumeRule('_');
    if (this.matchToken('quote')) {
      this.consumeToken('quote');
      let literal = '';
      while (!this.matchToken('quote')) {
        if (this.matchToken('itemCount') || this.matchToken('colon') || this.matchToken('comma') || this.matchToken('semicolon') || this.matchToken('equals')) {
          // Inside quotes the annotation punctuation is ordinary text.
          literal += this.token.value;
          this.token = this.lexer.next();
        } else {
          literal += this.consumeRule('Text');
        }
      }
      this.consumeToken('quote');
      annotation.value = flattenConsString(literal);
      this.consumeRule('_');
    } else {
      annotation.value = [];
      let output = '';
      while (true) {
        output += this.consumeRule('Text');
        if (this.matchToken('comma')) {
          this.consumeToken('comma');
          this.consumeRule('_');
          annotation.value.push(flattenConsString(output));
          output = '';
        } else if (this.matchEndOfFile() || this.matchToken('semicolon')) {
          annotation.value.push(flattenConsString(output));
          break;
        }
      }
    }
    return annotation;
  },

  /**
   * A `{...}` group. Inside it, sentence-casing stays on only when the group
   * opens with a command. A group that switched sentence-casing off and saw
   * lower-case text is wrapped in `nocase` formatting to protect its casing.
   */
  BracketString() {
    let output = '';
    this.consumeToken('lbrace');
    const sentenceCase = this.state.sentenceCase;
    this.state.sentenceCase = sentenceCase && this.matchToken('command');
    // Keep an already-set flag only while sentence-casing is still on.
    if (this.state.partlyLowercase) {
      this.state.partlyLowercase = this.state.sentenceCase;
    }
    while (!this.matchToken('rbrace')) {
      output += this.consumeRule('Text');
    }
    const topLevel = sentenceCase && !this.state.sentenceCase;
    const protectCase = topLevel && this.state.partlyLowercase;
    this.state.sentenceCase = sentenceCase;
    this.consumeToken('rbrace');
    return protectCase ? applyFormatting(output, 'nocase') : output;
  },

  /**
   * A `$...$` run. `^`/`_` scripts are mapped character by character through
   * `constants.mathScripts` when every character has a mapping, otherwise
   * wrapped in the script's formatting; commands listed in
   * `constants.mathScriptFormatting` format their braced argument.
   */
  MathString() {
    let output = '';
    this.consumeToken('mathShift');
    while (!this.matchToken('mathShift')) {
      if (this.matchToken('script')) {
        const script = this.consumeToken('script').value;
        const text = this.consumeRule('Text').split('');
        if (text.every(char => hasOwnKey(constants.mathScripts[script], char))) {
          output += text.map(char => constants.mathScripts[script][char]).join('');
        } else {
          const formatName = constants.mathScriptFormatting[script];
          output += constants.formatting[formatName].join(text.join(''));
        }
        continue;
      }
      if (this.matchToken('command')) {
        const command = this.token.value;
        if (hasOwnKey(constants.mathScriptFormatting, command)) {
          this.consumeToken('command');
          const text = this.consumeRule('BracketString');
          output += applyFormatting(text, constants.mathScriptFormatting[command]);
          continue;
        }
      }
      output += this.consumeRule('Text');
    }
    this.consumeToken('mathShift');
    return output;
  },

  /**
   * One unit of text: a braced group, math run, whitespace, environment,
   * command, or a plain text token (ligatures replaced, sentence-cased when
   * `state.sentenceCase` is on).
   */
  Text() {
    if (this.matchToken('lbrace')) {
      return this.consumeRule('BracketString');
    } else if (this.matchToken('mathShift')) {
      return this.consumeRule('MathString');
    } else if (this.matchToken('whitespace')) {
      return this.consumeToken('whitespace').value;
    } else if (this.matchToken('commandBegin')) {
      return this.consumeRule('EnclosedEnv');
    } else if (this.matchToken('command')) {
      return this.consumeRule('Command');
    }

    const text = this.consumeToken('text').value.replace(constants.ligaturePattern, ligature => constants.ligatures[ligature]);
    const afterPunctuation = this.state.afterPunctuation;
    this.state.afterPunctuation = /[?!.:]$/.test(text);

    if (!this.state.sentenceCase) {
      // Remember that a word containing lower-case letters and no upper-case
      // letters was seen.
      if (!this.state.partlyLowercase) {
        this.state.partlyLowercase = text === text.toLowerCase() && text !== text.toUpperCase();
      }
      return text;
    }

    const [first, ...otherCharacters] = text;
    const rest = otherCharacters.join('');
    const restLowerCase = rest.toLowerCase();
    // Upper-case letters after the first character: keep the word as written.
    if (rest !== restLowerCase) {
      return text;
    }
    // Mid-sentence word: lower-case it entirely.
    if (!afterPunctuation) {
      return text.toLowerCase();
    }
    // First word after punctuation keeps its initial.
    return first + restLowerCase;
  },

  /**
   * A backslash command, resolved against the `constants` tables in order:
   * formatting environments, formatting commands, text commands, math
   * commands, diacritics, commands with arguments, escaped special
   * characters; anything else yields the command's source text.
   */
  Command() {
    const commandToken = this.consumeToken('command');
    const command = commandToken.value;

    if (hasOwnKey(constants.formattingEnvs, command)) {
      const text = this.consumeRule('Env');
      const format = constants.formattingEnvs[command];
      return applyFormatting(text, format);
    } else if (hasOwnKey(constants.formattingCommands, command)) {
      const text = this.consumeRule('BracketString');
      const format = constants.formattingCommands[command];
      return applyFormatting(text, format);
    } else if (hasOwnKey(constants.commands, command)) {
      return constants.commands[command];
    } else if (hasOwnKey(constants.mathCommands, command)) {
      return constants.mathCommands[command];
    } else if (hasOwnKey(constants.diacritics, command) && !this.matchEndOfFile()) {
      const text = this.consumeRule('Text');
      // Split by code point. An empty argument (e.g. `\"{}`) gets U+00A0 as
      // base so the mark stands alone instead of attaching to "undefined".
      const [base = '\xa0', ...rest] = text;
      const diacritic = base + constants.diacritics[command];
      return diacritic.normalize('NFC') + rest.join('');
    } else if (hasOwnKey(constants.argumentCommands, command)) {
      const func = constants.argumentCommands[command];
      const args = [];
      // One braced argument per declared parameter of the handler.
      let arity = func.length;
      while (arity-- > 0) {
        this.consumeToken('whitespace', true);
        args.push(this.consumeRule('BracketString'));
      }
      return func(...args);
    } else if (/^[&%$#_{}]$/.test(command)) {
      // Escaped special character: drop the backslash.
      return commandToken.text.slice(1);
    } else {
      return commandToken.text;
    }
  },

  /** Body of a formatting switch: text up to the closing brace or end of input. */
  Env() {
    let output = '';
    while (!this.matchEndOfFile() && !this.matchToken('rbrace')) {
      output += this.consumeRule('Text');
    }
    return output;
  },

  /**
   * `\begin{name} ... \end{name}`; the body is formatted according to
   * `constants.formattingEnvs[name]`.
   *
   * @throws {SyntaxError} When the `\end` name differs from the `\begin` name.
   */
  EnclosedEnv() {
    this.consumeToken('commandBegin');
    const beginEnv = this.consumeRule('BracketString');
    let output = '';
    while (!this.matchToken('commandEnd')) {
      output += this.consumeRule('Text');
    }
    const end = this.consumeToken('commandEnd');
    const endEnv = this.consumeRule('BracketString');
    if (beginEnv !== endEnv) {
      throw new SyntaxError(this.lexer.formatError(end, `environment started with "${beginEnv}", ended with "${endEnv}"`));
    }
    return applyFormatting(output, constants.formattingEnvs[beginEnv]);
  },

  /** Skips any run of whitespace tokens. */
  _() {
    while (this.matchToken('whitespace')) {
      this.consumeToken('whitespace');
    }
  }
}, {
  sentenceCase: false,
  partlyLowercase: false,
  afterPunctuation: true
});
|
|
/**
 * Tests one language name against `constants.sentenceCaseLanguages`,
 * ignoring letter case.
 *
 * @param {string} language - Language name taken from the entry.
 * @returns {boolean} True when the lower-cased name is listed. Non-string
 *   input yields false instead of throwing.
 */
function singleLanguageIsEnglish(language) {
  if (typeof language !== 'string') {
    return false;
  }
  return constants.sentenceCaseLanguages.includes(language.toLowerCase());
}
|
|
/**
 * Tests whether an entry counts as English for sentence-casing.
 *
 * @param {string|string[]} languages - One language name or an array of them.
 * @returns {boolean} For an array: true when every element passes
 *   `singleLanguageIsEnglish` (so an empty array is true). Otherwise the
 *   result for the single value.
 */
function isEnglish(languages) {
  return Array.isArray(languages)
    ? languages.every(singleLanguageIsEnglish)
    : singleLanguageIsEnglish(languages);
}
|
|
/**
 * Picks the grammar rule a field is parsed with.
 *
 * @param {Array<string>} fieldType - Pair `[kind, subtype]` from
 *   `constants.fieldTypes`; may be empty for unknown fields.
 * @param {string|string[]} languages - Entry language(s); only consulted for
 *   title fields when `config.parse.sentenceCase` is `'english'`.
 * @returns {string} Name of a rule in `valueGrammar`.
 */
function getMainRule(fieldType, languages) {
  const kind = fieldType[0];
  const subtype = fieldType[1];

  if (subtype === 'name') {
    return kind === 'list' ? 'StringNames' : 'Name';
  }

  if (subtype === 'title') {
    const option = config.parse.sentenceCase;
    const useSentenceCase = option === 'always' || option === 'english' && isEnglish(languages);
    return useSentenceCase ? 'StringTitleCase' : 'String';
  }

  // For plain fields the subtype decides; otherwise the kind does.
  switch (kind === 'field' ? subtype : kind) {
    case 'list':
      return 'StringList';
    case 'separated':
      return 'StringSeparated';
    case 'verbatim':
      return 'StringVerbatim';
    case 'uri':
      return 'StringUri';
    case 'title':
    case 'literal':
    default:
      return 'String';
  }
}
|
|
/**
 * Picks the lexer state a field's value is tokenised in.
 *
 * @param {Array<string>} fieldType - Pair `[kind, subtype]`; may be empty.
 * @returns {string} `'namesLiteral'` for name fields, otherwise
 *   `'listLiteral'` or `'separatedLiteral'` by kind, and `'stringLiteral'`
 *   for everything else.
 */
function getLexerState(fieldType) {
  if (fieldType[1] === 'name') {
    return 'namesLiteral';
  }

  switch (fieldType[0]) {
    case 'list':
      return 'listLiteral';
    case 'separated':
      return 'separatedLiteral';
    case 'field':
    default:
      return 'stringLiteral';
  }
}
|
|
/**
 * Parses the raw value of one field.
 *
 * @param {string} text - Raw field value.
 * @param {string} field - Field name, looked up in `constants.fieldTypes`;
 *   unknown fields are parsed as plain strings.
 * @param {string|string[]} [languages=[]] - Entry language(s), used to decide
 *   on sentence-casing of titles.
 * @returns {*} Result of the selected grammar rule (string, array or name
 *   object, depending on the field type).
 */
export function parse(text, field, languages = []) {
  const fieldType = constants.fieldTypes[field] || [];
  // The lexer is shared module state: reset it to this value and start state.
  const input = lexer.reset(text, {
    state: getLexerState(fieldType),
    line: 0,
    col: 0
  });
  return valueGrammar.parse(input, getMainRule(fieldType, languages));
}
|
|
/**
 * Parses the value of an annotation field.
 *
 * @param {string} text - Raw annotation text.
 * @returns {object} Result of the `Annotations` rule: an object with
 *   optional `field`, `item` and `part` entries.
 */
export function parseAnnotation(text) {
  // The lexer is shared module state: reset it to the annotation start state.
  const input = lexer.reset(text, {
    state: 'annotation',
    line: 0,
    col: 0
  });
  return valueGrammar.parse(input, 'Annotations');
}