227 lines
9.7 KiB
JavaScript
227 lines
9.7 KiB
JavaScript
/**
|
|
* @typedef {import('./types').CiteItem} CiteItem
|
|
* @typedef {import('./types').CiteItemSuffix} CiteItemSuffix
|
|
*/
|
|
import { fullCitationRE, locatorRE } from './regex.js';
|
|
/**
 * The locatorLabels have been sourced from the Citr library. It is a map from
 * each valid CSL locator label to the natural-language spellings (German,
 * English, French; full words and abbreviations) a user might write instead of
 * the standardized label.
 *
 * Spellings are compared case-insensitively as prefixes of the suffix text, so
 * within one list a longer spelling that starts with a shorter one is placed
 * before it when both must be recognised by a first-match scan (see 'Z.' / 'Z').
 *
 * @type {{ [key: string]: string[] }}
 */
const locatorLabels = {
  book: ['Buch', 'Bücher', 'B.', 'book', 'books', 'bk.', 'bks.', 'livre', 'livres', 'liv.'],
  chapter: ['Kapitel', 'Kap.', 'chapter', 'chapters', 'chap.', 'chaps', 'chapitre', 'chapitres'],
  column: ['Spalte', 'Spalten', 'Sp.', 'column', 'columns', 'col.', 'cols', 'colonne', 'colonnes'],
  figure: ['Abbildung', 'Abbildungen', 'Abb.', 'figure', 'figures', 'fig.', 'figs'],
  folio: ['Blatt', 'Blätter', 'Fol.', 'folio', 'folios', 'fol.', 'fols', 'fᵒ', 'fᵒˢ'],
  issue: [
    'Nummer',
    'Nummern',
    'Nr.',
    'number',
    'numbers',
    'no.',
    'nos.',
    'numéro',
    'numéros',
    'nᵒ',
    'nᵒˢ',
  ],
  // 'Z.' (with period, like 'S.', 'N.', 'B.', 'V.') is listed before the bare
  // 'Z' so that "Z. 12" strips the whole abbreviation instead of leaving ". 12".
  line: ['Zeile', 'Zeilen', 'Z.', 'Z', 'line', 'lines', 'l.', 'll.', 'ligne', 'lignes'],
  note: ['Note', 'Noten', 'N.', 'note', 'notes', 'n.', 'nn.'],
  opus: ['Opus', 'Opera', 'op.', 'opus', 'opera', 'opp.'],
  page: ['Seite', 'Seiten', 'S.', 'page', 'pages', 'p.', 'pp.'],
  paragraph: [
    'Absatz',
    'Absätze',
    'Abs.',
    '¶',
    '¶¶',
    'paragraph',
    'paragraphs',
    'para.',
    'paras',
    'paragraphe',
    'paragraphes',
    'paragr.',
  ],
  part: ['Teil', 'Teile', 'part', 'parts', 'pt.', 'pts', 'partie', 'parties', 'part.'],
  section: [
    'Abschnitt',
    'Abschnitte',
    'Abschn.',
    '§',
    '§§',
    'section',
    'sections',
    'sec.',
    'secs',
    'sect.',
  ],
  'sub verbo': ['sub verbo', 'sub verbis', 's. v.', 's. vv.', 's.v.', 's.vv.'],
  verse: ['Vers', 'Verse', 'V.', 'verse', 'verses', 'v.', 'vv.', 'verset', 'versets'],
  volume: ['Band', 'Bände', 'Bd.', 'Bde.', 'volume', 'volumes', 'vol.', 'vols.'],
};
|
|
/**
 * Parses a citation regex match and returns the entries and the isComposite
 * flag required for cite-proc.
 * Adapted from https://github.com/Zettlr/Zettlr/blob/develop/source/common/util/extract-citations.ts
 *
 * The match is read positionally:
 * - `regexMatch[1]`: content of a full citation such as `@item1; @item2`. When
 *   present, it is split on `;` and every part is matched against
 *   `fullCitationRE`; parts that do not match are skipped.
 * - `regexMatch[2]`: suppress-author marker of an in-text citation.
 * - `regexMatch[3]`: citekey of an in-text citation.
 * - `regexMatch[4]`: optional suffix of an in-text citation.
 *
 * @param {RegExpMatchArray} regexMatch Cite string in the form of '[@item]' or '@item'
 * @return {[CiteItem[], boolean]} [entries, isComposite]; isComposite is true
 *   only for in-text citations.
 */
export const parseCitation = (regexMatch) => {
  const fullCitation = regexMatch[1];
  const inTextSuppressAuthor = regexMatch[2];
  const inTextCitation = regexMatch[3];
  const optionalSuffix = regexMatch[4];

  if (fullCitation === undefined) {
    // In-text citation: a single entry, everything else comes from the suffix.
    return [
      [
        {
          prefix: undefined,
          id: inTextCitation.replace(/{(.+)}/, '$1'),
          'suppress-author': inTextSuppressAuthor !== undefined,
          ...parseSuffix(optionalSuffix, false), // Populate more depending on the suffix
        },
      ],
      true,
    ];
  }

  /** @type {CiteItem[]} */
  const entries = [];

  // Handle citations in the form of [@item1; @item2]
  for (const citationPart of fullCitation.split(';')) {
    const match = fullCitationRE.exec(citationPart.trim());
    if (match === null) {
      continue; // Faulty citation
    }

    const {
      prefix: rawPrefix,
      citekey,
      explicitLocator,
      explicitLocatorInSuffix,
      suffix: rawSuffix,
    } = match.groups;

    /** @type {CiteItem} */
    const thisCitation = {
      id: citekey.replace(/{(.+)}/, '$1'), // Unwrap a {braced} citekey
      prefix: undefined,
      locator: undefined,
      label: 'page',
      'suppress-author': false,
      suffix: undefined,
    };

    // First, deal with the prefix (the portion before @, e.g. [see @item1]).
    // A trailing '-' indicates that the author should be suppressed; it is not
    // part of the prefix text itself.
    if (rawPrefix !== undefined) {
      // Trim once and work on the trimmed text only, so that leading
      // whitespace cannot shift the position of the trailing '-'.
      const trimmedPrefix = rawPrefix.trim();
      const suppressAuthor = trimmedPrefix.endsWith('-');
      thisCitation['suppress-author'] = suppressAuthor;
      thisCitation.prefix = suppressAuthor ? trimmedPrefix.slice(0, -1).trim() : trimmedPrefix;
    }

    // Second, deal with the suffix. If there is an explicitLocator or an
    // explicitLocatorInSuffix, the locator is extracted from there and the
    // actual suffix is left untouched. Only if neither is present do we look
    // at the rawSuffix and extract a (potential) locator from it.
    const hasExplicitLocator = explicitLocator !== undefined || explicitLocatorInSuffix !== undefined;

    let suffixToParse = rawSuffix;
    if (hasExplicitLocator) {
      // explicitLocator takes precedence over explicitLocatorInSuffix.
      suffixToParse = explicitLocator !== undefined ? explicitLocator : explicitLocatorInSuffix;
    }

    const { label, locator, suffix } = parseSuffix(suffixToParse, hasExplicitLocator);

    thisCitation.locator = locator;
    if (label !== undefined) {
      thisCitation.label = label;
    }

    if (hasExplicitLocator) {
      thisCitation.suffix = rawSuffix?.trim();
      if (suffix !== undefined && locator !== undefined) {
        // The suffix must not be changed here, but parseSuffix may have put
        // something into its suffix return. In that case it is part of the
        // locator.
        thisCitation.locator += suffix;
      }
    }
    else {
      // Only in this case does the parsed remainder replace the suffix.
      thisCitation.suffix = suffix;
    }

    entries.push(thisCitation);
  }

  return [entries, false];
};
|
|
/**
 * This takes a suffix and extracts an optional label and locator from it.
 *
 * If the (trimmed) text starts with one of the natural spellings listed in
 * `locatorLabels`, the corresponding label is returned and the spelling is
 * stripped. When several spellings match, the longest one wins (so "pages 3"
 * is read as "pages" + "3", not "page" + "s 3"), and a spelling that ends in a
 * letter is only accepted if it is not directly followed by another letter (so
 * "particularly" is not read as the label "part").
 *
 * Pass true for containsLocator to indicate that the text is not a regular
 * suffix with an optional locator, but an explicit locator: everything after
 * the optional label is then returned as the locator and no suffix is
 * returned. Otherwise `locatorRE` is applied to the text after the optional
 * label; its match becomes the locator and the remainder becomes the suffix.
 * NOTE(review): the remainder is computed by slicing the match length off the
 * front, which assumes `locatorRE` only matches at the start — confirm.
 *
 * @param {string} suffix The suffix to parse
 * @param {boolean} containsLocator If true, forces parseSuffix to return a locator
 *
 * @return {CiteItemSuffix} An object with the properties locator, label
 *   (defaults to 'page') and suffix; locator and suffix may be undefined.
 */
function parseSuffix(suffix, containsLocator) {
  /** @type {CiteItemSuffix} */
  const retValue = {
    locator: undefined,
    label: 'page',
    suffix: undefined,
  };

  if (suffix === undefined) {
    return retValue;
  }

  // Make sure the text does not start or end with spaces
  const text = suffix.trim();

  // If there is a label, the text must start with it
  const found = findLocatorLabel(text);
  const rest = found === undefined ? text : text.slice(found.length).trim();
  if (found !== undefined) {
    retValue.label = found.label;
  }

  if (containsLocator) {
    // The text actually is the full locator, at most a label had to be
    // extracted from it. There is no remaining suffix.
    retValue.locator = rest;
    return retValue;
  }

  // Regular suffix: the locator can be at the beginning of what is left. We
  // only accept what locatorRE recognises here; for everything else the user
  // should be more specific.
  const match = locatorRE.exec(rest);
  if (match !== null) {
    retValue.locator = match[0]; // Full match is the locator
    retValue.suffix = rest.slice(match[0].length).trim(); // The rest is the suffix
  }
  else if (found !== undefined || rest !== '') {
    // No locator: keep the text as the suffix instead of dropping it.
    retValue.suffix = rest;
  }

  return retValue;
}

/**
 * Finds the natural locator label spelling that `text` starts with.
 *
 * @param {string} text Trimmed suffix text
 * @return {{ label: string, length: number } | undefined} The CSL label and
 *   the length of the matched spelling, or undefined if no spelling matches.
 */
function findLocatorLabel(text) {
  const lowered = text.toLowerCase();
  let best;

  for (const [label, naturals] of Object.entries(locatorLabels)) {
    for (const natural of naturals) {
      if (!lowered.startsWith(natural.toLowerCase())) {
        continue;
      }
      // A word-like spelling must end at a word boundary; abbreviations and
      // symbols ('p.', '§') may be followed by anything.
      const endsInLetter = /\p{L}$/u.test(natural);
      const followedByLetter = /^\p{L}/u.test(text.slice(natural.length));
      if (endsInLetter && followedByLetter) {
        continue;
      }
      // Strictly longer only, so ties keep the first spelling in map order.
      if (best === undefined || natural.length > best.length) {
        best = { label, length: natural.length };
      }
    }
  }

  return best;
}
|