site/node_modules/rehype-citation/dist/node/src/parse-citation.js
2024-10-14 08:09:33 +02:00

227 lines
9.7 KiB
JavaScript

/**
* @typedef {import('./types').CiteItem} CiteItem
* @typedef {import('./types').CiteItemSuffix} CiteItemSuffix
*/
import { fullCitationRE, locatorRE } from './regex.js';
/**
 * Lookup table keyed by CSL locator label. Each value lists the natural
 * spellings (full words and abbreviations) a user might write in a citation
 * instead of the standardized label. The entries have been sourced from the
 * Citr library.
 *
 * @type {{ [key: string]: string[] }}
 */
const locatorLabels = {
  book: [
    'Buch', 'Bücher', 'B.',
    'book', 'books', 'bk.', 'bks.',
    'livre', 'livres', 'liv.',
  ],
  chapter: [
    'Kapitel', 'Kap.',
    'chapter', 'chapters', 'chap.', 'chaps',
    'chapitre', 'chapitres',
  ],
  column: [
    'Spalte', 'Spalten', 'Sp.',
    'column', 'columns', 'col.', 'cols',
    'colonne', 'colonnes',
  ],
  figure: [
    'Abbildung', 'Abbildungen', 'Abb.',
    'figure', 'figures', 'fig.', 'figs',
  ],
  folio: [
    'Blatt', 'Blätter', 'Fol.',
    'folio', 'folios', 'fol.', 'fols',
    'fᵒ', 'fᵒˢ',
  ],
  issue: [
    'Nummer', 'Nummern', 'Nr.',
    'number', 'numbers', 'no.', 'nos.',
    'numéro', 'numéros', 'nᵒ', 'nᵒˢ',
  ],
  line: [
    'Zeile', 'Zeilen', 'Z',
    'line', 'lines', 'l.', 'll.',
    'ligne', 'lignes',
  ],
  note: [
    'Note', 'Noten', 'N.',
    'note', 'notes', 'n.', 'nn.',
  ],
  opus: [
    'Opus', 'Opera', 'op.',
    'opus', 'opera', 'opp.',
  ],
  page: [
    'Seite', 'Seiten', 'S.',
    'page', 'pages', 'p.', 'pp.',
  ],
  paragraph: [
    'Absatz', 'Absätze', 'Abs.',
    '¶', '¶¶',
    'paragraph', 'paragraphs', 'para.', 'paras',
    'paragraphe', 'paragraphes', 'paragr.',
  ],
  part: [
    'Teil', 'Teile',
    'part', 'parts', 'pt.', 'pts',
    'partie', 'parties', 'part.',
  ],
  section: [
    'Abschnitt', 'Abschnitte', 'Abschn.',
    '§', '§§',
    'section', 'sections', 'sec.', 'secs', 'sect.',
  ],
  'sub verbo': [
    'sub verbo', 'sub verbis',
    's. v.', 's. vv.', 's.v.', 's.vv.',
  ],
  verse: [
    'Vers', 'Verse', 'V.',
    'verse', 'verses', 'v.', 'vv.',
    'verset', 'versets',
  ],
  volume: [
    'Band', 'Bände', 'Bd.', 'Bde.',
    'volume', 'volumes', 'vol.', 'vols.',
  ],
};
/**
 * Turns a citation regex match into the list of cite items expected by
 * cite-proc, plus a flag telling whether the citation was written in-text.
 * Adapted from https://github.com/Zettlr/Zettlr/blob/develop/source/common/util/extract-citations.ts
 *
 * The match is read positionally:
 *  - [1] content of a bracketed citation such as '[@item1; @item2]'
 *  - [2] author-suppression marker of an in-text citation
 *  - [3] cite key of an in-text citation such as '@item'
 *  - [4] optional bracketed suffix following an in-text citation
 *
 * @param {RegExpMatchArray} regexMatch Cite string in the form of '[@item]' or '@item'
 * @return {[CiteItem[], boolean]} [entries, isComposite]
 */
export const parseCitation = (regexMatch) => {
  const bracketed = regexMatch[1];

  // In-text citation: exactly one item, flagged as composite.
  if (bracketed === undefined) {
    /** @type {CiteItem} */
    const inTextItem = {
      prefix: undefined,
      id: regexMatch[3].replace(/{(.+)}/, '$1'),
      'suppress-author': regexMatch[2] !== undefined,
      // Locator, label and suffix come from the optional trailing bracket.
      ...parseSuffix(regexMatch[4], false),
    };
    return [[inTextItem], true];
  }

  // Bracketed citation: one item per ';'-separated part.
  /** @type {CiteItem[]} */
  const items = [];
  for (const rawPart of bracketed.split(';')) {
    const parsed = fullCitationRE.exec(rawPart.trim());
    if (parsed === null) {
      // Not a well-formed citation part; skip it.
      continue;
    }
    const {
      citekey,
      prefix: rawPrefix,
      explicitLocator,
      explicitLocatorInSuffix,
      suffix: rawSuffix,
    } = parsed.groups;

    /** @type {CiteItem} */
    const item = {
      id: citekey.replace(/{(.+)}/, '$1'),
      prefix: undefined,
      locator: undefined,
      label: 'page',
      'suppress-author': false,
      suffix: undefined,
    };

    // The prefix is everything before the '@'. A trailing '-' is not part of
    // the prefix text; it requests author suppression.
    if (rawPrefix !== undefined) {
      const trimmedPrefix = rawPrefix.trim();
      const suppressAuthor = trimmedPrefix.endsWith('-');
      item['suppress-author'] = suppressAuthor;
      item.prefix = suppressAuthor
        ? rawPrefix.substring(0, trimmedPrefix.length - 1).trim()
        : trimmedPrefix;
    }

    // An explicit locator (directly after the key or at the start of the
    // suffix) is parsed on its own and the raw suffix is kept untouched.
    // Otherwise the raw suffix is searched for a potential locator.
    const hasExplicitLocator =
      explicitLocator !== undefined || explicitLocatorInSuffix !== undefined;
    let parsedSuffix;
    if (hasExplicitLocator) {
      item.suffix = rawSuffix?.trim();
      parsedSuffix = parseSuffix(
        explicitLocator !== undefined ? explicitLocator : explicitLocatorInSuffix,
        true,
      );
    }
    else {
      parsedSuffix = parseSuffix(rawSuffix, false);
    }

    item.locator = parsedSuffix.locator;
    if (parsedSuffix.label !== undefined) {
      item.label = parsedSuffix.label;
    }
    if (!hasExplicitLocator) {
      // Only a suffix that was searched for a locator gets overwritten.
      item.suffix = parsedSuffix.suffix;
    }
    else if (parsedSuffix.suffix !== undefined && item.locator !== undefined) {
      // Anything parseSuffix split off an explicit locator still belongs to
      // that locator, so glue it back on.
      item.locator += parsedSuffix.suffix;
    }
    items.push(item);
  }
  return [items, false];
};
/** Matches a single Unicode letter; used for word-boundary checks on labels. */
const LETTER_RE = /\p{L}/u;

/**
 * Tells whether the code point starting at `index` of `text` is a letter.
 *
 * @param {string} text The string to inspect
 * @param {number} index UTF-16 index of the character to test
 * @return {boolean} false when the index is out of range or not a letter
 */
function isLetterAt(text, index) {
  const codePoint = text.codePointAt(index);
  return codePoint !== undefined && LETTER_RE.test(String.fromCodePoint(codePoint));
}

/**
 * Finds the natural locator label that `text` starts with (case-insensitive).
 *
 * Two rules keep the lookup from misfiring:
 *  - the LONGEST matching spelling wins, so 'pages' is not cut short by
 *    'page', '¶¶' by '¶', or 's. v.' by 'S.';
 *  - a spelling that ends in a letter must not be followed by another letter,
 *    so 'Z' does not match 'Zitat' and 'Vers' does not match 'version'.
 *
 * @param {string} text Trimmed suffix text
 * @return {{ label: string, natural: string } | undefined} The CSL label and
 *   the spelling that matched, or undefined when the text has no label.
 */
function findNaturalLabel(text) {
  const lowered = text.toLowerCase();
  let best;
  for (const [label, naturals] of Object.entries(locatorLabels)) {
    for (const natural of naturals) {
      // Only a strictly longer spelling can replace the current candidate;
      // on ties the first one in table order is kept.
      if (best !== undefined && natural.length <= best.natural.length) {
        continue;
      }
      if (!lowered.startsWith(natural.toLowerCase())) {
        continue;
      }
      const endsInLetter = isLetterAt(natural, natural.length - 1);
      if (endsInLetter && isLetterAt(text, natural.length)) {
        continue; // The match stops in the middle of a word.
      }
      best = { label, natural };
    }
  }
  return best;
}

/**
 * This takes a suffix and extracts optional label and locator from this. Pass
 * true for the containsLocator property to indicate to this function that what
 * it got was not a regular suffix with an optional locator, but an explicit
 * locator so it knows it just needs to look for an optional label.
 *
 * For a regular suffix (containsLocator === false) a label only counts when a
 * locator follows it. If no locator can be extracted, the whole trimmed suffix
 * is returned unchanged as `suffix` and the label stays at its default, so no
 * user-written text is dropped.
 *
 * @param {string | undefined} suffix The suffix to parse
 * @param {boolean} containsLocator If true, forces parseSuffix to return a locator
 *
 * @return {CiteItemSuffix} An object with the properties locator, label
 *   (defaults to 'page') and suffix; locator and suffix may be undefined.
 */
function parseSuffix(suffix, containsLocator) {
  /** @type {CiteItemSuffix} */
  const retValue = {
    locator: undefined,
    label: 'page',
    suffix: undefined,
  };
  if (suffix === undefined) {
    return retValue;
  }
  // Make sure the suffix does not start or end with spaces
  const text = suffix.trim();
  // If there is a label, the suffix must start with it
  const found = findNaturalLabel(text);
  const afterLabel = found === undefined ? text : text.slice(found.natural.length).trim();

  if (containsLocator) {
    // The input is the full locator: whatever follows the optional label is
    // the locator and there is no remaining suffix.
    if (found !== undefined) {
      retValue.label = found.label;
    }
    retValue.locator = afterLabel;
    return retValue;
  }

  // Regular suffix: the locator, if any, sits right after the optional label.
  // Only simple page/number ranges (as defined by locatorRE) are accepted.
  const match = locatorRE.exec(afterLabel);
  if (match !== null) {
    if (found !== undefined) {
      retValue.label = found.label;
    }
    retValue.locator = match[0]; // Full match is the locator
    retValue.suffix = afterLabel.slice(match[0].length).trim(); // The rest is the suffix.
  }
  else if (text !== '') {
    // No locator at all: keep the text verbatim, including any leading word
    // that merely looked like a label.
    retValue.suffix = text;
  }
  return retValue;
}