/**
 * @typedef {import('./types').CiteItem} CiteItem
 * @typedef {import('./types').CiteItemSuffix} CiteItemSuffix
 */
import { fullCitationRE, locatorRE } from './regex.js';

/**
 * The locatorLabels have been sourced from the Citr library. Basically it's just
 * a map with valid CSL locator labels (the keys) and an array of possible natural
 * labels which a user might want to write instead of the standardized labels
 * (the values). The lists mix German, English and French spellings, e.g.
 * 'Seite', 'page' and 'livre'.
 *
 * parseSuffix compares these natural labels case-insensitively against the
 * start of a citation suffix, so the order of keys and of the entries within
 * each array should be treated as significant and left untouched.
 *
 * @type {{ [key: string]: string[] }}
 */
const locatorLabels = {
  book: ['Buch', 'Bücher', 'B.', 'book', 'books', 'bk.', 'bks.', 'livre', 'livres', 'liv.'],
  chapter: ['Kapitel', 'Kap.', 'chapter', 'chapters', 'chap.', 'chaps', 'chapitre', 'chapitres'],
  column: ['Spalte', 'Spalten', 'Sp.', 'column', 'columns', 'col.', 'cols', 'colonne', 'colonnes'],
  figure: ['Abbildung', 'Abbildungen', 'Abb.', 'figure', 'figures', 'fig.', 'figs'],
  folio: ['Blatt', 'Blätter', 'Fol.', 'folio', 'folios', 'fol.', 'fols', 'fᵒ', 'fᵒˢ'],
  issue: [
    'Nummer',
    'Nummern',
    'Nr.',
    'number',
    'numbers',
    'no.',
    'nos.',
    'numéro',
    'numéros',
    'nᵒ',
    'nᵒˢ',
  ],
  // NOTE(review): 'Z' has no trailing period, unlike the other abbreviations
  // ('S.', 'V.', 'N.'), so it prefix-matches any suffix starting with "z" —
  // confirm against the Citr source whether 'Z.' was intended.
  line: ['Zeile', 'Zeilen', 'Z', 'line', 'lines', 'l.', 'll.', 'ligne', 'lignes'],
  note: ['Note', 'Noten', 'N.', 'note', 'notes', 'n.', 'nn.'],
  opus: ['Opus', 'Opera', 'op.', 'opus', 'opera', 'opp.'],
  page: ['Seite', 'Seiten', 'S.', 'page', 'pages', 'p.', 'pp.'],
  paragraph: [
    'Absatz',
    'Absätze',
    'Abs.',
    '¶',
    '¶¶',
    'paragraph',
    'paragraphs',
    'para.',
    'paras',
    'paragraphe',
    'paragraphes',
    'paragr.',
  ],
  part: ['Teil', 'Teile', 'part', 'parts', 'pt.', 'pts', 'partie', 'parties', 'part.'],
  section: [
    'Abschnitt',
    'Abschnitte',
    'Abschn.',
    '§',
    '§§',
    'section',
    'sections',
    'sec.',
    'secs',
    'sect.',
  ],
  'sub verbo': ['sub verbo', 'sub verbis', 's. v.', 's. vv.', 's.v.', 's.vv.'],
  verse: ['Vers', 'Verse', 'V.', 'verse', 'verses', 'v.', 'vv.', 'verset', 'versets'],
  volume: ['Band', 'Bände', 'Bd.', 'Bde.', 'volume', 'volumes', 'vol.', 'vols.'],
};

/**
 * Parses a given citation string and returns the entries and isComposite flag required for cite-proc.
* Adapted from https://github.com/Zettlr/Zettlr/blob/develop/source/common/util/extract-citations.ts * * @param {RegExpMatchArray} regexMatch Cite string in the form of '[@item]' or '@item' * @return {[CiteItem[], boolean]} [entries, isComposite] */ export const parseCitation = (regexMatch) => { /** @type {CiteItem[]} */ let entries = []; let isComposite = false; const fullCitation = regexMatch[1]; const inTextSuppressAuthor = regexMatch[2]; const inTextCitation = regexMatch[3]; const optionalSuffix = regexMatch[4]; if (fullCitation !== undefined) { // Handle citations in the form of [@item1; @item2] for (const citationPart of fullCitation.split(';')) { const match = fullCitationRE.exec(citationPart.trim()); if (match === null) { continue; // Faulty citation } // Prefix is the portion before @ e.g. [see @item1] or an empty string // We explicitly cast groups since we have groups in our RegExp and as // such the groups object will be set. /** @type {CiteItem} */ const thisCitation = { id: match.groups.citekey.replace(/{(.+)}/, '$1'), prefix: undefined, locator: undefined, label: 'page', 'suppress-author': false, suffix: undefined, }; // First, deal with the prefix. The speciality here is that it can // indicate if we should suppress the author. const rawPrefix = match.groups.prefix; if (rawPrefix !== undefined) { thisCitation['suppress-author'] = rawPrefix.trim().endsWith('-'); if (thisCitation['suppress-author']) { thisCitation.prefix = rawPrefix.substring(0, rawPrefix.trim().length - 1).trim(); } else { thisCitation.prefix = rawPrefix.trim(); } } // Second, deal with the suffix. This one can be much more tricky than // the prefix. We have three alternatives where the locator may be // present: If we have an explicitLocator or an explicitLocatorInSuffix, // we should extract the locator from there and leave the actual suffix // untouched. Only if those two alternatives are not present, then we // have a look at the rawSuffix and extract a (potential) locator. 
const explicitLocator = match.groups.explicitLocator; const explicitLocatorInSuffix = match.groups.explicitLocatorInSuffix; const rawSuffix = match.groups.suffix; let suffixToParse; let containsLocator = true; if (explicitLocator === undefined && explicitLocatorInSuffix === undefined) { // Potential locator in rawSuffix. Only in this case should we overwrite // the suffix (hence the same if-condition below) suffixToParse = rawSuffix; containsLocator = false; } else if (explicitLocatorInSuffix !== undefined || explicitLocator !== undefined) { suffixToParse = explicitLocator !== undefined ? explicitLocator : explicitLocatorInSuffix; thisCitation.suffix = rawSuffix?.trim(); } const { label, locator, suffix } = parseSuffix(suffixToParse, containsLocator); thisCitation.locator = locator; if (label !== undefined) { thisCitation.label = label; } if (explicitLocator === undefined && explicitLocatorInSuffix === undefined) { thisCitation.suffix = suffix; } else if (suffix !== undefined && thisCitation.locator !== undefined) { // If we're here, we should not change the suffix, but parseSuffix may // have put something into the suffix return. If we're here, that will // definitely be a part of the locator. thisCitation.locator += suffix; } entries.push(thisCitation); } } else { // We have an in-text citation, so we can take a shortcut isComposite = true; entries.push({ prefix: undefined, id: inTextCitation.replace(/{(.+)}/, '$1'), 'suppress-author': inTextSuppressAuthor !== undefined, ...parseSuffix(optionalSuffix, false), // Populate more depending on the suffix }); } return [entries, isComposite]; }; /** * This takes a suffix and extracts optional label and locator from this. Pass * true for the containsLocator property to indicate to this function that what * it got was not a regular suffix with an optional locator, but an explicit * locator so it knows it just needs to look for an optional label. 
* * @param {string} suffix The suffix to parse * @param {boolean} containsLocator If true, forces parseSuffix to return a locator * * @return {CiteItemSuffix} An object containing three optional properties locator, label, or suffix. */ function parseSuffix(suffix, containsLocator) { /** @type {CiteItemSuffix} */ const retValue = { locator: undefined, label: 'page', suffix: undefined, }; if (suffix === undefined) { return retValue; } // Make sure the suffix does not start or end with spaces suffix = suffix.trim(); // If there is a label, the suffix must start with it for (const label in locatorLabels) { for (const natural of locatorLabels[label]) { if (suffix.toLowerCase().startsWith(natural.toLowerCase())) { retValue.label = label; if (containsLocator) { // The suffix actually is the full locator, we just had to extract // the label from it. There is no remaining suffix. retValue.locator = suffix.substr(natural.length).trim(); } else { // The caller indicated that this is a regular suffix, so we must also // extract the locator from what is left after label extraction. retValue.suffix = suffix.substr(natural.length).trim(); const match = locatorRE.exec(retValue.suffix); if (match !== null) { retValue.locator = match[0]; // Extract the full match retValue.suffix = retValue.suffix.substr(match[0].length).trim(); } } return retValue; // Early exit } } } // If we're here, there was no explicit label given, but the caller has indicated // that this suffix MUST contain a locator. This means that the whole suffix is // the locator. if (containsLocator) { retValue.locator = suffix; } else { // The caller has not indicated that the whole suffix is the locator, so it // can be at the beginning. We only accept simple page/number ranges here. // For everything, the user should please be more specific. 
const match = locatorRE.exec(suffix); if (match !== null) { retValue.locator = match[0]; // Full match is the locator retValue.suffix = suffix.substr(match[0].length).trim(); // The rest is the suffix. } } return retValue; }