site/node_modules/retext-smartypants/lib/index.js
2024-10-14 08:09:33 +02:00

372 lines
9.5 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @typedef {import('nlcst').Parents} Parents
* @typedef {import('nlcst').Punctuation} Punctuation
* @typedef {import('nlcst').Root} Root
* @typedef {import('nlcst').SentenceContent} SentenceContent
* @typedef {import('nlcst').Symbol} Symbol
*/
/**
* @callback Method
* Transform.
* @param {State} state
* Info passed around.
* @param {Punctuation | Symbol} node
* Node.
* @param {number} index
* Position of `node` in `parent`.
* @param {Parents} parent
* Parent of `node`.
* @returns {undefined}
* Nothing.
*
* @typedef Options
* Configuration.
* @property {'all' | boolean | null | undefined} [backticks=true]
* Transform backticks (default: `true`); when `true`, turns double
* backticks into an opening double quote and double straight single quotes
* into a closing double quote; when `'all'`, does that and turns single
* backticks into an opening single quote and a straight single quote into
* a closing single smart quote; `quotes: false` must be used with
* `backticks: 'all'`.
* @property {QuoteCharacterMap | null | undefined} [closingQuotes]
* Closing quotes to use (default: `{double: '”', single: '’'}`).
* @property {'inverted' | 'oldschool' | boolean | null | undefined} [dashes=true]
* Transform dashes (default: `true`);
* when `true`, turns two dashes into an em dash character;
* when `'oldschool'`, turns three dashes into an em dash and two into an en
* dash;
* when `'inverted'`, turns three dashes into an en dash and two into an em
* dash.
* @property {'spaced' | 'unspaced' | boolean | null | undefined} [ellipses=true]
* Transform triple dots (default: `true`).
* when `'spaced'`, turns triple dots with spaces into ellipses;
* when `'unspaced'`, turns triple dots without spaces into ellipses;
* when `true`, turns triple dots with or without spaces into ellipses.
* @property {QuoteCharacterMap | null | undefined} [openingQuotes]
* Opening quotes to use (default: `{double: '“', single: '‘'}`).
* @property {boolean | null | undefined} [quotes=true]
* Transform straight quotes into smart quotes (default: `true`).
*
* @typedef State
* Info passed around.
* @property {Quotes} close
* Closing quotes.
* @property {Quotes} open
* Opening quotes.
*
* @typedef QuoteCharacterMap
* Quote characters.
* @property {string} double
* Character to use for double quotes.
* @property {string} single
* Character to use for single quotes.
*
* @typedef {[string, string]} Quotes
* Quotes.
*/
import {visit} from 'unist-util-visit'
import {toString} from 'nlcst-to-string'
// The quote characters are written as escapes on purpose: the literal curly
// single quotes are easily lost or confused when the file passes through tools
// that treat them as "ambiguous" Unicode.

/**
 * Default closing quotes: `[double, single]`, that is U+201D (right double
 * quotation mark) and U+2019 (right single quotation mark).
 *
 * @type {Quotes}
 */
const defaultClosingQuotes = ['\u201D', '\u2019']

/**
 * Default opening quotes: `[double, single]`, that is U+201C (left double
 * quotation mark) and U+2018 (left single quotation mark).
 *
 * @type {Quotes}
 */
const defaultOpeningQuotes = ['\u201C', '\u2018']

/**
 * Shared fallback used when no options are passed.
 *
 * @type {Readonly<Options>}
 */
const emptyOptions = {}
/**
 * Replace straight punctuation marks with curly ones.
 *
 * The enabled transforms are selected once from `options`; the returned
 * transformer then runs each of them, in order (quotes, ellipses, backticks,
 * dashes), on every punctuation and symbol node of the tree.
 *
 * @param {Readonly<Options> | null | undefined} [options]
 *   Configuration (optional).
 * @returns
 *   Transform.
 * @throws {Error}
 *   When `backticks: 'all'` is used without `quotes: false`.
 */
export default function retextSmartypants(options) {
  const settings = options || emptyOptions
  /** @type {Array<Method>} */
  const methods = []

  if (settings.quotes !== false) {
    methods.push(quotesDefault)
  }

  if (settings.ellipses === 'spaced') {
    methods.push(ellipsesSpaced)
  } else if (settings.ellipses === 'unspaced') {
    methods.push(ellipsesUnspaced)
  } else if (settings.ellipses !== false) {
    methods.push(ellipsesDefault)
  }

  if (settings.backticks === 'all') {
    // Both features claim the straight single quote, so they cannot be
    // combined.
    if (settings.quotes !== false) {
      throw new Error("Cannot accept `backticks: 'all'` with `quotes: true`")
    }

    methods.push(backticksAll)
  } else if (settings.backticks !== false) {
    methods.push(backticksDefault)
  }

  if (settings.dashes === 'inverted') {
    methods.push(dashesInverted)
  } else if (settings.dashes === 'oldschool') {
    methods.push(dashesOldschool)
  } else if (settings.dashes !== false) {
    methods.push(dashesDefault)
  }

  /** @type {State} */
  const state = {
    close: settings.closingQuotes
      ? [settings.closingQuotes.double, settings.closingQuotes.single]
      : defaultClosingQuotes,
    open: settings.openingQuotes
      ? [settings.openingQuotes.double, settings.openingQuotes.single]
      : defaultOpeningQuotes
  }

  /**
   * Transform.
   *
   * @param {Root} tree
   *   Tree.
   * @returns {undefined}
   *   Nothing.
   */
  return function (tree) {
    visit(tree, function (node, position, parent) {
      // Only punctuation and symbols that sit inside a parent are handled.
      if (
        !parent ||
        position === undefined ||
        (node.type !== 'PunctuationNode' && node.type !== 'SymbolNode')
      ) {
        return undefined
      }

      const siblings = parent.children
      const sizeBefore = siblings.length

      for (const method of methods) {
        method(state, node, position, parent)
      }

      // A method may remove siblings (the spaced-ellipsis transform splices
      // out the dots and whitespace before `node`). `position` is then stale,
      // so tell the traversal which sibling comes next; otherwise the nodes
      // right after `node` would be skipped.
      if (siblings.length !== sizeBefore) {
        const current = siblings.indexOf(node)

        if (current !== -1) {
          return current + 1
        }
      }

      return undefined
    })
  }
}
/**
 * Transform single and double backticks and single quotes into smart quotes.
 *
 * Delegates the doubled forms to `backticksDefault` first, then turns a lone
 * backtick into an opening single quote and a lone straight single quote
 * into a closing single quote.
 *
 * @type {Method}
 */
function backticksAll(state, node, index, parent) {
  backticksDefault(state, node, index, parent)

  // Escapes are used so the curly quotes cannot silently get lost.
  if (node.value === '`') {
    node.value = '\u2018' // Left single quotation mark.
  } else if (node.value === "'") {
    node.value = '\u2019' // Right single quotation mark.
  }
}
/**
 * Turn a pair of backticks into an opening double quote and a pair of
 * straight single quotes into a closing double quote.
 *
 * Any other value is left untouched.
 *
 * @type {Method}
 */
function backticksDefault(_, node) {
  switch (node.value) {
    case '``': {
      node.value = '“'
      break
    }

    case "''": {
      node.value = '”'
      break
    }

    default: {
      break
    }
  }
}
/**
 * Turn a node holding exactly two dashes into an em dash.
 *
 * @type {Method}
 */
function dashesDefault(_, node) {
  const isDoubleDash = node.value === '--'

  if (!isDoubleDash) {
    return
  }

  node.value = '—'
}
/**
 * Transform three dashes into an en dash, and two into an em dash.
 *
 * The dash characters are written as escapes so they cannot silently get
 * lost or be confused with one another.
 *
 * @type {Method}
 */
function dashesInverted(_, node) {
  if (node.value === '---') {
    node.value = '\u2013' // En dash.
  } else if (node.value === '--') {
    node.value = '\u2014' // Em dash.
  }
}
/**
 * Transform three dashes into an em dash, and two into an en dash.
 *
 * The dash characters are written as escapes so they cannot silently get
 * lost or be confused with one another.
 *
 * @type {Method}
 */
function dashesOldschool(_, node) {
  if (node.value === '---') {
    node.value = '\u2014' // Em dash.
  } else if (node.value === '--') {
    node.value = '\u2013' // En dash.
  }
}
/**
 * Turn runs of dots, written with or without spaces between them, into
 * unicode ellipses.
 *
 * Applies the spaced variant first and the unspaced variant second, handing
 * both the same arguments.
 *
 * @type {Method}
 */
function ellipsesDefault(state, node, index, parent) {
  for (const transform of [ellipsesSpaced, ellipsesUnspaced]) {
    transform(state, node, index, parent)
  }
}
/**
 * Turn dots separated by whitespace (`. . .`) into a unicode ellipsis.
 *
 * Starting from `node`, walks backwards over `whitespace, dots` pairs. When
 * at least three dot nodes are found in total, the earlier pairs are removed
 * from `parent` and `node` itself becomes the ellipsis.
 *
 * @type {Method}
 */
function ellipsesSpaced(_, node, index, parent) {
  const onlyDots = /^\.+$/

  if (!onlyDots.test(node.value)) {
    return
  }

  const children = parent.children
  // Number of `whitespace, dots` pairs found directly before `node`.
  let pairs = 0
  let cursor = index - 1

  // It's possible that the node is merged with an adjacent word-node. In that
  // case, we cannot transform it because there's no reference to the
  // grandparent.
  while (cursor > 0) {
    const gap = children[cursor]

    if (gap.type !== 'WhiteSpaceNode') {
      break
    }

    const dots = children[cursor - 1]
    const isDots =
      dots &&
      (dots.type === 'PunctuationNode' || dots.type === 'SymbolNode') &&
      onlyDots.test(dots.value)

    if (!isDots) {
      break
    }

    pairs++
    cursor -= 2
  }

  // `node` plus two earlier dot nodes are needed for an ellipsis.
  if (pairs < 2) {
    return
  }

  const span = pairs * 2
  children.splice(index - span, span)
  node.value = '…'
}
/**
 * Turn a single node made of three or more dots (no whitespace) into a
 * unicode ellipsis.
 *
 * @type {Method}
 */
function ellipsesUnspaced(_, node) {
  const isDotRun = /^\.{3,}$/.test(node.value)

  // Anything that is not purely three-or-more dots stays as it is.
  if (!isDotRun) {
    return
  }

  node.value = '…'
}
/**
* Transform straight single- and double quotes into smart quotes.
*
* @type {Method}
*/
// eslint-disable-next-line complexity
function quotesDefault(state, node, index, parent) {
const siblings = parent.children
const value = node.value
if (value !== '"' && value !== "'") {
return
}
const quoteIndex = value === '"' ? 0 : 1
const previous = siblings[index - 1]
const next = siblings[index + 1]
const nextNext = siblings[index + 2]
const nextValue = next ? toString(next) : ''
if (
next &&
nextNext &&
(next.type === 'PunctuationNode' || next.type === 'SymbolNode') &&
nextNext.type !== 'WordNode'
) {
// Special case if the very first character is a quote followed by
// punctuation at a non-word-break. Close the quotes by brute force.
node.value = state.close[quoteIndex]
} else if (
next &&
(next.type === 'PunctuationNode' || next.type === 'SymbolNode') &&
(nextValue === '"' || nextValue === "'") &&
nextNext &&
nextNext.type === 'WordNode'
) {
// Special case for double sets of quotes:
// `He said, "'Quoted' words in a larger quote."`
node.value = state.open[quoteIndex]
next.value = state.open[nextValue === '"' ? 0 : 1]
} else if (next && /^\d\ds$/.test(nextValue)) {
// Special case for decade abbreviations: `the '80s`
node.value = state.close[quoteIndex]
} else if (
previous &&
(previous.type === 'WhiteSpaceNode' ||
previous.type === 'PunctuationNode' ||
previous.type === 'SymbolNode') &&
next &&
next.type === 'WordNode'
) {
// Get most opening single quotes.
node.value = state.open[quoteIndex]
} else if (
previous &&
previous.type !== 'WhiteSpaceNode' &&
previous.type !== 'SymbolNode' &&
previous.type !== 'PunctuationNode'
) {
// Closing quotes.
node.value = state.close[quoteIndex]
} else if (
!next ||
next.type === 'WhiteSpaceNode' ||
(value === "'" && nextValue === 's')
) {
node.value = state.close[quoteIndex]
} else {
node.value = state.open[quoteIndex]
}
}