site/node_modules/retext-smartypants/lib/index.js

372 lines
9.5 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('nlcst').Parents} Parents
* @typedef {import('nlcst').Punctuation} Punctuation
* @typedef {import('nlcst').Root} Root
* @typedef {import('nlcst').SentenceContent} SentenceContent
* @typedef {import('nlcst').Symbol} Symbol
*/
/**
* @callback Method
* Transform.
* @param {State} state
* Info passed around.
* @param {Punctuation | Symbol} node
* Node.
* @param {number} index
* Position of `node` in `parent`.
* @param {Parents} parent
* Parent of `node`.
* @returns {undefined}
* Nothing.
*
* @typedef Options
* Configuration.
* @property {'all' | boolean | null | undefined} [backticks=true]
* Transform backticks (default: `true`); when `true`, turns double
* backticks into an opening double quote and double straight single quotes
* into a closing double quote; when `'all'`, does that and turns single
* backticks into an opening single quote and straight single quotes into
* a closing single smart quote; `quotes: false` must be used with
* `backticks: 'all'`.
* @property {QuoteCharacterMap | null | undefined} [closingQuotes]
* Closing quotes to use (default: `{double: '”', single: '’'}`).
* @property {'inverted' | 'oldschool' | boolean | null | undefined} [dashes=true]
* Transform dashes (default: `true`);
* when `true`, turns two dashes into an em dash character;
* when `'oldschool'`, turns three dashes into an em dash and two into an en
* dash;
* when `'inverted'`, turns three dashes into an en dash and two into an em
* dash.
* @property {'spaced' | 'unspaced' | boolean | null | undefined} [ellipses=true]
* Transform triple dots (default: `true`);
* when `'spaced'`, turns triple dots with spaces into ellipses;
* when `'unspaced'`, turns triple dots without spaces into ellipses;
* when `true`, turns triple dots with or without spaces into ellipses.
* @property {QuoteCharacterMap | null | undefined} [openingQuotes]
* Opening quotes to use (default: `{double: '“', single: '‘'}`).
* @property {boolean | null | undefined} [quotes=true]
* Transform straight quotes into smart quotes (default: `true`).
*
* @typedef State
* Info passed around.
* @property {Quotes} close
* Closing quotes.
* @property {Quotes} open
* Opening quotes.
*
* @typedef QuoteCharacterMap
* Quote characters.
* @property {string} double
* Character to use for double quotes.
* @property {string} single
* Character to use for single quotes.
*
* @typedef {[string, string]} Quotes
* Quotes.
*/
import {visit} from 'unist-util-visit'
import {toString} from 'nlcst-to-string'
/**
 * Closing quotes used when `closingQuotes` is not configured, ordered as
 * `[double, single]`: right double quotation mark (U+201D) and right single
 * quotation mark (U+2019).
 *
 * @type {Quotes}
 */
const defaultClosingQuotes = ['”', '’']
/**
 * Opening quotes used when `openingQuotes` is not configured, ordered as
 * `[double, single]`: left double quotation mark (U+201C) and left single
 * quotation mark (U+2018).
 *
 * @type {Quotes}
 */
const defaultOpeningQuotes = ['“', '‘']
/**
 * Fallback configuration used when the plugin is called without options.
 *
 * @type {Readonly<Options>}
 */
const emptyOptions = {}
/**
 * Create a transform that swaps straight punctuation marks for their
 * typographic ("smart") counterparts.
 *
 * The enabled transforms run, per punctuation or symbol node, in a fixed
 * order: quotes, ellipses, backticks, dashes.
 *
 * @param {Readonly<Options> | null | undefined} [options]
 *   Configuration (optional).
 * @returns
 *   Transform.
 * @throws {Error}
 *   When `backticks: 'all'` is used without `quotes: false`.
 */
export default function retextSmartypants(options) {
  const {backticks, closingQuotes, dashes, ellipses, openingQuotes, quotes} =
    options || emptyOptions
  // Anything but an explicit `false` keeps the quote transform on.
  const smartQuotes = quotes !== false
  /** @type {Array<Method>} */
  const methods = []

  if (smartQuotes) {
    methods.push(quotesDefault)
  }

  switch (ellipses) {
    case false: {
      break
    }

    case 'spaced': {
      methods.push(ellipsesSpaced)
      break
    }

    case 'unspaced': {
      methods.push(ellipsesUnspaced)
      break
    }

    default: {
      methods.push(ellipsesDefault)
    }
  }

  switch (backticks) {
    case false: {
      break
    }

    case 'all': {
      // Both transforms rewrite straight single quotes, so they cannot be
      // combined.
      if (smartQuotes) {
        throw new Error("Cannot accept `backticks: 'all'` with `quotes: true`")
      }

      methods.push(backticksAll)
      break
    }

    default: {
      methods.push(backticksDefault)
    }
  }

  switch (dashes) {
    case false: {
      break
    }

    case 'inverted': {
      methods.push(dashesInverted)
      break
    }

    case 'oldschool': {
      methods.push(dashesOldschool)
      break
    }

    default: {
      methods.push(dashesDefault)
    }
  }

  // Quote pairs are stored as `[double, single]`.
  /** @type {Quotes} */
  const close = closingQuotes
    ? [closingQuotes.double, closingQuotes.single]
    : defaultClosingQuotes
  /** @type {Quotes} */
  const open = openingQuotes
    ? [openingQuotes.double, openingQuotes.single]
    : defaultOpeningQuotes
  /** @type {State} */
  const state = {close, open}

  /**
   * Transform.
   *
   * @param {Root} tree
   *   Tree.
   * @returns {undefined}
   *   Nothing.
   */
  return function (tree) {
    visit(tree, function (node, position, parent) {
      // Nodes without a parent or a known position cannot be handed to the
      // methods.
      if (!parent || position === undefined) {
        return
      }

      if (node.type !== 'PunctuationNode' && node.type !== 'SymbolNode') {
        return
      }

      for (const method of methods) {
        method(state, node, position, parent)
      }
    })
  }
}
/**
 * Transform single and double backticks and single quotes into smart quotes.
 *
 * Double backticks and doubled straight single quotes are handled first by
 * `backticksDefault`; afterwards a single backtick becomes an opening single
 * quote and a straight single quote becomes a closing single quote.
 *
 * @type {Method}
 */
function backticksAll(state, node, index, parent) {
  backticksDefault(state, node, index, parent)

  if (node.value === '`') {
    // Left single quotation mark (U+2018).
    node.value = '‘'
  } else if (node.value === "'") {
    // Right single quotation mark (U+2019).
    node.value = '’'
  }
}
/**
 * Turn double backticks into an opening double smart quote and two straight
 * single quotes into a closing double smart quote; any other value is left
 * as is.
 *
 * @type {Method}
 */
function backticksDefault(_, node) {
  switch (node.value) {
    case '``': {
      node.value = '“'
      break
    }

    case "''": {
      node.value = '”'
      break
    }
  }
}
/**
 * Replace a node that is exactly two dashes with an em dash; every other
 * value is left alone.
 *
 * @type {Method}
 */
function dashesDefault(_, node) {
  const isDoubleDash = node.value === '--'

  if (!isDoubleDash) {
    return
  }

  node.value = '—'
}
/**
 * Transform three dashes into an en dash, and two into an em dash.
 *
 * @type {Method}
 */
function dashesInverted(_, node) {
  if (node.value === '---') {
    // En dash (U+2013).
    node.value = '–'
  } else if (node.value === '--') {
    // Em dash (U+2014).
    node.value = '—'
  }
}
/**
 * Transform three dashes into an em dash, and two into an en dash.
 *
 * @type {Method}
 */
function dashesOldschool(_, node) {
  if (node.value === '---') {
    // Em dash (U+2014).
    node.value = '—'
  } else if (node.value === '--') {
    // En dash (U+2013).
    node.value = '–'
  }
}
/**
 * Turn runs of dots into a unicode ellipsis, whether or not the dots are
 * separated by whitespace.
 *
 * Runs the spaced transform first and the unspaced transform second, both
 * with the same arguments.
 *
 * @type {Method}
 */
function ellipsesDefault(_, node, index, parent) {
  for (const transform of [ellipsesSpaced, ellipsesUnspaced]) {
    transform(_, node, index, parent)
  }
}
/**
 * Collapse dot nodes separated by whitespace (such as `. . .`) into a single
 * unicode ellipsis.
 *
 * Starting at `node`, walks backwards over `whitespace, dots` pairs. When at
 * least three dot nodes are found (counting `node`), the preceding pairs are
 * removed from `parent` and `node` becomes the ellipsis.
 *
 * @type {Method}
 */
function ellipsesSpaced(_, node, index, parent) {
  const dotsOnly = /^\.+$/

  if (!dotsOnly.test(node.value)) {
    return
  }

  const siblings = parent.children
  // Number of siblings directly before `node` that belong to the run.
  let absorbed = 0
  // Dot nodes seen so far, including `node` itself.
  let dotNodes = 1
  let cursor = index - 1

  // Note: it's possible that a dot is merged with an adjacent word node. In
  // that case it cannot be transformed here, because there's no reference
  // to the grandparent.
  while (cursor > 0) {
    const gap = siblings[cursor]
    const candidate = siblings[cursor - 1]

    if (gap.type !== 'WhiteSpaceNode') {
      break
    }

    const isDots =
      candidate &&
      (candidate.type === 'PunctuationNode' ||
        candidate.type === 'SymbolNode') &&
      dotsOnly.test(candidate.value)

    if (!isDots) {
      break
    }

    absorbed += 2
    dotNodes++
    cursor -= 2
  }

  if (dotNodes < 3) {
    return
  }

  siblings.splice(index - absorbed, absorbed)
  node.value = '…'
}
/**
 * Replace a node made up solely of three or more dots, without whitespace,
 * with a unicode ellipsis.
 *
 * @type {Method}
 */
function ellipsesUnspaced(_, node) {
  const threeOrMoreDots = /^\.{3,}$/

  if (!threeOrMoreDots.test(node.value)) {
    return
  }

  node.value = '…'
}
/**
 * Turn a straight single (`'`) or double (`"`) quote into an opening or
 * closing smart quote, chosen by looking at the siblings around it.
 *
 * Nodes with any other value are ignored. The replacement characters come
 * from `state.open` and `state.close`, indexed `0` for double and `1` for
 * single quotes. The checks below run in a fixed order; the first one that
 * matches wins.
 *
 * @type {Method}
 */
// eslint-disable-next-line complexity
function quotesDefault(state, node, index, parent) {
  const straight = node.value
  const isDouble = straight === '"'

  if (!isDouble && straight !== "'") {
    return
  }

  /**
   * Check whether a sibling is a punctuation or symbol node.
   *
   * @param {{type: string}} candidate
   *   Sibling to check.
   * @returns {boolean}
   *   Whether `candidate` is punctuation or a symbol.
   */
  const isMark = (candidate) =>
    candidate.type === 'PunctuationNode' || candidate.type === 'SymbolNode'

  const slot = isDouble ? 0 : 1
  const siblings = parent.children
  const before = siblings[index - 1]
  const after = siblings[index + 1]
  const afterNext = siblings[index + 2]
  const afterText = after ? toString(after) : ''
  const afterIsMark = Boolean(after) && isMark(after)
  // Whitespace, punctuation, or a symbol directly before the quote.
  const beforeIsBreak =
    Boolean(before) && (before.type === 'WhiteSpaceNode' || isMark(before))

  // A quote followed by punctuation that is not itself followed by a word:
  // close the quote by brute force.
  if (afterIsMark && afterNext && afterNext.type !== 'WordNode') {
    node.value = state.close[slot]
    return
  }

  // Two quotes in a row before a word, as in
  // `He said, "'Quoted' words in a larger quote."`: open both.
  if (
    afterIsMark &&
    (afterText === '"' || afterText === "'") &&
    afterNext &&
    afterNext.type === 'WordNode'
  ) {
    node.value = state.open[slot]
    after.value = state.open[afterText === '"' ? 0 : 1]
    return
  }

  // Decade abbreviations: `the '80s`.
  if (after && /^\d\ds$/.test(afterText)) {
    node.value = state.close[slot]
    return
  }

  // After a break and directly before a word: most opening quotes.
  if (beforeIsBreak && after && after.type === 'WordNode') {
    node.value = state.open[slot]
    return
  }

  // Directly after something that is not a break: closing quotes.
  if (before && !beforeIsBreak) {
    node.value = state.close[slot]
    return
  }

  // Nothing or whitespace follows, or a single quote is followed by `s`.
  if (
    !after ||
    after.type === 'WhiteSpaceNode' ||
    (!isDouble && afterText === 's')
  ) {
    node.value = state.close[slot]
    return
  }

  node.value = state.open[slot]
}