372 lines
9.5 KiB
JavaScript
372 lines
9.5 KiB
JavaScript
|
/**
 * @typedef {import('nlcst').Parents} Parents
 * @typedef {import('nlcst').Punctuation} Punctuation
 * @typedef {import('nlcst').Root} Root
 * @typedef {import('nlcst').SentenceContent} SentenceContent
 * @typedef {import('nlcst').Symbol} Symbol
 */
|
|||
|
|
|||
|
/**
 * @callback Method
 *   Transform.
 * @param {State} state
 *   Info passed around.
 * @param {Punctuation | Symbol} node
 *   Node.
 * @param {number} index
 *   Position of `node` in `parent`.
 * @param {Parents} parent
 *   Parent of `node`.
 * @returns {undefined}
 *   Nothing.
 *
 * @typedef Options
 *   Configuration.
 * @property {'all' | boolean | null | undefined} [backticks=true]
 *   Transform backticks (default: `true`); when `true`, turns double
 *   backticks into an opening double quote and double straight single quotes
 *   into a closing double quote; when `'all'`, does that and turns single
 *   backticks into an opening single quote and straight single quotes into
 *   a closing single smart quote; `quotes: false` must be used with
 *   `backticks: 'all'`.
 * @property {QuoteCharacterMap | null | undefined} [closingQuotes]
 *   Closing quotes to use (default: `{double: '”', single: '’'}`).
 * @property {'inverted' | 'oldschool' | boolean | null | undefined} [dashes=true]
 *   Transform dashes (default: `true`);
 *   when `true`, turns two dashes into an em dash character;
 *   when `'oldschool'`, turns three dashes into an em dash and two into an en
 *   dash;
 *   when `'inverted'`, turns three dashes into an en dash and two into an em
 *   dash.
 * @property {'spaced' | 'unspaced' | boolean | null | undefined} [ellipses=true]
 *   Transform triple dots (default: `true`);
 *   when `'spaced'`, turns triple dots with spaces into ellipses;
 *   when `'unspaced'`, turns triple dots without spaces into ellipses;
 *   when `true`, turns triple dots with or without spaces into ellipses.
 * @property {QuoteCharacterMap | null | undefined} [openingQuotes]
 *   Opening quotes to use (default: `{double: '“', single: '‘'}`).
 * @property {boolean | null | undefined} [quotes=true]
 *   Transform straight quotes into smart quotes (default: `true`).
 *
 * @typedef State
 *   Info passed around.
 * @property {Quotes} close
 *   Closing quotes.
 * @property {Quotes} open
 *   Opening quotes.
 *
 * @typedef QuoteCharacterMap
 *   Quote characters.
 * @property {string} double
 *   Character to use for double quotes.
 * @property {string} single
 *   Character to use for single quotes.
 *
 * @typedef {[string, string]} Quotes
 *   Quotes.
 */
|
|||
|
|
|||
|
import {visit} from 'unist-util-visit'
|
|||
|
import {toString} from 'nlcst-to-string'
|
|||
|
|
|||
|
/**
 * Closing quotes used when no `closingQuotes` option is given
 * (double quote first, single quote second).
 *
 * @type {Quotes}
 */
const defaultClosingQuotes = ['”', '’']

/**
 * Opening quotes used when no `openingQuotes` option is given
 * (double quote first, single quote second).
 *
 * @type {Quotes}
 */
const defaultOpeningQuotes = ['“', '‘']

/**
 * Shared fallback for when the plugin is called without options.
 *
 * @type {Readonly<Options>}
 */
const emptyOptions = {}
|
|||
|
|
|||
|
/**
 * Replace straight punctuation marks with curly ones.
 *
 * The list of transforms is built once from `options`; the returned
 * transformer then runs every selected transform, in order (quotes, ellipses,
 * backticks, dashes), on each punctuation and symbol node of the tree.
 *
 * @param {Readonly<Options> | null | undefined} [options]
 *   Configuration (optional).
 * @returns
 *   Transform.
 * @throws {Error}
 *   When `backticks: 'all'` is used without `quotes: false`.
 */
export default function retextSmartypants(options) {
  const settings = options || emptyOptions
  /** @type {Array<Method>} */
  const methods = []

  if (settings.quotes !== false) {
    methods.push(quotesDefault)
  }

  if (settings.ellipses === 'spaced') {
    methods.push(ellipsesSpaced)
  } else if (settings.ellipses === 'unspaced') {
    methods.push(ellipsesUnspaced)
  } else if (settings.ellipses !== false) {
    methods.push(ellipsesDefault)
  }

  if (settings.backticks === 'all') {
    // Both transforms would claim straight single quotes, so they cannot be
    // combined.
    if (settings.quotes !== false) {
      throw new Error("Cannot accept `backticks: 'all'` with `quotes: true`")
    }

    methods.push(backticksAll)
  } else if (settings.backticks !== false) {
    methods.push(backticksDefault)
  }

  if (settings.dashes === 'inverted') {
    methods.push(dashesInverted)
  } else if (settings.dashes === 'oldschool') {
    methods.push(dashesOldschool)
  } else if (settings.dashes !== false) {
    methods.push(dashesDefault)
  }

  /** @type {State} */
  const state = {
    close: settings.closingQuotes
      ? [settings.closingQuotes.double, settings.closingQuotes.single]
      : defaultClosingQuotes,
    open: settings.openingQuotes
      ? [settings.openingQuotes.double, settings.openingQuotes.single]
      : defaultOpeningQuotes
  }

  /**
   * Transform.
   *
   * @param {Root} tree
   *   Tree.
   * @returns {undefined}
   *   Nothing.
   */
  return function (tree) {
    visit(tree, function (node, position, parent) {
      if (
        !parent ||
        position === undefined ||
        (node.type !== 'PunctuationNode' && node.type !== 'SymbolNode')
      ) {
        return
      }

      let index = -1

      while (++index < methods.length) {
        methods[index](state, node, position, parent)
      }

      // The spaced-ellipsis transform removes siblings that come *before*
      // `node`, which shifts `node` (and everything after it) to the left.
      // Without telling the traversal, it would continue at the stale
      // `position + 1` and skip as many following siblings as were removed.
      // Returning a number makes `visit` continue at that sibling instead.
      if (parent.children[position] !== node) {
        const current = parent.children.indexOf(node)

        if (current !== -1) {
          return current + 1
        }
      }
    })
  }
}
|
|||
|
|
|||
|
/**
 * Turn backticks and straight single quotes, in both their doubled and
 * single forms, into smart quotes.
 *
 * @type {Method}
 */
function backticksAll(state, node, index, parent) {
  // Doubled forms are handled by the default backtick transform.
  backticksDefault(state, node, index, parent)

  // Whatever is still a lone backtick or straight single quote becomes a
  // single smart quote.
  switch (node.value) {
    case '`': {
      node.value = '‘'
      break
    }

    case "'": {
      node.value = '’'
      break
    }
  }
}
|
|||
|
|
|||
|
/**
 * Turn double backticks into an opening double smart quote and double
 * straight single quotes into a closing double smart quote.
 *
 * @type {Method}
 */
function backticksDefault(_, node) {
  switch (node.value) {
    case '``': {
      node.value = '“'
      break
    }

    case "''": {
      node.value = '”'
      break
    }
  }
}
|
|||
|
|
|||
|
/**
 * Turn a node that is exactly two dashes into an em dash.
 *
 * @type {Method}
 */
function dashesDefault(_, node) {
  const isDoubleDash = node.value === '--'

  if (!isDoubleDash) {
    return
  }

  node.value = '—'
}
|
|||
|
|
|||
|
/**
 * Turn three dashes into an en dash and two dashes into an em dash.
 *
 * @type {Method}
 */
function dashesInverted(_, node) {
  switch (node.value) {
    case '---': {
      node.value = '–'
      break
    }

    case '--': {
      node.value = '—'
      break
    }
  }
}
|
|||
|
|
|||
|
/**
 * Turn three dashes into an em dash and two dashes into an en dash.
 *
 * @type {Method}
 */
function dashesOldschool(_, node) {
  switch (node.value) {
    case '---': {
      node.value = '—'
      break
    }

    case '--': {
      node.value = '–'
      break
    }
  }
}
|
|||
|
|
|||
|
/**
 * Turn runs of dots, with or without spaces between them, into a unicode
 * ellipsis.
 *
 * @type {Method}
 */
function ellipsesDefault(_, node, index, parent) {
  // Spaced form first, then the unspaced form, each with the same arguments.
  for (const transform of [ellipsesSpaced, ellipsesUnspaced]) {
    transform(_, node, index, parent)
  }
}
|
|||
|
|
|||
|
/**
 * Turn three or more dot nodes separated by single whitespace nodes into one
 * unicode ellipsis.
 *
 * The current node is kept (and gets the ellipsis as its value); the
 * whitespace and dot nodes directly before it are removed from `parent`.
 *
 * @type {Method}
 */
function ellipsesSpaced(_, node, index, parent) {
  const dotsOnly = /^\.+$/

  if (!dotsOnly.test(node.value)) {
    return
  }

  const siblings = parent.children
  // Index of the first node that belongs to the run ending at `node`.
  let start = index

  // Walk backwards two nodes at a time: a whitespace node preceded by a
  // punctuation or symbol node made of dots only.
  //
  // It’s possible that the node is merged with an adjacent word-node. In that
  // case, we cannot transform it because there’s no reference to the
  // grandparent.
  while (start > 1) {
    const gap = siblings[start - 1]

    if (gap.type !== 'WhiteSpaceNode') {
      break
    }

    /** @type {SentenceContent} */
    const candidate = siblings[start - 2]

    if (
      !candidate ||
      (candidate.type !== 'PunctuationNode' &&
        candidate.type !== 'SymbolNode') ||
      !dotsOnly.test(candidate.value)
    ) {
      break
    }

    start -= 2
  }

  // Every extra dot node contributes two siblings (whitespace and dots), so
  // fewer than four preceding siblings means fewer than three dot nodes.
  const removable = index - start

  if (removable < 4) {
    return
  }

  siblings.splice(start, removable)
  node.value = '…'
}
|
|||
|
|
|||
|
/**
 * Turn a single node made of three or more dots, without whitespace, into a
 * unicode ellipsis.
 *
 * @type {Method}
 */
function ellipsesUnspaced(_, node) {
  const isDotRun = /^\.{3,}$/.test(node.value)

  if (!isDotRun) {
    return
  }

  node.value = '…'
}
|
|||
|
|
|||
|
/**
 * Turn a straight single or double quote into the configured opening or
 * closing smart quote, based on the nodes around it.
 *
 * @type {Method}
 */
// eslint-disable-next-line complexity
function quotesDefault(state, node, index, parent) {
  const mark = node.value

  // Only straight quotes are of interest.
  if (mark !== '"' && mark !== "'") {
    return
  }

  const {children} = parent
  // `0` selects the double quote of a `Quotes` pair, `1` the single quote.
  const quoteIndex = mark === '"' ? 0 : 1
  const before = children[index - 1]
  const after = children[index + 1]
  const afterNext = children[index + 2]
  const afterText = after ? toString(after) : ''

  const afterIsMark =
    after && (after.type === 'PunctuationNode' || after.type === 'SymbolNode')
  const beforeIsBoundary =
    before &&
    (before.type === 'WhiteSpaceNode' ||
      before.type === 'PunctuationNode' ||
      before.type === 'SymbolNode')

  // Whether an opening (`true`) or closing (`false`) quote is needed; the
  // checks below run in order and the first match wins.
  let opening

  if (afterIsMark && afterNext && afterNext.type !== 'WordNode') {
    // A quote followed by punctuation at a non-word-break: close the quotes
    // by brute force.
    opening = false
  } else if (
    afterIsMark &&
    afterNext &&
    afterNext.type === 'WordNode' &&
    (afterText === '"' || afterText === "'")
  ) {
    // Two sets of quotes in a row:
    // `He said, "'Quoted' words in a larger quote."`
    // The following quote is opened right away as well.
    opening = true
    after.value = state.open[afterText === '"' ? 0 : 1]
  } else if (after && /^\d\ds$/.test(afterText)) {
    // Decade abbreviations: `the '80s`.
    opening = false
  } else if (beforeIsBoundary && after && after.type === 'WordNode') {
    // Covers most opening quotes: after whitespace or punctuation, before a
    // word.
    opening = true
  } else if (before && !beforeIsBoundary) {
    // Directly after something other than whitespace or punctuation: closing.
    opening = false
  } else {
    // Closing at the end, before whitespace, or for a single quote before a
    // lone `s`; opening in every remaining case.
    opening =
      Boolean(after) &&
      after.type !== 'WhiteSpaceNode' &&
      (mark !== "'" || afterText !== 's')
  }

  node.value = (opening ? state.open : state.close)[quoteIndex]
}
|