site/node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js

100 lines
3 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('nlcst').Paragraph} Paragraph
*/
import {toString} from 'nlcst-to-string'
import {visitChildren} from 'unist-util-visit-children'
// Full stop characters that should not be treated as terminal sentence markers:
// A case-insensitive abbreviation.
import {terminalMarker} from '../expressions.js'
// Merge non-terminal-marker full stops into the previous word (if available),
// or the next word (if available).
export const mergeRemainingFullStops = visitChildren(
/**
* @type {import('unist-util-visit-children').Visitor<Paragraph>}
*/
// eslint-disable-next-line complexity
function (child, _, _parent) {
if ('children' in child) {
let position = child.children.length
let hasFoundDelimiter = false
while (child.children[--position]) {
const grandchild = child.children[position]
if (
grandchild.type !== 'SymbolNode' &&
grandchild.type !== 'PunctuationNode'
) {
// This is a sentence without terminal marker, so we 'fool' the code to
// make it think we have found one.
if (grandchild.type === 'WordNode') {
hasFoundDelimiter = true
}
continue
}
// Exit when this token is not a terminal marker.
if (!terminalMarker.test(toString(grandchild))) {
continue
}
// Ignore the first terminal marker found (starting at the end), as it
// should not be merged.
if (!hasFoundDelimiter) {
hasFoundDelimiter = true
continue
}
// Only merge a single full stop.
if (toString(grandchild) !== '.') {
continue
}
const previous = child.children[position - 1]
const next = child.children[position + 1]
if (previous && previous.type === 'WordNode') {
const nextNext = child.children[position + 2]
// Continue when the full stop is followed by a space and another full
// stop, such as: `{.} .`
if (
next &&
nextNext &&
next.type === 'WhiteSpaceNode' &&
toString(nextNext) === '.'
) {
continue
}
// Remove `child` from parent.
child.children.splice(position, 1)
// Add the punctuation mark at the end of the previous node.
previous.children.push(grandchild)
// Update position.
if (grandchild.position && previous.position) {
previous.position.end = grandchild.position.end
}
position--
} else if (next && next.type === 'WordNode') {
// Remove `child` from parent.
child.children.splice(position, 1)
// Add the punctuation mark at the start of the next node.
next.children.unshift(grandchild)
if (grandchild.position && next.position) {
next.position.start = grandchild.position.start
}
}
}
}
}
)