site/node_modules/micromark-util-subtokenize/index.js

225 lines
6.2 KiB
JavaScript
Raw Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('micromark-util-types').Chunk} Chunk
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Token} Token
*/
import {splice} from 'micromark-util-chunked'
/**
* Tokenize subcontent.
*
* @param {Array<Event>} events
* List of events.
* @returns {boolean}
* Whether subtokens were found.
*/ // eslint-disable-next-line complexity
export function subtokenize(events) {
/** @type {Record<string, number>} */
const jumps = {}
let index = -1
/** @type {Event} */
let event
/** @type {number | undefined} */
let lineIndex
/** @type {number} */
let otherIndex
/** @type {Event} */
let otherEvent
/** @type {Array<Event>} */
let parameters
/** @type {Array<Event>} */
let subevents
/** @type {boolean | undefined} */
let more
while (++index < events.length) {
while (index in jumps) {
index = jumps[index]
}
event = events[index]
// Add a hook for the GFM tasklist extension, which needs to know if text
// is in the first content of a list item.
if (
index &&
event[1].type === 'chunkFlow' &&
events[index - 1][1].type === 'listItemPrefix'
) {
subevents = event[1]._tokenizer.events
otherIndex = 0
if (
otherIndex < subevents.length &&
subevents[otherIndex][1].type === 'lineEndingBlank'
) {
otherIndex += 2
}
if (
otherIndex < subevents.length &&
subevents[otherIndex][1].type === 'content'
) {
while (++otherIndex < subevents.length) {
if (subevents[otherIndex][1].type === 'content') {
break
}
if (subevents[otherIndex][1].type === 'chunkText') {
subevents[otherIndex][1]._isInFirstContentOfListItem = true
otherIndex++
}
}
}
}
// Enter.
if (event[0] === 'enter') {
if (event[1].contentType) {
Object.assign(jumps, subcontent(events, index))
index = jumps[index]
more = true
}
}
// Exit.
else if (event[1]._container) {
otherIndex = index
lineIndex = undefined
while (otherIndex--) {
otherEvent = events[otherIndex]
if (
otherEvent[1].type === 'lineEnding' ||
otherEvent[1].type === 'lineEndingBlank'
) {
if (otherEvent[0] === 'enter') {
if (lineIndex) {
events[lineIndex][1].type = 'lineEndingBlank'
}
otherEvent[1].type = 'lineEnding'
lineIndex = otherIndex
}
} else {
break
}
}
if (lineIndex) {
// Fix position.
event[1].end = Object.assign({}, events[lineIndex][1].start)
// Switch container exit w/ line endings.
parameters = events.slice(lineIndex, index)
parameters.unshift(event)
splice(events, lineIndex, index - lineIndex + 1, parameters)
}
}
}
return !more
}
/**
* Tokenize embedded tokens.
*
* @param {Array<Event>} events
* @param {number} eventIndex
* @returns {Record<string, number>}
*/
function subcontent(events, eventIndex) {
const token = events[eventIndex][1]
const context = events[eventIndex][2]
let startPosition = eventIndex - 1
/** @type {Array<number>} */
const startPositions = []
const tokenizer =
token._tokenizer || context.parser[token.contentType](token.start)
const childEvents = tokenizer.events
/** @type {Array<[number, number]>} */
const jumps = []
/** @type {Record<string, number>} */
const gaps = {}
/** @type {Array<Chunk>} */
let stream
/** @type {Token | undefined} */
let previous
let index = -1
/** @type {Token | undefined} */
let current = token
let adjust = 0
let start = 0
const breaks = [start]
// Loop forward through the linked tokens to pass them in order to the
// subtokenizer.
while (current) {
// Find the position of the event for this token.
while (events[++startPosition][1] !== current) {
// Empty.
}
startPositions.push(startPosition)
if (!current._tokenizer) {
stream = context.sliceStream(current)
if (!current.next) {
stream.push(null)
}
if (previous) {
tokenizer.defineSkip(current.start)
}
if (current._isInFirstContentOfListItem) {
tokenizer._gfmTasklistFirstContentOfListItem = true
}
tokenizer.write(stream)
if (current._isInFirstContentOfListItem) {
tokenizer._gfmTasklistFirstContentOfListItem = undefined
}
}
// Unravel the next token.
previous = current
current = current.next
}
// Now, loop back through all events (and linked tokens), to figure out which
// parts belong where.
current = token
while (++index < childEvents.length) {
if (
// Find a void token that includes a break.
childEvents[index][0] === 'exit' &&
childEvents[index - 1][0] === 'enter' &&
childEvents[index][1].type === childEvents[index - 1][1].type &&
childEvents[index][1].start.line !== childEvents[index][1].end.line
) {
start = index + 1
breaks.push(start)
// Help GC.
current._tokenizer = undefined
current.previous = undefined
current = current.next
}
}
// Help GC.
tokenizer.events = []
// If theres one more token (which is the cases for lines that end in an
// EOF), thats perfect: the last point we found starts it.
// If there isnt then make sure any remaining content is added to it.
if (current) {
// Help GC.
current._tokenizer = undefined
current.previous = undefined
} else {
breaks.pop()
}
// Now splice the events from the subtokenizer into the current events,
// moving back to front so that splice indices arent affected.
index = breaks.length
while (index--) {
const slice = childEvents.slice(breaks[index], breaks[index + 1])
const start = startPositions.pop()
jumps.unshift([start, start + slice.length - 1])
splice(events, start, 2, slice)
}
index = -1
while (++index < jumps.length) {
gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
adjust += jumps[index][1] - jumps[index][0] - 1
}
return gaps
}