262 lines
6.8 KiB
JavaScript
262 lines
6.8 KiB
JavaScript
/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Token} Token
 */
|
||
|
||
import {splice} from 'micromark-util-chunked'
|
||
import {codes, types} from 'micromark-util-symbol'
|
||
import {ok as assert} from 'devlop'
|
||
|
||
/**
 * Tokenize subcontent.
 *
 * Walks `events` once, in place:
 *
 * - every `enter` event whose token has a `contentType` is expanded through
 *   `subcontent`, and the events spliced in by that expansion are skipped;
 * - every `exit` event of a token marked `_container` is moved in front of
 *   the run of line endings that directly precedes it.
 *
 * @param {Array<Event>} events
 *   List of events (mutated in place).
 * @returns {boolean}
 *   `true` when this pass expanded nothing; `false` when at least one token
 *   was expanded (the events spliced in for it were not visited by this pass).
 */
// eslint-disable-next-line complexity
export function subtokenize(events) {
  /**
   * Indices to jump from (key) and to (value), as returned by `subcontent`.
   *
   * @type {Record<string, number>}
   */
  const jumps = {}
  /** @type {boolean | undefined} */
  let more
  let index = -1

  while (++index < events.length) {
    // Skip over events that an earlier expansion spliced in.
    while (index in jumps) {
      index = jumps[index]
    }

    const event = events[index]

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events[index - 1][1].type === types.listItemPrefix
    ) {
      assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
      const subevents = event[1]._tokenizer.events
      let otherIndex = 0

      // Step over the enter and exit of a leading blank line ending.
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          // The next event of type `content` ends the scan.
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true
            // Together with the `++` of the loop this steps over the event
            // after the one that was just marked.
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container) {
      let otherIndex = index
      /** @type {number | undefined} */
      let lineIndex

      // Walk back over the line endings right before this exit.
      // The earliest one found ends up as `lineEnding`, every later one as
      // `lineEndingBlank`.
      while (otherIndex--) {
        const otherEvent = events[otherIndex]

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            if (lineIndex) {
              events[lineIndex][1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else {
          break
        }
      }

      if (lineIndex) {
        // Fix position: the container now ends where that line ending starts.
        event[1].end = Object.assign({}, events[lineIndex][1].start)

        // Switch container exit w/ line endings.
        const parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        splice(events, lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  return !more
}
|
||
|
||
/**
 * Tokenize embedded tokens.
 *
 * Feeds the token at `eventIndex`, and every token linked to it through
 * `next`, to one tokenizer.
 * Then replaces the two events at each token's position in `events` with the
 * matching run of events that the tokenizer produced.
 *
 * @param {Array<Event>} events
 *   List of events (mutated in place).
 * @param {number} eventIndex
 *   Index in `events` of the event holding the first linked token.
 * @returns {Record<string, number>}
 *   Map from the index at which a spliced-in run starts to the index at which
 *   it ends, both in terms of `events` after all splices.
 */
function subcontent(events, eventIndex) {
  const token = events[eventIndex][1]
  const context = events[eventIndex][2]
  assert(token.contentType, 'expected `contentType` on subtokens')
  // Reuse the tokenizer already attached to the token, or create one for its
  // content type.
  const tokenizer =
    token._tokenizer || context.parser[token.contentType](token.start)
  // Captured before `tokenizer.events` is reset further down.
  const childEvents = tokenizer.events
  /** @type {Array<number>} */
  const startPositions = []
  /** @type {Array<[number, number]>} */
  const jumps = []
  /** @type {Record<string, number>} */
  const gaps = {}
  /**
   * Indices into `childEvents` at which the run for each linked token starts.
   *
   * @type {Array<number>}
   */
  const breaks = [0]
  /** @type {Token | undefined} */
  let previous
  /** @type {Token | undefined} */
  let current = token
  let startPosition = eventIndex - 1
  let index = -1
  let adjust = 0

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
    while (events[++startPosition][1] !== current) {
      // Empty.
    }

    assert(
      !previous || current.previous === previous,
      'expected previous to match'
    )
    assert(!previous || previous.next === current, 'expected next to match')

    startPositions.push(startPosition)

    // A token that already carries a tokenizer is not written again.
    if (!current._tokenizer) {
      const stream = context.sliceStream(current)

      // The last linked token also closes the stream.
      if (!current.next) {
        stream.push(codes.eof)
      }

      if (previous) {
        tokenizer.defineSkip(current.start)
      }

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = current
    current = current.next
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      assert(current, 'expected a current token')
      // The run for the next linked token starts right after this exit.
      breaks.push(index + 1)
      // Help GC.
      current._tokenizer = undefined
      current.previous = undefined
      current = current.next
    }
  }

  // Help GC.
  tokenizer.events = []

  // If there’s one more token (which is the case for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t, then make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined
    current.previous = undefined
    assert(!current.next, 'expected no next token')
  } else {
    breaks.pop()
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length

  while (index--) {
    // `breaks[index + 1]` is `undefined` for the last run, which then takes
    // everything up to the end.
    const slice = childEvents.slice(breaks[index], breaks[index + 1])
    const position = startPositions.pop()
    assert(position !== undefined, 'expected a start position when splicing')
    jumps.unshift([position, position + slice.length - 1])
    splice(events, position, 2, slice)
  }

  // Shift each recorded range by the growth caused by the runs spliced in
  // before it.
  index = -1

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps
}
|