/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Token} Token
 */

import {splice} from 'micromark-util-chunked'
import {codes, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'

/**
 * Tokenize subcontent.
 *
 * @param {Array<Event>} events
 *   List of events.
 * @returns {boolean}
 *   Whether subtokens were found.
 */
// eslint-disable-next-line complexity
export function subtokenize(events) {
  /** @type {Record<string, number>} */
  const jumps = {}
  let index = -1
  /** @type {Event} */
  let event
  /** @type {number | undefined} */
  let lineIndex
  /** @type {number} */
  let otherIndex
  /** @type {Event} */
  let otherEvent
  /** @type {Array<Event>} */
  let parameters
  /** @type {Array<Event>} */
  let subevents
  /** @type {boolean | undefined} */
  let more

  while (++index < events.length) {
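    // An index in `jumps` marks the start of a region that was already
    // replaced by subtokenized events; its value is that region’s last
    // index, so we can skip over it (following chained jumps if needed).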
    while (index in jumps) {
      index = jumps[index]
    }

    event = events[index]

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events[index - 1][1].type === types.listItemPrefix
    ) {
      assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
      subevents = event[1]._tokenizer.events
      otherIndex = 0
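
      // If the flow chunk starts with a blank line ending, skip past its
      // enter and exit pair.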
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container) {
      otherIndex = index
      lineIndex = undefined
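
      // Walk back over the run of line endings directly before this
      // container exit: the first of the run is (re)typed as a normal line
      // ending, the later ones as blank line endings, and the exit itself is
      // then moved before the run.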
      while (otherIndex--) {
        otherEvent = events[otherIndex]

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            if (lineIndex) {
              events[lineIndex][1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else {
          break
        }
      }

      if (lineIndex) {
        // Fix position.
        event[1].end = Object.assign({}, events[lineIndex][1].start)

        // Switch container exit w/ line endings.
        parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        splice(events, lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  return !more
}
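
// A minimal usage sketch (illustrative, not part of this module): callers
// such as micromark’s own postprocessing step run `subtokenize` repeatedly
// until it reports that no more subtokens were found, along these lines:
//
//     while (!subtokenize(events)) {
//       // Keep resolving until every `contentType` token is expanded.
//     }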

/**
 * Tokenize embedded tokens.
 *
 * @param {Array<Event>} events
 * @param {number} eventIndex
 * @returns {Record<string, number>}
 */
function subcontent(events, eventIndex) {
  const token = events[eventIndex][1]
  const context = events[eventIndex][2]
  let startPosition = eventIndex - 1
  /** @type {Array<number>} */
  const startPositions = []
  assert(token.contentType, 'expected `contentType` on subtokens')
  const tokenizer =
    token._tokenizer || context.parser[token.contentType](token.start)
  const childEvents = tokenizer.events
  /** @type {Array<[number, number]>} */
  const jumps = []
  /** @type {Record<string, number>} */
  const gaps = {}
  /** @type {Array<Chunk>} */
  let stream
  /** @type {Token | undefined} */
  let previous
  let index = -1
  /** @type {Token | undefined} */
  let current = token
  let adjust = 0
  let start = 0
  const breaks = [start]

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
    while (events[++startPosition][1] !== current) {
      // Empty.
    }

    assert(
      !previous || current.previous === previous,
      'expected previous to match'
    )
    assert(!previous || previous.next === current, 'expected next to match')

    startPositions.push(startPosition)

    if (!current._tokenizer) {
      stream = context.sliceStream(current)

      if (!current.next) {
        stream.push(codes.eof)
      }
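
      // Chunks after the first may not start at the first column of their
      // line (container prefixes were stripped); register where this one
      // starts so the tokenizer keeps positions consistent across the gap.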
      if (previous) {
        tokenizer.defineSkip(current.start)
      }

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = current
    current = current.next
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      assert(current, 'expected a current token')
      start = index + 1
      breaks.push(start)
      // Help GC.
      current._tokenizer = undefined
      current.previous = undefined
      current = current.next
    }
  }

  // Help GC.
  tokenizer.events = []

  // If there’s one more token (which is the case for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t, make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined
    current.previous = undefined
    assert(!current.next, 'expected no next token')
  } else {
    breaks.pop()
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length

  while (index--) {
    const slice = childEvents.slice(breaks[index], breaks[index + 1])
    const start = startPositions.pop()
    assert(start !== undefined, 'expected a start position when splicing')
    jumps.unshift([start, start + slice.length - 1])
    splice(events, start, 2, slice)
  }
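
  // Turn the recorded regions into `gaps`, a map from a region’s final start
  // index to its final end index: regions were recorded at their pre-splice
  // positions, and each region before the current one grew the array by
  // `to - from - 1` events, which `adjust` accumulates.
  //
  // Illustrative numbers (not from the source): if one splice replaced the
  // two events at 4-5 with 3 events (recorded jump `[4, 6]`) and another
  // replaced the two at 10-11 with 5 events (recorded jump `[10, 14]`), the
  // first splice shifts the second region to 11-15, and this loop yields
  // `gaps = {4: 6, 11: 15}`.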
  index = -1

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps
}