site/node_modules/micromark/lib/initialize/document.js
2024-10-14 08:09:33 +02:00

382 lines
11 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @typedef {import('micromark-util-types').Construct} Construct
* @typedef {import('micromark-util-types').ContainerState} ContainerState
* @typedef {import('micromark-util-types').InitialConstruct} InitialConstruct
* @typedef {import('micromark-util-types').Initializer} Initializer
* @typedef {import('micromark-util-types').Point} Point
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').Token} Token
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
/**
* @typedef {[Construct, ContainerState]} StackItem
*/
import {factorySpace} from 'micromark-factory-space'
import {markdownLineEnding} from 'micromark-util-character'
import {splice} from 'micromark-util-chunked'
/** @type {InitialConstruct} */
export const document = {
tokenize: initializeDocument
}
/** @type {Construct} */
const containerConstruct = {
tokenize: tokenizeContainer
}
/**
* @this {TokenizeContext}
* @type {Initializer}
*/
function initializeDocument(effects) {
const self = this
/** @type {Array<StackItem>} */
const stack = []
let continued = 0
/** @type {TokenizeContext | undefined} */
let childFlow
/** @type {Token | undefined} */
let childToken
/** @type {number} */
let lineStartOffset
return start
/** @type {State} */
function start(code) {
// First we iterate through the open blocks, starting with the root
// document, and descending through last children down to the last open
// block.
// Each block imposes a condition that the line must satisfy if the block is
// to remain open.
// For example, a block quote requires a `>` character.
// A paragraph requires a non-blank line.
// In this phase we may match all or just some of the open blocks.
// But we cannot close unmatched blocks yet, because we may have a lazy
// continuation line.
if (continued < stack.length) {
const item = stack[continued]
self.containerState = item[1]
return effects.attempt(
item[0].continuation,
documentContinue,
checkNewContainers
)(code)
}
// Done.
return checkNewContainers(code)
}
/** @type {State} */
function documentContinue(code) {
continued++
// Note: this field is called `_closeFlow` but it also closes containers.
// Perhaps a good idea to rename it but its already used in the wild by
// extensions.
if (self.containerState._closeFlow) {
self.containerState._closeFlow = undefined
if (childFlow) {
closeFlow()
}
// Note: this algorithm for moving events around is similar to the
// algorithm when dealing with lazy lines in `writeToChild`.
const indexBeforeExits = self.events.length
let indexBeforeFlow = indexBeforeExits
/** @type {Point | undefined} */
let point
// Find the flow chunk.
while (indexBeforeFlow--) {
if (
self.events[indexBeforeFlow][0] === 'exit' &&
self.events[indexBeforeFlow][1].type === 'chunkFlow'
) {
point = self.events[indexBeforeFlow][1].end
break
}
}
exitContainers(continued)
// Fix positions.
let index = indexBeforeExits
while (index < self.events.length) {
self.events[index][1].end = Object.assign({}, point)
index++
}
// Inject the exits earlier (theyre still also at the end).
splice(
self.events,
indexBeforeFlow + 1,
0,
self.events.slice(indexBeforeExits)
)
// Discard the duplicate exits.
self.events.length = index
return checkNewContainers(code)
}
return start(code)
}
/** @type {State} */
function checkNewContainers(code) {
// Next, after consuming the continuation markers for existing blocks, we
// look for new block starts (e.g. `>` for a block quote).
// If we encounter a new block start, we close any blocks unmatched in
// step 1 before creating the new block as a child of the last matched
// block.
if (continued === stack.length) {
// No need to `check` whether theres a container, of `exitContainers`
// would be moot.
// We can instead immediately `attempt` to parse one.
if (!childFlow) {
return documentContinued(code)
}
// If we have concrete content, such as block HTML or fenced code,
// we cant have containers “pierce” into them, so we can immediately
// start.
if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) {
return flowStart(code)
}
// If we do have flow, it could still be a blank line,
// but wed be interrupting it w/ a new container if theres a current
// construct.
// To do: next major: remove `_gfmTableDynamicInterruptHack` (no longer
// needed in micromark-extension-gfm-table@1.0.6).
self.interrupt = Boolean(
childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack
)
}
// Check if there is a new container.
self.containerState = {}
return effects.check(
containerConstruct,
thereIsANewContainer,
thereIsNoNewContainer
)(code)
}
/** @type {State} */
function thereIsANewContainer(code) {
if (childFlow) closeFlow()
exitContainers(continued)
return documentContinued(code)
}
/** @type {State} */
function thereIsNoNewContainer(code) {
self.parser.lazy[self.now().line] = continued !== stack.length
lineStartOffset = self.now().offset
return flowStart(code)
}
/** @type {State} */
function documentContinued(code) {
// Try new containers.
self.containerState = {}
return effects.attempt(
containerConstruct,
containerContinue,
flowStart
)(code)
}
/** @type {State} */
function containerContinue(code) {
continued++
stack.push([self.currentConstruct, self.containerState])
// Try another.
return documentContinued(code)
}
/** @type {State} */
function flowStart(code) {
if (code === null) {
if (childFlow) closeFlow()
exitContainers(0)
effects.consume(code)
return
}
childFlow = childFlow || self.parser.flow(self.now())
effects.enter('chunkFlow', {
contentType: 'flow',
previous: childToken,
_tokenizer: childFlow
})
return flowContinue(code)
}
/** @type {State} */
function flowContinue(code) {
if (code === null) {
writeToChild(effects.exit('chunkFlow'), true)
exitContainers(0)
effects.consume(code)
return
}
if (markdownLineEnding(code)) {
effects.consume(code)
writeToChild(effects.exit('chunkFlow'))
// Get ready for the next line.
continued = 0
self.interrupt = undefined
return start
}
effects.consume(code)
return flowContinue
}
/**
* @param {Token} token
* @param {boolean | undefined} [eof]
* @returns {undefined}
*/
function writeToChild(token, eof) {
const stream = self.sliceStream(token)
if (eof) stream.push(null)
token.previous = childToken
if (childToken) childToken.next = token
childToken = token
childFlow.defineSkip(token.start)
childFlow.write(stream)
// Alright, so we just added a lazy line:
//
// ```markdown
// > a
// b.
//
// Or:
//
// > ~~~c
// d
//
// Or:
//
// > | e |
// f
// ```
//
// The construct in the second example (fenced code) does not accept lazy
// lines, so it marked itself as done at the end of its first line, and
// then the content construct parses `d`.
// Most constructs in markdown match on the first line: if the first line
// forms a construct, a non-lazy line cant “unmake” it.
//
// The construct in the third example is potentially a GFM table, and
// those are *weird*.
// It *could* be a table, from the first line, if the following line
// matches a condition.
// In this case, that second line is lazy, which “unmakes” the first line
// and turns the whole into one content block.
//
// Weve now parsed the non-lazy and the lazy line, and can figure out
// whether the lazy line started a new flow block.
// If it did, we exit the current containers between the two flow blocks.
if (self.parser.lazy[token.start.line]) {
let index = childFlow.events.length
while (index--) {
if (
// The token starts before the line ending…
childFlow.events[index][1].start.offset < lineStartOffset &&
// …and either is not ended yet…
(!childFlow.events[index][1].end ||
// …or ends after it.
childFlow.events[index][1].end.offset > lineStartOffset)
) {
// Exit: theres still something open, which means its a lazy line
// part of something.
return
}
}
// Note: this algorithm for moving events around is similar to the
// algorithm when closing flow in `documentContinue`.
const indexBeforeExits = self.events.length
let indexBeforeFlow = indexBeforeExits
/** @type {boolean | undefined} */
let seen
/** @type {Point | undefined} */
let point
// Find the previous chunk (the one before the lazy line).
while (indexBeforeFlow--) {
if (
self.events[indexBeforeFlow][0] === 'exit' &&
self.events[indexBeforeFlow][1].type === 'chunkFlow'
) {
if (seen) {
point = self.events[indexBeforeFlow][1].end
break
}
seen = true
}
}
exitContainers(continued)
// Fix positions.
index = indexBeforeExits
while (index < self.events.length) {
self.events[index][1].end = Object.assign({}, point)
index++
}
// Inject the exits earlier (theyre still also at the end).
splice(
self.events,
indexBeforeFlow + 1,
0,
self.events.slice(indexBeforeExits)
)
// Discard the duplicate exits.
self.events.length = index
}
}
/**
* @param {number} size
* @returns {undefined}
*/
function exitContainers(size) {
let index = stack.length
// Exit open containers.
while (index-- > size) {
const entry = stack[index]
self.containerState = entry[1]
entry[0].exit.call(self, effects)
}
stack.length = size
}
function closeFlow() {
childFlow.write([null])
childToken = undefined
childFlow = undefined
self.containerState._closeFlow = undefined
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeContainer(effects, ok, nok) {
// Always populated by defaults.
return factorySpace(
effects,
effects.attempt(this.parser.constructs.document, ok, nok),
'linePrefix',
this.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4
)
}