site/node_modules/micromark-extension-gfm-footnote/dev/lib/syntax.js

561 lines
13 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Exiter} Exiter
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Resolver} Resolver
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').Token} Token
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {ok as assert} from 'devlop'
import {blankLine} from 'micromark-core-commonmark'
import {factorySpace} from 'micromark-factory-space'
import {markdownLineEndingOrSpace} from 'micromark-util-character'
import {normalizeIdentifier} from 'micromark-util-normalize-identifier'
import {codes, constants, types} from 'micromark-util-symbol'
const indent = {tokenize: tokenizeIndent, partial: true}
// To do: micromark should support a `_hiddenGfmFootnoteSupport`, which only
// affects label start (image).
// That will let us drop `tokenizePotentialGfmFootnote*`.
// It currently has a `_hiddenFootnoteSupport`, which affects that and more.
// That can be removed when `micromark-extension-footnote` is archived.
/**
* Create an extension for `micromark` to enable GFM footnote syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to
* enable GFM footnote syntax.
*/
export function gfmFootnote() {
/** @type {Extension} */
return {
document: {
[codes.leftSquareBracket]: {
tokenize: tokenizeDefinitionStart,
continuation: {tokenize: tokenizeDefinitionContinuation},
exit: gfmFootnoteDefinitionEnd
}
},
text: {
[codes.leftSquareBracket]: {tokenize: tokenizeGfmFootnoteCall},
[codes.rightSquareBracket]: {
add: 'after',
tokenize: tokenizePotentialGfmFootnoteCall,
resolveTo: resolveToPotentialGfmFootnoteCall
}
}
}
}
// To do: remove after micromark update.
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizePotentialGfmFootnoteCall(effects, ok, nok) {
const self = this
let index = self.events.length
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
/** @type {Token} */
let labelStart
// Find an opening.
while (index--) {
const token = self.events[index][1]
if (token.type === types.labelImage) {
labelStart = token
break
}
// Exit if weve walked far enough.
if (
token.type === 'gfmFootnoteCall' ||
token.type === types.labelLink ||
token.type === types.label ||
token.type === types.image ||
token.type === types.link
) {
break
}
}
return start
/**
* @type {State}
*/
function start(code) {
assert(code === codes.rightSquareBracket, 'expected `]`')
if (!labelStart || !labelStart._balanced) {
return nok(code)
}
const id = normalizeIdentifier(
self.sliceSerialize({start: labelStart.end, end: self.now()})
)
if (id.codePointAt(0) !== codes.caret || !defined.includes(id.slice(1))) {
return nok(code)
}
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
return ok(code)
}
}
// To do: remove after micromark update.
/** @type {Resolver} */
function resolveToPotentialGfmFootnoteCall(events, context) {
let index = events.length
/** @type {Token | undefined} */
let labelStart
// Find an opening.
while (index--) {
if (
events[index][1].type === types.labelImage &&
events[index][0] === 'enter'
) {
labelStart = events[index][1]
break
}
}
assert(labelStart, 'expected `labelStart` to resolve')
// Change the `labelImageMarker` to a `data`.
events[index + 1][1].type = types.data
events[index + 3][1].type = 'gfmFootnoteCallLabelMarker'
// The whole (without `!`):
/** @type {Token} */
const call = {
type: 'gfmFootnoteCall',
start: Object.assign({}, events[index + 3][1].start),
end: Object.assign({}, events[events.length - 1][1].end)
}
// The `^` marker
/** @type {Token} */
const marker = {
type: 'gfmFootnoteCallMarker',
start: Object.assign({}, events[index + 3][1].end),
end: Object.assign({}, events[index + 3][1].end)
}
// Increment the end 1 character.
marker.end.column++
marker.end.offset++
marker.end._bufferIndex++
/** @type {Token} */
const string = {
type: 'gfmFootnoteCallString',
start: Object.assign({}, marker.end),
end: Object.assign({}, events[events.length - 1][1].start)
}
/** @type {Token} */
const chunk = {
type: types.chunkString,
contentType: 'string',
start: Object.assign({}, string.start),
end: Object.assign({}, string.end)
}
/** @type {Array<Event>} */
const replacement = [
// Take the `labelImageMarker` (now `data`, the `!`)
events[index + 1],
events[index + 2],
['enter', call, context],
// The `[`
events[index + 3],
events[index + 4],
// The `^`.
['enter', marker, context],
['exit', marker, context],
// Everything in between.
['enter', string, context],
['enter', chunk, context],
['exit', chunk, context],
['exit', string, context],
// The ending (`]`, properly parsed and labelled).
events[events.length - 2],
events[events.length - 1],
['exit', call, context]
]
events.splice(index, events.length - index + 1, ...replacement)
return events
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeGfmFootnoteCall(effects, ok, nok) {
const self = this
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
let size = 0
/** @type {boolean} */
let data
// Note: the implementation of `markdown-rs` is different, because it houses
// core *and* extensions in one project.
// Therefore, it can include footnote logic inside `label-end`.
// We cant do that, but luckily, we can parse footnotes in a simpler way than
// needed for labels.
return start
/**
* Start of footnote label.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function start(code) {
assert(code === codes.leftSquareBracket, 'expected `[`')
effects.enter('gfmFootnoteCall')
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
return callStart
}
/**
* After `[`, at `^`.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function callStart(code) {
if (code !== codes.caret) return nok(code)
effects.enter('gfmFootnoteCallMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallMarker')
effects.enter('gfmFootnoteCallString')
effects.enter('chunkString').contentType = 'string'
return callData
}
/**
* In label.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function callData(code) {
if (
// Too long.
size > constants.linkReferenceSizeMax ||
// Closing brace with nothing.
(code === codes.rightSquareBracket && !data) ||
// Space or tab is not supported by GFM for some reason.
// `\n` and `[` not being supported makes sense.
code === codes.eof ||
code === codes.leftSquareBracket ||
markdownLineEndingOrSpace(code)
) {
return nok(code)
}
if (code === codes.rightSquareBracket) {
effects.exit('chunkString')
const token = effects.exit('gfmFootnoteCallString')
if (!defined.includes(normalizeIdentifier(self.sliceSerialize(token)))) {
return nok(code)
}
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
effects.exit('gfmFootnoteCall')
return ok
}
if (!markdownLineEndingOrSpace(code)) {
data = true
}
size++
effects.consume(code)
return code === codes.backslash ? callEscape : callData
}
/**
* On character after escape.
*
* ```markdown
* > | a [^b\c] d
* ^
* ```
*
* @type {State}
*/
function callEscape(code) {
if (
code === codes.leftSquareBracket ||
code === codes.backslash ||
code === codes.rightSquareBracket
) {
effects.consume(code)
size++
return callData
}
return callData(code)
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDefinitionStart(effects, ok, nok) {
const self = this
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
/** @type {string} */
let identifier
let size = 0
/** @type {boolean | undefined} */
let data
return start
/**
* Start of GFM footnote definition.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function start(code) {
assert(code === codes.leftSquareBracket, 'expected `[`')
effects.enter('gfmFootnoteDefinition')._container = true
effects.enter('gfmFootnoteDefinitionLabel')
effects.enter('gfmFootnoteDefinitionLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionLabelMarker')
return labelAtMarker
}
/**
* In label, at caret.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelAtMarker(code) {
if (code === codes.caret) {
effects.enter('gfmFootnoteDefinitionMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionMarker')
effects.enter('gfmFootnoteDefinitionLabelString')
effects.enter('chunkString').contentType = 'string'
return labelInside
}
return nok(code)
}
/**
* In label.
*
* > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote
* > definition labels.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelInside(code) {
if (
// Too long.
size > constants.linkReferenceSizeMax ||
// Closing brace with nothing.
(code === codes.rightSquareBracket && !data) ||
// Space or tab is not supported by GFM for some reason.
// `\n` and `[` not being supported makes sense.
code === codes.eof ||
code === codes.leftSquareBracket ||
markdownLineEndingOrSpace(code)
) {
return nok(code)
}
if (code === codes.rightSquareBracket) {
effects.exit('chunkString')
const token = effects.exit('gfmFootnoteDefinitionLabelString')
identifier = normalizeIdentifier(self.sliceSerialize(token))
effects.enter('gfmFootnoteDefinitionLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionLabelMarker')
effects.exit('gfmFootnoteDefinitionLabel')
return labelAfter
}
if (!markdownLineEndingOrSpace(code)) {
data = true
}
size++
effects.consume(code)
return code === codes.backslash ? labelEscape : labelInside
}
/**
* After `\`, at a special character.
*
* > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets:
* > <https://github.com/github/cmark-gfm/issues/240>
*
* ```markdown
* > | [^a\*b]: c
* ^
* ```
*
* @type {State}
*/
function labelEscape(code) {
if (
code === codes.leftSquareBracket ||
code === codes.backslash ||
code === codes.rightSquareBracket
) {
effects.consume(code)
size++
return labelInside
}
return labelInside(code)
}
/**
* After definition label.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelAfter(code) {
if (code === codes.colon) {
effects.enter('definitionMarker')
effects.consume(code)
effects.exit('definitionMarker')
if (!defined.includes(identifier)) {
defined.push(identifier)
}
// Any whitespace after the marker is eaten, forming indented code
// is not possible.
// No space is also fine, just like a block quote marker.
return factorySpace(
effects,
whitespaceAfter,
'gfmFootnoteDefinitionWhitespace'
)
}
return nok(code)
}
/**
* After definition prefix.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function whitespaceAfter(code) {
// `markdown-rs` has a wrapping token for the prefix that is closed here.
return ok(code)
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDefinitionContinuation(effects, ok, nok) {
/// Start of footnote definition continuation.
///
/// ```markdown
/// | [^a]: b
/// > | c
/// ^
/// ```
//
// Either a blank line, which is okay, or an indented thing.
return effects.check(blankLine, ok, effects.attempt(indent, ok, nok))
}
/** @type {Exiter} */
function gfmFootnoteDefinitionEnd(effects) {
effects.exit('gfmFootnoteDefinition')
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeIndent(effects, ok, nok) {
const self = this
return factorySpace(
effects,
afterPrefix,
'gfmFootnoteDefinitionIndent',
constants.tabSize + 1
)
/**
* @type {State}
*/
function afterPrefix(code) {
const tail = self.events[self.events.length - 1]
return tail &&
tail[1].type === 'gfmFootnoteDefinitionIndent' &&
tail[2].sliceSerialize(tail[1], true).length === constants.tabSize
? ok(code)
: nok(code)
}
}