site/node_modules/micromark-extension-gfm-footnote/dev/lib/syntax.js
2024-10-14 08:09:33 +02:00

560 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Exiter} Exiter
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Resolver} Resolver
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').Token} Token
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {ok as assert} from 'devlop'
import {blankLine} from 'micromark-core-commonmark'
import {factorySpace} from 'micromark-factory-space'
import {markdownLineEndingOrSpace} from 'micromark-util-character'
import {normalizeIdentifier} from 'micromark-util-normalize-identifier'
import {codes, constants, types} from 'micromark-util-symbol'
const indent = {tokenize: tokenizeIndent, partial: true}
// To do: micromark should support a `_hiddenGfmFootnoteSupport`, which only
// affects label start (image).
// That will let us drop `tokenizePotentialGfmFootnote*`.
// It currently has a `_hiddenFootnoteSupport`, which affects that and more.
// That can be removed when `micromark-extension-footnote` is archived.
/**
* Create an extension for `micromark` to enable GFM footnote syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to
* enable GFM footnote syntax.
*/
export function gfmFootnote() {
/** @type {Extension} */
return {
document: {
[codes.leftSquareBracket]: {
tokenize: tokenizeDefinitionStart,
continuation: {tokenize: tokenizeDefinitionContinuation},
exit: gfmFootnoteDefinitionEnd
}
},
text: {
[codes.leftSquareBracket]: {tokenize: tokenizeGfmFootnoteCall},
[codes.rightSquareBracket]: {
add: 'after',
tokenize: tokenizePotentialGfmFootnoteCall,
resolveTo: resolveToPotentialGfmFootnoteCall
}
}
}
}
// To do: remove after micromark update.
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizePotentialGfmFootnoteCall(effects, ok, nok) {
const self = this
let index = self.events.length
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
/** @type {Token} */
let labelStart
// Find an opening.
while (index--) {
const token = self.events[index][1]
if (token.type === types.labelImage) {
labelStart = token
break
}
// Exit if weve walked far enough.
if (
token.type === 'gfmFootnoteCall' ||
token.type === types.labelLink ||
token.type === types.label ||
token.type === types.image ||
token.type === types.link
) {
break
}
}
return start
/**
* @type {State}
*/
function start(code) {
assert(code === codes.rightSquareBracket, 'expected `]`')
if (!labelStart || !labelStart._balanced) {
return nok(code)
}
const id = normalizeIdentifier(
self.sliceSerialize({start: labelStart.end, end: self.now()})
)
if (id.codePointAt(0) !== codes.caret || !defined.includes(id.slice(1))) {
return nok(code)
}
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
return ok(code)
}
}
// To do: remove after micromark update.
/** @type {Resolver} */
function resolveToPotentialGfmFootnoteCall(events, context) {
let index = events.length
/** @type {Token | undefined} */
let labelStart
// Find an opening.
while (index--) {
if (
events[index][1].type === types.labelImage &&
events[index][0] === 'enter'
) {
labelStart = events[index][1]
break
}
}
assert(labelStart, 'expected `labelStart` to resolve')
// Change the `labelImageMarker` to a `data`.
events[index + 1][1].type = types.data
events[index + 3][1].type = 'gfmFootnoteCallLabelMarker'
// The whole (without `!`):
/** @type {Token} */
const call = {
type: 'gfmFootnoteCall',
start: Object.assign({}, events[index + 3][1].start),
end: Object.assign({}, events[events.length - 1][1].end)
}
// The `^` marker
/** @type {Token} */
const marker = {
type: 'gfmFootnoteCallMarker',
start: Object.assign({}, events[index + 3][1].end),
end: Object.assign({}, events[index + 3][1].end)
}
// Increment the end 1 character.
marker.end.column++
marker.end.offset++
marker.end._bufferIndex++
/** @type {Token} */
const string = {
type: 'gfmFootnoteCallString',
start: Object.assign({}, marker.end),
end: Object.assign({}, events[events.length - 1][1].start)
}
/** @type {Token} */
const chunk = {
type: types.chunkString,
contentType: 'string',
start: Object.assign({}, string.start),
end: Object.assign({}, string.end)
}
/** @type {Array<Event>} */
const replacement = [
// Take the `labelImageMarker` (now `data`, the `!`)
events[index + 1],
events[index + 2],
['enter', call, context],
// The `[`
events[index + 3],
events[index + 4],
// The `^`.
['enter', marker, context],
['exit', marker, context],
// Everything in between.
['enter', string, context],
['enter', chunk, context],
['exit', chunk, context],
['exit', string, context],
// The ending (`]`, properly parsed and labelled).
events[events.length - 2],
events[events.length - 1],
['exit', call, context]
]
events.splice(index, events.length - index + 1, ...replacement)
return events
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeGfmFootnoteCall(effects, ok, nok) {
const self = this
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
let size = 0
/** @type {boolean} */
let data
// Note: the implementation of `markdown-rs` is different, because it houses
// core *and* extensions in one project.
// Therefore, it can include footnote logic inside `label-end`.
// We cant do that, but luckily, we can parse footnotes in a simpler way than
// needed for labels.
return start
/**
* Start of footnote label.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function start(code) {
assert(code === codes.leftSquareBracket, 'expected `[`')
effects.enter('gfmFootnoteCall')
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
return callStart
}
/**
* After `[`, at `^`.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function callStart(code) {
if (code !== codes.caret) return nok(code)
effects.enter('gfmFootnoteCallMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallMarker')
effects.enter('gfmFootnoteCallString')
effects.enter('chunkString').contentType = 'string'
return callData
}
/**
* In label.
*
* ```markdown
* > | a [^b] c
* ^
* ```
*
* @type {State}
*/
function callData(code) {
if (
// Too long.
size > constants.linkReferenceSizeMax ||
// Closing brace with nothing.
(code === codes.rightSquareBracket && !data) ||
// Space or tab is not supported by GFM for some reason.
// `\n` and `[` not being supported makes sense.
code === codes.eof ||
code === codes.leftSquareBracket ||
markdownLineEndingOrSpace(code)
) {
return nok(code)
}
if (code === codes.rightSquareBracket) {
effects.exit('chunkString')
const token = effects.exit('gfmFootnoteCallString')
if (!defined.includes(normalizeIdentifier(self.sliceSerialize(token)))) {
return nok(code)
}
effects.enter('gfmFootnoteCallLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteCallLabelMarker')
effects.exit('gfmFootnoteCall')
return ok
}
if (!markdownLineEndingOrSpace(code)) {
data = true
}
size++
effects.consume(code)
return code === codes.backslash ? callEscape : callData
}
/**
* On character after escape.
*
* ```markdown
* > | a [^b\c] d
* ^
* ```
*
* @type {State}
*/
function callEscape(code) {
if (
code === codes.leftSquareBracket ||
code === codes.backslash ||
code === codes.rightSquareBracket
) {
effects.consume(code)
size++
return callData
}
return callData(code)
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDefinitionStart(effects, ok, nok) {
const self = this
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
/** @type {string} */
let identifier
let size = 0
/** @type {boolean | undefined} */
let data
return start
/**
* Start of GFM footnote definition.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function start(code) {
assert(code === codes.leftSquareBracket, 'expected `[`')
effects.enter('gfmFootnoteDefinition')._container = true
effects.enter('gfmFootnoteDefinitionLabel')
effects.enter('gfmFootnoteDefinitionLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionLabelMarker')
return labelAtMarker
}
/**
* In label, at caret.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelAtMarker(code) {
if (code === codes.caret) {
effects.enter('gfmFootnoteDefinitionMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionMarker')
effects.enter('gfmFootnoteDefinitionLabelString')
effects.enter('chunkString').contentType = 'string'
return labelInside
}
return nok(code)
}
/**
* In label.
*
* > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote
* > definition labels.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelInside(code) {
if (
// Too long.
size > constants.linkReferenceSizeMax ||
// Closing brace with nothing.
(code === codes.rightSquareBracket && !data) ||
// Space or tab is not supported by GFM for some reason.
// `\n` and `[` not being supported makes sense.
code === codes.eof ||
code === codes.leftSquareBracket ||
markdownLineEndingOrSpace(code)
) {
return nok(code)
}
if (code === codes.rightSquareBracket) {
effects.exit('chunkString')
const token = effects.exit('gfmFootnoteDefinitionLabelString')
identifier = normalizeIdentifier(self.sliceSerialize(token))
effects.enter('gfmFootnoteDefinitionLabelMarker')
effects.consume(code)
effects.exit('gfmFootnoteDefinitionLabelMarker')
effects.exit('gfmFootnoteDefinitionLabel')
return labelAfter
}
if (!markdownLineEndingOrSpace(code)) {
data = true
}
size++
effects.consume(code)
return code === codes.backslash ? labelEscape : labelInside
}
/**
* After `\`, at a special character.
*
* > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets:
* > <https://github.com/github/cmark-gfm/issues/240>
*
* ```markdown
* > | [^a\*b]: c
* ^
* ```
*
* @type {State}
*/
function labelEscape(code) {
if (
code === codes.leftSquareBracket ||
code === codes.backslash ||
code === codes.rightSquareBracket
) {
effects.consume(code)
size++
return labelInside
}
return labelInside(code)
}
/**
* After definition label.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function labelAfter(code) {
if (code === codes.colon) {
effects.enter('definitionMarker')
effects.consume(code)
effects.exit('definitionMarker')
if (!defined.includes(identifier)) {
defined.push(identifier)
}
// Any whitespace after the marker is eaten, forming indented code
// is not possible.
// No space is also fine, just like a block quote marker.
return factorySpace(
effects,
whitespaceAfter,
'gfmFootnoteDefinitionWhitespace'
)
}
return nok(code)
}
/**
* After definition prefix.
*
* ```markdown
* > | [^a]: b
* ^
* ```
*
* @type {State}
*/
function whitespaceAfter(code) {
// `markdown-rs` has a wrapping token for the prefix that is closed here.
return ok(code)
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDefinitionContinuation(effects, ok, nok) {
/// Start of footnote definition continuation.
///
/// ```markdown
/// | [^a]: b
/// > | c
/// ^
/// ```
//
// Either a blank line, which is okay, or an indented thing.
return effects.check(blankLine, ok, effects.attempt(indent, ok, nok))
}
/** @type {Exiter} */
function gfmFootnoteDefinitionEnd(effects) {
effects.exit('gfmFootnoteDefinition')
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeIndent(effects, ok, nok) {
const self = this
return factorySpace(
effects,
afterPrefix,
'gfmFootnoteDefinitionIndent',
constants.tabSize + 1
)
/**
* @type {State}
*/
function afterPrefix(code) {
const tail = self.events[self.events.length - 1]
return tail &&
tail[1].type === 'gfmFootnoteDefinitionIndent' &&
tail[2].sliceSerialize(tail[1], true).length === constants.tabSize
? ok(code)
: nok(code)
}
}