561 lines
13 KiB
JavaScript
561 lines
13 KiB
JavaScript
|
/**
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Exiter} Exiter
 * @typedef {import('micromark-util-types').Extension} Extension
 * @typedef {import('micromark-util-types').Resolver} Resolver
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Token} Token
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */
|
|||
|
|
|||
|
import {ok as assert} from 'devlop'
|
|||
|
import {blankLine} from 'micromark-core-commonmark'
|
|||
|
import {factorySpace} from 'micromark-factory-space'
|
|||
|
import {markdownLineEndingOrSpace} from 'micromark-util-character'
|
|||
|
import {normalizeIdentifier} from 'micromark-util-normalize-identifier'
|
|||
|
import {codes, constants, types} from 'micromark-util-symbol'
|
|||
|
|
|||
|
// Partial construct that matches the indent of a footnote definition
// continuation line; it is attempted by `tokenizeDefinitionContinuation`.
const indent = {tokenize: tokenizeIndent, partial: true}
|
|||
|
|
|||
|
// To do: micromark should support a `_hiddenGfmFootnoteSupport` flag that
// only affects label start (image).
// That would let us drop `tokenizePotentialGfmFootnote*`.
// It currently has a `_hiddenFootnoteSupport` flag, which affects that and
// more.
// That flag can be removed when `micromark-extension-footnote` is archived.
|
|||
|
|
|||
|
/**
 * Create an extension for `micromark` to enable GFM footnote syntax.
 *
 * The extension registers:
 *
 * - a document (container) construct on `[` for footnote definitions
 *   (`[^a]: b`), with a continuation tokenizer and an exit handler;
 * - a text construct on `[` for footnote calls (`[^a]`);
 * - a text construct on `]`, added *after* existing constructs, that handles
 *   calls whose opening was already tokenized as an image label start
 *   (`![^a]`).
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to
 *   enable GFM footnote syntax.
 */
export function gfmFootnote() {
  /** @type {Extension} */
  return {
    document: {
      [codes.leftSquareBracket]: {
        tokenize: tokenizeDefinitionStart,
        continuation: {tokenize: tokenizeDefinitionContinuation},
        exit: gfmFootnoteDefinitionEnd
      }
    },
    text: {
      [codes.leftSquareBracket]: {tokenize: tokenizeGfmFootnoteCall},
      [codes.rightSquareBracket]: {
        add: 'after',
        tokenize: tokenizePotentialGfmFootnoteCall,
        resolveTo: resolveToPotentialGfmFootnoteCall
      }
    }
  }
}
|
|||
|
|
|||
|
// To do: remove after micromark update.
/**
 * Tokenize the closing `]` of a footnote call whose opening was already
 * tokenized as an image label start (`![^a]`).
 *
 * When the tokenizer is created, the events seen so far are walked backwards
 * to find the nearest `labelImage` token.
 * The walk stops early at a footnote call, link label start, label, image, or
 * link.
 * At `]`, the call is accepted only if that label start is `_balanced`, the
 * text between it and the current position starts with `^`, and the rest of
 * the normalized identifier is listed in `parser.gfmFootnotes`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizePotentialGfmFootnoteCall(effects, ok, nok) {
  const self = this
  let index = self.events.length
  const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
  // Stays `undefined` when no image label start is found.
  /** @type {Token | undefined} */
  let labelStart

  // Find an opening.
  while (index--) {
    const token = self.events[index][1]

    if (token.type === types.labelImage) {
      labelStart = token
      break
    }

    // Exit if we’ve walked far enough.
    if (
      token.type === 'gfmFootnoteCall' ||
      token.type === types.labelLink ||
      token.type === types.label ||
      token.type === types.image ||
      token.type === types.link
    ) {
      break
    }
  }

  return start

  /**
   * At `]`.
   *
   * ```markdown
   * > | a ![^b] c
   *           ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.rightSquareBracket, 'expected `]`')

    if (!labelStart || !labelStart._balanced) {
      return nok(code)
    }

    // Everything between the end of the label start and the current point.
    const id = normalizeIdentifier(
      self.sliceSerialize({start: labelStart.end, end: self.now()})
    )

    // Must be `^` followed by the identifier of a known definition.
    if (id.codePointAt(0) !== codes.caret || !defined.includes(id.slice(1))) {
      return nok(code)
    }

    effects.enter('gfmFootnoteCallLabelMarker')
    effects.consume(code)
    effects.exit('gfmFootnoteCallLabelMarker')
    return ok(code)
  }
}
|
|||
|
|
|||
|
// To do: remove after micromark update.
/**
 * Rewrite the events of an image label start followed by a footnote call
 * closing (`![^a]`) into a literal `!` plus a proper `gfmFootnoteCall`.
 *
 * The nearest `labelImage` enter event is located from the end.
 * The two events after it (the `!` marker) become `data`, the next two (the
 * `[` marker) become `gfmFootnoteCallLabelMarker`, and new marker, string, and
 * chunk tokens are synthesized from their positions.
 * Everything from the `labelImage` enter event to the end of `events` is
 * replaced in place.
 *
 * @type {Resolver}
 */
function resolveToPotentialGfmFootnoteCall(events, context) {
  let index = events.length
  /** @type {Token | undefined} */
  let labelStart

  // Find an opening.
  while (index--) {
    if (
      events[index][1].type === types.labelImage &&
      events[index][0] === 'enter'
    ) {
      labelStart = events[index][1]
      break
    }
  }

  assert(labelStart, 'expected `labelStart` to resolve')

  // Change the `labelImageMarker` to a `data`.
  events[index + 1][1].type = types.data
  events[index + 3][1].type = 'gfmFootnoteCallLabelMarker'

  // The whole (without `!`):
  /** @type {Token} */
  const call = {
    type: 'gfmFootnoteCall',
    start: Object.assign({}, events[index + 3][1].start),
    end: Object.assign({}, events[events.length - 1][1].end)
  }
  // The `^` marker
  /** @type {Token} */
  const marker = {
    type: 'gfmFootnoteCallMarker',
    start: Object.assign({}, events[index + 3][1].end),
    end: Object.assign({}, events[index + 3][1].end)
  }
  // Increment the end 1 character.
  marker.end.column++
  marker.end.offset++
  marker.end._bufferIndex++
  /** @type {Token} */
  const string = {
    type: 'gfmFootnoteCallString',
    start: Object.assign({}, marker.end),
    end: Object.assign({}, events[events.length - 1][1].start)
  }
  /** @type {Token} */
  const chunk = {
    type: types.chunkString,
    contentType: 'string',
    start: Object.assign({}, string.start),
    end: Object.assign({}, string.end)
  }

  /** @type {Array<Event>} */
  const replacement = [
    // Take the `labelImageMarker` (now `data`, the `!`)
    events[index + 1],
    events[index + 2],
    ['enter', call, context],
    // The `[`
    events[index + 3],
    events[index + 4],
    // The `^`.
    ['enter', marker, context],
    ['exit', marker, context],
    // Everything in between.
    ['enter', string, context],
    ['enter', chunk, context],
    ['exit', chunk, context],
    ['exit', string, context],
    // The ending (`]`, properly parsed and labelled).
    events[events.length - 2],
    events[events.length - 1],
    ['exit', call, context]
  ]

  // Replace everything from the label start through the last event.
  events.splice(index, events.length - index, ...replacement)

  return events
}
|
|||
|
|
|||
|
/**
|
|||
|
* @this {TokenizeContext}
|
|||
|
* @type {Tokenizer}
|
|||
|
*/
|
|||
|
function tokenizeGfmFootnoteCall(effects, ok, nok) {
|
|||
|
const self = this
|
|||
|
const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
|
|||
|
let size = 0
|
|||
|
/** @type {boolean} */
|
|||
|
let data
|
|||
|
|
|||
|
// Note: the implementation of `markdown-rs` is different, because it houses
|
|||
|
// core *and* extensions in one project.
|
|||
|
// Therefore, it can include footnote logic inside `label-end`.
|
|||
|
// We can’t do that, but luckily, we can parse footnotes in a simpler way than
|
|||
|
// needed for labels.
|
|||
|
return start
|
|||
|
|
|||
|
/**
|
|||
|
* Start of footnote label.
|
|||
|
*
|
|||
|
* ```markdown
|
|||
|
* > | a [^b] c
|
|||
|
* ^
|
|||
|
* ```
|
|||
|
*
|
|||
|
* @type {State}
|
|||
|
*/
|
|||
|
function start(code) {
|
|||
|
assert(code === codes.leftSquareBracket, 'expected `[`')
|
|||
|
effects.enter('gfmFootnoteCall')
|
|||
|
effects.enter('gfmFootnoteCallLabelMarker')
|
|||
|
effects.consume(code)
|
|||
|
effects.exit('gfmFootnoteCallLabelMarker')
|
|||
|
return callStart
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* After `[`, at `^`.
|
|||
|
*
|
|||
|
* ```markdown
|
|||
|
* > | a [^b] c
|
|||
|
* ^
|
|||
|
* ```
|
|||
|
*
|
|||
|
* @type {State}
|
|||
|
*/
|
|||
|
function callStart(code) {
|
|||
|
if (code !== codes.caret) return nok(code)
|
|||
|
|
|||
|
effects.enter('gfmFootnoteCallMarker')
|
|||
|
effects.consume(code)
|
|||
|
effects.exit('gfmFootnoteCallMarker')
|
|||
|
effects.enter('gfmFootnoteCallString')
|
|||
|
effects.enter('chunkString').contentType = 'string'
|
|||
|
return callData
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* In label.
|
|||
|
*
|
|||
|
* ```markdown
|
|||
|
* > | a [^b] c
|
|||
|
* ^
|
|||
|
* ```
|
|||
|
*
|
|||
|
* @type {State}
|
|||
|
*/
|
|||
|
function callData(code) {
|
|||
|
if (
|
|||
|
// Too long.
|
|||
|
size > constants.linkReferenceSizeMax ||
|
|||
|
// Closing brace with nothing.
|
|||
|
(code === codes.rightSquareBracket && !data) ||
|
|||
|
// Space or tab is not supported by GFM for some reason.
|
|||
|
// `\n` and `[` not being supported makes sense.
|
|||
|
code === codes.eof ||
|
|||
|
code === codes.leftSquareBracket ||
|
|||
|
markdownLineEndingOrSpace(code)
|
|||
|
) {
|
|||
|
return nok(code)
|
|||
|
}
|
|||
|
|
|||
|
if (code === codes.rightSquareBracket) {
|
|||
|
effects.exit('chunkString')
|
|||
|
const token = effects.exit('gfmFootnoteCallString')
|
|||
|
|
|||
|
if (!defined.includes(normalizeIdentifier(self.sliceSerialize(token)))) {
|
|||
|
return nok(code)
|
|||
|
}
|
|||
|
|
|||
|
effects.enter('gfmFootnoteCallLabelMarker')
|
|||
|
effects.consume(code)
|
|||
|
effects.exit('gfmFootnoteCallLabelMarker')
|
|||
|
effects.exit('gfmFootnoteCall')
|
|||
|
return ok
|
|||
|
}
|
|||
|
|
|||
|
if (!markdownLineEndingOrSpace(code)) {
|
|||
|
data = true
|
|||
|
}
|
|||
|
|
|||
|
size++
|
|||
|
effects.consume(code)
|
|||
|
return code === codes.backslash ? callEscape : callData
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* On character after escape.
|
|||
|
*
|
|||
|
* ```markdown
|
|||
|
* > | a [^b\c] d
|
|||
|
* ^
|
|||
|
* ```
|
|||
|
*
|
|||
|
* @type {State}
|
|||
|
*/
|
|||
|
function callEscape(code) {
|
|||
|
if (
|
|||
|
code === codes.leftSquareBracket ||
|
|||
|
code === codes.backslash ||
|
|||
|
code === codes.rightSquareBracket
|
|||
|
) {
|
|||
|
effects.consume(code)
|
|||
|
size++
|
|||
|
return callData
|
|||
|
}
|
|||
|
|
|||
|
return callData(code)
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
 * Tokenize the start of a GFM footnote definition (`[^a]:`), which opens a
 * container.
 *
 * On success the normalized identifier is added to `parser.gfmFootnotes`
 * (once), so that later calls to it can be recognized.
 * The `gfmFootnoteDefinition` token entered here is left open; it is marked
 * as a container with `_container`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeDefinitionStart(effects, ok, nok) {
  const self = this
  const defined = self.parser.gfmFootnotes || (self.parser.gfmFootnotes = [])
  // Normalized label, set when the closing `]` is reached.
  /** @type {string} */
  let identifier
  // Number of characters consumed in the label so far.
  let size = 0
  // Set once a label character has been seen.
  /** @type {boolean | undefined} */
  let data

  return start

  /**
   * Start of GFM footnote definition.
   *
   * ```markdown
   * > | [^a]: b
   *     ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.leftSquareBracket, 'expected `[`')
    effects.enter('gfmFootnoteDefinition')._container = true
    effects.enter('gfmFootnoteDefinitionLabel')
    effects.enter('gfmFootnoteDefinitionLabelMarker')
    effects.consume(code)
    effects.exit('gfmFootnoteDefinitionLabelMarker')
    return labelAtMarker
  }

  /**
   * In label, at caret.
   *
   * ```markdown
   * > | [^a]: b
   *      ^
   * ```
   *
   * @type {State}
   */
  function labelAtMarker(code) {
    if (code === codes.caret) {
      effects.enter('gfmFootnoteDefinitionMarker')
      effects.consume(code)
      effects.exit('gfmFootnoteDefinitionMarker')
      effects.enter('gfmFootnoteDefinitionLabelString')
      effects.enter('chunkString').contentType = 'string'
      return labelInside
    }

    return nok(code)
  }

  /**
   * In label.
   *
   * > 👉 **Note**: `cmark-gfm` prevents whitespace from occurring in footnote
   * > definition labels.
   *
   * ```markdown
   * > | [^a]: b
   *       ^
   * ```
   *
   * @type {State}
   */
  function labelInside(code) {
    if (
      // Too long.
      size > constants.linkReferenceSizeMax ||
      // Closing brace with nothing.
      (code === codes.rightSquareBracket && !data) ||
      // Space or tab is not supported by GFM for some reason.
      // `\n` and `[` not being supported makes sense.
      code === codes.eof ||
      code === codes.leftSquareBracket ||
      markdownLineEndingOrSpace(code)
    ) {
      return nok(code)
    }

    if (code === codes.rightSquareBracket) {
      effects.exit('chunkString')
      const token = effects.exit('gfmFootnoteDefinitionLabelString')
      identifier = normalizeIdentifier(self.sliceSerialize(token))
      effects.enter('gfmFootnoteDefinitionLabelMarker')
      effects.consume(code)
      effects.exit('gfmFootnoteDefinitionLabelMarker')
      effects.exit('gfmFootnoteDefinitionLabel')
      return labelAfter
    }

    if (!markdownLineEndingOrSpace(code)) {
      data = true
    }

    size++
    effects.consume(code)
    return code === codes.backslash ? labelEscape : labelInside
  }

  /**
   * After `\`, at a special character.
   *
   * > 👉 **Note**: `cmark-gfm` currently does not support escaped brackets:
   * > <https://github.com/github/cmark-gfm/issues/240>
   *
   * ```markdown
   * > | [^a\*b]: c
   *         ^
   * ```
   *
   * @type {State}
   */
  function labelEscape(code) {
    // Only `[`, `\`, and `]` are consumed as part of the escape.
    if (
      code === codes.leftSquareBracket ||
      code === codes.backslash ||
      code === codes.rightSquareBracket
    ) {
      effects.consume(code)
      size++
      return labelInside
    }

    return labelInside(code)
  }

  /**
   * After definition label.
   *
   * ```markdown
   * > | [^a]: b
   *         ^
   * ```
   *
   * @type {State}
   */
  function labelAfter(code) {
    if (code === codes.colon) {
      effects.enter('definitionMarker')
      effects.consume(code)
      effects.exit('definitionMarker')

      // Register the definition so that calls to it are recognized.
      if (!defined.includes(identifier)) {
        defined.push(identifier)
      }

      // Any whitespace after the marker is eaten, forming indented code
      // is not possible.
      // No space is also fine, just like a block quote marker.
      return factorySpace(
        effects,
        whitespaceAfter,
        'gfmFootnoteDefinitionWhitespace'
      )
    }

    return nok(code)
  }

  /**
   * After definition prefix.
   *
   * ```markdown
   * > | [^a]: b
   *           ^
   * ```
   *
   * @type {State}
   */
  function whitespaceAfter(code) {
    // `markdown-rs` has a wrapping token for the prefix that is closed here.
    return ok(code)
  }
}
|
|||
|
|
|||
|
/**
 * Tokenize the continuation of a GFM footnote definition.
 *
 * ```markdown
 *   | [^a]: b
 * > |     c
 *     ^
 * ```
 *
 * A line continues the definition when it is either a blank line (checked
 * without consuming it) or starts with the `indent` construct (attempted, so
 * consumed on success).
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeDefinitionContinuation(effects, ok, nok) {
  // Either a blank line, which is okay, or an indented thing.
  return effects.check(blankLine, ok, effects.attempt(indent, ok, nok))
}
|
|||
|
|
|||
|
/**
 * Close the `gfmFootnoteDefinition` token that `tokenizeDefinitionStart`
 * left open.
 *
 * @type {Exiter}
 */
function gfmFootnoteDefinitionEnd(effects) {
  effects.exit('gfmFootnoteDefinition')
}
|
|||
|
|
|||
|
/**
 * Tokenize the indent of a footnote definition continuation line.
 *
 * Whitespace is eaten by `factorySpace` as `gfmFootnoteDefinitionIndent`,
 * with a limit of `constants.tabSize + 1`.
 * The indent is accepted only when the last event is that token and its
 * serialized text (tabs expanded) is exactly `constants.tabSize` long.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeIndent(effects, ok, nok) {
  const self = this

  return factorySpace(
    effects,
    afterPrefix,
    'gfmFootnoteDefinitionIndent',
    constants.tabSize + 1
  )

  /**
   * After the (possible) indent.
   *
   * @type {State}
   */
  function afterPrefix(code) {
    // No events at all means no indent token was produced.
    const tail = self.events[self.events.length - 1]
    return tail &&
      tail[1].type === 'gfmFootnoteDefinitionIndent' &&
      tail[2].sliceSerialize(tail[1], true).length === constants.tabSize
      ? ok(code)
      : nok(code)
  }
}
|