site/node_modules/micromark-extension-gfm-table/dev/lib/syntax.js
2024-10-14 08:09:33 +02:00

947 lines
22 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Point} Point
* @typedef {import('micromark-util-types').Resolver} Resolver
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').Token} Token
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
/**
* @typedef {[number, number, number, number]} Range
* Cell info.
*
* @typedef {0 | 1 | 2 | 3} RowKind
* Where we are: `1` for head row, `2` for delimiter row, `3` for body row.
*/
import {ok as assert} from 'devlop'
import {factorySpace} from 'micromark-factory-space'
import {
markdownLineEnding,
markdownLineEndingOrSpace,
markdownSpace
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {EditMap} from './edit-map.js'
import {gfmTableAlign} from './infer.js'
/**
* Create an HTML extension for `micromark` to support GitHub tables syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* table syntax.
*/
export function gfmTable() {
return {
flow: {null: {tokenize: tokenizeTable, resolveAll: resolveTable}}
}
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeTable(effects, ok, nok) {
const self = this
let size = 0
let sizeB = 0
/** @type {boolean | undefined} */
let seen
return start
/**
* Start of a GFM table.
*
* If there is a valid table row or table head before, then we try to parse
* another row.
* Otherwise, we try to parse a head.
*
* ```markdown
* > | | a |
* ^
* | | - |
* > | | b |
* ^
* ```
* @type {State}
*/
function start(code) {
let index = self.events.length - 1
while (index > -1) {
const type = self.events[index][1].type
if (
type === types.lineEnding ||
// Note: markdown-rs uses `whitespace` instead of `linePrefix`
type === types.linePrefix
)
index--
else break
}
const tail = index > -1 ? self.events[index][1].type : null
const next =
tail === 'tableHead' || tail === 'tableRow' ? bodyRowStart : headRowBefore
// Dont allow lazy body rows.
if (next === bodyRowStart && self.parser.lazy[self.now().line]) {
return nok(code)
}
return next(code)
}
/**
* Before table head row.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowBefore(code) {
effects.enter('tableHead')
effects.enter('tableRow')
return headRowStart(code)
}
/**
* Before table head row, after whitespace.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowStart(code) {
if (code === codes.verticalBar) {
return headRowBreak(code)
}
// To do: micromark-js should let us parse our own whitespace in extensions,
// like `markdown-rs`:
//
// ```js
// // 4+ spaces.
// if (markdownSpace(code)) {
// return nok(code)
// }
// ```
seen = true
// Count the first character, that isnt a pipe, double.
sizeB += 1
return headRowBreak(code)
}
/**
* At break in table head row.
*
* ```markdown
* > | | a |
* ^
* ^
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowBreak(code) {
if (code === codes.eof) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we dont.
return nok(code)
}
if (markdownLineEnding(code)) {
// If anything other than one pipe (ignoring whitespace) was used, its fine.
if (sizeB > 1) {
sizeB = 0
// To do: check if this works.
// Feel free to interrupt:
self.interrupt = true
effects.exit('tableRow')
effects.enter(types.lineEnding)
effects.consume(code)
effects.exit(types.lineEnding)
return headDelimiterStart
}
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we dont.
return nok(code)
}
if (markdownSpace(code)) {
// To do: check if this is fine.
// effects.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok)
// State::Retry(space_or_tab(tokenizer))
return factorySpace(effects, headRowBreak, types.whitespace)(code)
}
sizeB += 1
if (seen) {
seen = false
// Header cell count.
size += 1
}
if (code === codes.verticalBar) {
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
// Whether a delimiter was seen.
seen = true
return headRowBreak
}
// Anything else is cell data.
effects.enter(types.data)
return headRowData(code)
}
/**
* In table head row data.
*
* ```markdown
* > | | a |
* ^
* | | - |
* | | b |
* ```
*
* @type {State}
*/
function headRowData(code) {
if (
code === codes.eof ||
code === codes.verticalBar ||
markdownLineEndingOrSpace(code)
) {
effects.exit(types.data)
return headRowBreak(code)
}
effects.consume(code)
return code === codes.backslash ? headRowEscape : headRowData
}
/**
* In table head row escape.
*
* ```markdown
* > | | a\-b |
* ^
* | | ---- |
* | | c |
* ```
*
* @type {State}
*/
function headRowEscape(code) {
if (code === codes.backslash || code === codes.verticalBar) {
effects.consume(code)
return headRowData
}
return headRowData(code)
}
/**
* Before delimiter row.
*
* ```markdown
* | | a |
* > | | - |
* ^
* | | b |
* ```
*
* @type {State}
*/
function headDelimiterStart(code) {
// Reset `interrupt`.
self.interrupt = false
// Note: in `markdown-rs`, we need to handle piercing here too.
if (self.parser.lazy[self.now().line]) {
return nok(code)
}
effects.enter('tableDelimiterRow')
// Track if weve seen a `:` or `|`.
seen = false
if (markdownSpace(code)) {
assert(self.parser.constructs.disable.null, 'expected `disabled.null`')
return factorySpace(
effects,
headDelimiterBefore,
types.linePrefix,
self.parser.constructs.disable.null.includes('codeIndented')
? undefined
: constants.tabSize
)(code)
}
return headDelimiterBefore(code)
}
/**
* Before delimiter row, after optional whitespace.
*
* Reused when a `|` is found later, to parse another cell.
*
* ```markdown
* | | a |
* > | | - |
* ^
* | | b |
* ```
*
* @type {State}
*/
function headDelimiterBefore(code) {
if (code === codes.dash || code === codes.colon) {
return headDelimiterValueBefore(code)
}
if (code === codes.verticalBar) {
seen = true
// If we start with a pipe, we open a cell marker.
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
return headDelimiterCellBefore
}
// More whitespace / empty row not allowed at start.
return headDelimiterNok(code)
}
/**
* After `|`, before delimiter cell.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterCellBefore(code) {
if (markdownSpace(code)) {
return factorySpace(
effects,
headDelimiterValueBefore,
types.whitespace
)(code)
}
return headDelimiterValueBefore(code)
}
/**
* Before delimiter cell value.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterValueBefore(code) {
// Align: left.
if (code === codes.colon) {
sizeB += 1
seen = true
effects.enter('tableDelimiterMarker')
effects.consume(code)
effects.exit('tableDelimiterMarker')
return headDelimiterLeftAlignmentAfter
}
// Align: none.
if (code === codes.dash) {
sizeB += 1
// To do: seems weird that this *isnt* left aligned, but that state is used?
return headDelimiterLeftAlignmentAfter(code)
}
if (code === codes.eof || markdownLineEnding(code)) {
return headDelimiterCellAfter(code)
}
return headDelimiterNok(code)
}
/**
* After delimiter cell left alignment marker.
*
* ```markdown
* | | a |
* > | | :- |
* ^
* ```
*
* @type {State}
*/
function headDelimiterLeftAlignmentAfter(code) {
if (code === codes.dash) {
effects.enter('tableDelimiterFiller')
return headDelimiterFiller(code)
}
// Anything else is not ok after the left-align colon.
return headDelimiterNok(code)
}
/**
* In delimiter cell filler.
*
* ```markdown
* | | a |
* > | | - |
* ^
* ```
*
* @type {State}
*/
function headDelimiterFiller(code) {
if (code === codes.dash) {
effects.consume(code)
return headDelimiterFiller
}
// Align is `center` if it was `left`, `right` otherwise.
if (code === codes.colon) {
seen = true
effects.exit('tableDelimiterFiller')
effects.enter('tableDelimiterMarker')
effects.consume(code)
effects.exit('tableDelimiterMarker')
return headDelimiterRightAlignmentAfter
}
effects.exit('tableDelimiterFiller')
return headDelimiterRightAlignmentAfter(code)
}
/**
* After delimiter cell right alignment marker.
*
* ```markdown
* | | a |
* > | | -: |
* ^
* ```
*
* @type {State}
*/
function headDelimiterRightAlignmentAfter(code) {
if (markdownSpace(code)) {
return factorySpace(
effects,
headDelimiterCellAfter,
types.whitespace
)(code)
}
return headDelimiterCellAfter(code)
}
/**
* After delimiter cell.
*
* ```markdown
* | | a |
* > | | -: |
* ^
* ```
*
* @type {State}
*/
function headDelimiterCellAfter(code) {
if (code === codes.verticalBar) {
return headDelimiterBefore(code)
}
if (code === codes.eof || markdownLineEnding(code)) {
// Exit when:
// * there was no `:` or `|` at all (its a thematic break or setext
// underline instead)
// * the header cell count is not the delimiter cell count
if (!seen || size !== sizeB) {
return headDelimiterNok(code)
}
// Note: in markdown-rs`, a reset is needed here.
effects.exit('tableDelimiterRow')
effects.exit('tableHead')
// To do: in `markdown-rs`, resolvers need to be registered manually.
// effects.register_resolver(ResolveName::GfmTable)
return ok(code)
}
return headDelimiterNok(code)
}
/**
* In delimiter row, at a disallowed byte.
*
* ```markdown
* | | a |
* > | | x |
* ^
* ```
*
* @type {State}
*/
function headDelimiterNok(code) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we dont.
return nok(code)
}
/**
* Before table body row.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ```
*
* @type {State}
*/
function bodyRowStart(code) {
// Note: in `markdown-rs` we need to manually take care of a prefix,
// but in `micromark-js` that is done for us, so if were here, were
// never at whitespace.
effects.enter('tableRow')
return bodyRowBreak(code)
}
/**
* At break in table body row.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ^
* ^
* ```
*
* @type {State}
*/
function bodyRowBreak(code) {
if (code === codes.verticalBar) {
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
return bodyRowBreak
}
if (code === codes.eof || markdownLineEnding(code)) {
effects.exit('tableRow')
return ok(code)
}
if (markdownSpace(code)) {
return factorySpace(effects, bodyRowBreak, types.whitespace)(code)
}
// Anything else is cell content.
effects.enter(types.data)
return bodyRowData(code)
}
/**
* In table body row data.
*
* ```markdown
* | | a |
* | | - |
* > | | b |
* ^
* ```
*
* @type {State}
*/
function bodyRowData(code) {
if (
code === codes.eof ||
code === codes.verticalBar ||
markdownLineEndingOrSpace(code)
) {
effects.exit(types.data)
return bodyRowBreak(code)
}
effects.consume(code)
return code === codes.backslash ? bodyRowEscape : bodyRowData
}
/**
* In table body row escape.
*
* ```markdown
* | | a |
* | | ---- |
* > | | b\-c |
* ^
* ```
*
* @type {State}
*/
function bodyRowEscape(code) {
if (code === codes.backslash || code === codes.verticalBar) {
effects.consume(code)
return bodyRowData
}
return bodyRowData(code)
}
}
/** @type {Resolver} */
function resolveTable(events, context) {
let index = -1
let inFirstCellAwaitingPipe = true
/** @type {RowKind} */
let rowKind = 0
/** @type {Range} */
let lastCell = [0, 0, 0, 0]
/** @type {Range} */
let cell = [0, 0, 0, 0]
let afterHeadAwaitingFirstBodyRow = false
let lastTableEnd = 0
/** @type {Token | undefined} */
let currentTable
/** @type {Token | undefined} */
let currentBody
/** @type {Token | undefined} */
let currentCell
const map = new EditMap()
while (++index < events.length) {
const event = events[index]
const token = event[1]
if (event[0] === 'enter') {
// Start of head.
if (token.type === 'tableHead') {
afterHeadAwaitingFirstBodyRow = false
// Inject previous (body end and) table end.
if (lastTableEnd !== 0) {
assert(currentTable, 'there should be a table opening')
flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
currentBody = undefined
lastTableEnd = 0
}
// Inject table start.
currentTable = {
type: 'table',
start: Object.assign({}, token.start),
// Note: correct end is set later.
end: Object.assign({}, token.end)
}
map.add(index, 0, [['enter', currentTable, context]])
} else if (
token.type === 'tableRow' ||
token.type === 'tableDelimiterRow'
) {
inFirstCellAwaitingPipe = true
currentCell = undefined
lastCell = [0, 0, 0, 0]
cell = [0, index + 1, 0, 0]
// Inject table body start.
if (afterHeadAwaitingFirstBodyRow) {
afterHeadAwaitingFirstBodyRow = false
currentBody = {
type: 'tableBody',
start: Object.assign({}, token.start),
// Note: correct end is set later.
end: Object.assign({}, token.end)
}
map.add(index, 0, [['enter', currentBody, context]])
}
rowKind = token.type === 'tableDelimiterRow' ? 2 : currentBody ? 3 : 1
}
// Cell data.
else if (
rowKind &&
(token.type === types.data ||
token.type === 'tableDelimiterMarker' ||
token.type === 'tableDelimiterFiller')
) {
inFirstCellAwaitingPipe = false
// First value in cell.
if (cell[2] === 0) {
if (lastCell[1] !== 0) {
cell[0] = cell[1]
currentCell = flushCell(
map,
context,
lastCell,
rowKind,
undefined,
currentCell
)
lastCell = [0, 0, 0, 0]
}
cell[2] = index
}
} else if (token.type === 'tableCellDivider') {
if (inFirstCellAwaitingPipe) {
inFirstCellAwaitingPipe = false
} else {
if (lastCell[1] !== 0) {
cell[0] = cell[1]
currentCell = flushCell(
map,
context,
lastCell,
rowKind,
undefined,
currentCell
)
}
lastCell = cell
cell = [lastCell[1], index, 0, 0]
}
}
}
// Exit events.
else if (token.type === 'tableHead') {
afterHeadAwaitingFirstBodyRow = true
lastTableEnd = index
} else if (
token.type === 'tableRow' ||
token.type === 'tableDelimiterRow'
) {
lastTableEnd = index
if (lastCell[1] !== 0) {
cell[0] = cell[1]
currentCell = flushCell(
map,
context,
lastCell,
rowKind,
index,
currentCell
)
} else if (cell[1] !== 0) {
currentCell = flushCell(map, context, cell, rowKind, index, currentCell)
}
rowKind = 0
} else if (
rowKind &&
(token.type === types.data ||
token.type === 'tableDelimiterMarker' ||
token.type === 'tableDelimiterFiller')
) {
cell[3] = index
}
}
if (lastTableEnd !== 0) {
assert(currentTable, 'expected table opening')
flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
}
map.consume(context.events)
// To do: move this into `html`, when events are exposed there.
// Thats what `markdown-rs` does.
// That needs updates to `mdast-util-gfm-table`.
index = -1
while (++index < context.events.length) {
const event = context.events[index]
if (event[0] === 'enter' && event[1].type === 'table') {
event[1]._align = gfmTableAlign(context.events, index)
}
}
return events
}
/**
* Generate a cell.
*
* @param {EditMap} map
* @param {Readonly<TokenizeContext>} context
* @param {Readonly<Range>} range
* @param {RowKind} rowKind
* @param {number | undefined} rowEnd
* @param {Token | undefined} previousCell
* @returns {Token | undefined}
*/
// eslint-disable-next-line max-params
function flushCell(map, context, range, rowKind, rowEnd, previousCell) {
// `markdown-rs` uses:
// rowKind === 2 ? 'tableDelimiterCell' : 'tableCell'
const groupName =
rowKind === 1
? 'tableHeader'
: rowKind === 2
? 'tableDelimiter'
: 'tableData'
// `markdown-rs` uses:
// rowKind === 2 ? 'tableDelimiterCellValue' : 'tableCellText'
const valueName = 'tableContent'
// Insert an exit for the previous cell, if there is one.
//
// ```markdown
// > | | aa | bb | cc |
// ^-- exit
// ^^^^-- this cell
// ```
if (range[0] !== 0) {
assert(previousCell, 'expected previous cell enter')
previousCell.end = Object.assign({}, getPoint(context.events, range[0]))
map.add(range[0], 0, [['exit', previousCell, context]])
}
// Insert enter of this cell.
//
// ```markdown
// > | | aa | bb | cc |
// ^-- enter
// ^^^^-- this cell
// ```
const now = getPoint(context.events, range[1])
previousCell = {
type: groupName,
start: Object.assign({}, now),
// Note: correct end is set later.
end: Object.assign({}, now)
}
map.add(range[1], 0, [['enter', previousCell, context]])
// Insert text start at first data start and end at last data end, and
// remove events between.
//
// ```markdown
// > | | aa | bb | cc |
// ^-- enter
// ^-- exit
// ^^^^-- this cell
// ```
if (range[2] !== 0) {
const relatedStart = getPoint(context.events, range[2])
const relatedEnd = getPoint(context.events, range[3])
/** @type {Token} */
const valueToken = {
type: valueName,
start: Object.assign({}, relatedStart),
end: Object.assign({}, relatedEnd)
}
map.add(range[2], 0, [['enter', valueToken, context]])
assert(range[3] !== 0)
if (rowKind !== 2) {
// Fix positional info on remaining events
const start = context.events[range[2]]
const end = context.events[range[3]]
start[1].end = Object.assign({}, end[1].end)
start[1].type = types.chunkText
start[1].contentType = constants.contentTypeText
// Remove if needed.
if (range[3] > range[2] + 1) {
const a = range[2] + 1
const b = range[3] - range[2] - 1
map.add(a, b, [])
}
}
map.add(range[3] + 1, 0, [['exit', valueToken, context]])
}
// Insert an exit for the last cell, if at the row end.
//
// ```markdown
// > | | aa | bb | cc |
// ^-- exit
// ^^^^^^-- this cell (the last one contains two “between” parts)
// ```
if (rowEnd !== undefined) {
previousCell.end = Object.assign({}, getPoint(context.events, rowEnd))
map.add(rowEnd, 0, [['exit', previousCell, context]])
previousCell = undefined
}
return previousCell
}
/**
* Generate table end (and table body end).
*
* @param {Readonly<EditMap>} map
* @param {Readonly<TokenizeContext>} context
* @param {number} index
* @param {Token} table
* @param {Token | undefined} tableBody
*/
// eslint-disable-next-line max-params
function flushTableEnd(map, context, index, table, tableBody) {
/** @type {Array<Event>} */
const exits = []
const related = getPoint(context.events, index)
if (tableBody) {
tableBody.end = Object.assign({}, related)
exits.push(['exit', tableBody, context])
}
table.end = Object.assign({}, related)
exits.push(['exit', table, context])
map.add(index + 1, 0, exits)
}
/**
* @param {Readonly<Array<Event>>} events
* @param {number} index
* @returns {Readonly<Point>}
*/
function getPoint(events, index) {
const event = events[index]
const side = event[0] === 'enter' ? 'start' : 'end'
return event[1][side]
}