site/node_modules/micromark-extension-gfm-table/lib/syntax.js

884 lines
21 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Point} Point
* @typedef {import('micromark-util-types').Resolver} Resolver
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').Token} Token
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
/**
* @typedef {[number, number, number, number]} Range
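* Cell info: four event indexes, `[previousCellExit, cellEnter, valueStart,
* valueEnd]`: where to exit the previous cell, where to enter this cell, and
* the first and last value events of this cell. `0` means “not set”.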
*
* @typedef {0 | 1 | 2 | 3} RowKind
* Where we are: `0` when not in a row, `1` for head row, `2` for delimiter row, `3` for body row.
*/
import {factorySpace} from 'micromark-factory-space'
import {
markdownLineEnding,
markdownLineEndingOrSpace,
markdownSpace
} from 'micromark-util-character'
import {EditMap} from './edit-map.js'
import {gfmTableAlign} from './infer.js'
/**
 * Build the `micromark` syntax extension that turns on GFM tables.
 *
 * The table construct is registered under `flow` for the `null` key, using
 * `tokenizeTable` as its tokenizer and `resolveTable` as its `resolveAll`
 * hook.
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable GFM
 *   table syntax.
 */
export function gfmTable() {
  const tableConstruct = {tokenize: tokenizeTable, resolveAll: resolveTable}
  const flow = {null: tableConstruct}
  return {flow}
}
/**
* Tokenizer for the GFM table construct.
*
* Depending on what precedes the current line (see `start`), one run parses
* either a table head (a header row followed by its delimiter row) or a
* single body row.
* The state functions below share three closure variables: `size`, `sizeB`,
* and `seen` (documented at their declarations).
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeTable(effects, ok, nok) {
const self = this
// Number of cells counted in the header row.
let size = 0
// Scratch counter.
// In the header row it counts pipes and data runs (plus one if the row does
// not open with a pipe), so that a row holding nothing but a single pipe can
// be rejected.
// It is reset to `0` at the end of the header row and then counts the cells
// of the delimiter row, to be compared with `size`.
let sizeB = 0
/**
* Scratch flag.
* In the header row: whether the next pipe or data run starts a new cell.
* In the delimiter row: whether a `:` or `|` was seen.
*
* @type {boolean | undefined}
*/
let seen
return start
/**
* Start of a GFM table.
*
* If there is a valid table row or table head before, then we try to parse
* another row.
* Otherwise, we try to parse a head.
*
* ```markdown
* > | | a |
*     ^
*   | | - |
* > | | b |
*     ^
* ```
* @type {State}
*/
function start(code) {
// Walk back over trailing line endings and line prefixes to find the last
// other event.
let index = self.events.length - 1
while (index > -1) {
const type = self.events[index][1].type
if (
type === 'lineEnding' ||
// Note: markdown-rs uses `whitespace` instead of `linePrefix`
type === 'linePrefix'
)
index--
else break
}
const tail = index > -1 ? self.events[index][1].type : null
// A table head or row right before means this line is a body row of that
// table; anything else means we have to parse a new head.
const next =
tail === 'tableHead' || tail === 'tableRow' ? bodyRowStart : headRowBefore
// Don't allow lazy body rows.
if (next === bodyRowStart && self.parser.lazy[self.now().line]) {
return nok(code)
}
return next(code)
}
/**
* Before table head row.
*
* Opens both the `tableHead` and the first `tableRow`.
*
* ```markdown
* > | | a |
*     ^
*   | | - |
*   | | b |
* ```
*
* @type {State}
*/
function headRowBefore(code) {
effects.enter('tableHead')
effects.enter('tableRow')
return headRowStart(code)
}
/**
* Before table head row, after whitespace.
*
* ```markdown
* > | | a |
*     ^
*   | | - |
*   | | b |
* ```
*
* @type {State}
*/
function headRowStart(code) {
// `124` is `|`: the row opens with a pipe, nothing special to track.
if (code === 124) {
return headRowBreak(code)
}
// To do: micromark-js should let us parse our own whitespace in extensions,
// like `markdown-rs`:
//
// ```js
// // 4+ spaces.
// if (markdownSpace(code)) {
// return nok(code)
// }
// ```
// No leading pipe: the first thing found starts the first cell.
seen = true
// Count the first character, that isn't a pipe, double.
sizeB += 1
return headRowBreak(code)
}
/**
* At break in table head row.
*
* ```markdown
* > | | a |
*     ^
*       ^
*         ^
*   | | - |
*   | | b |
* ```
*
* @type {State}
*/
function headRowBreak(code) {
// A head cannot end at the end of the file: a delimiter row must follow.
if (code === null) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code)
}
if (markdownLineEnding(code)) {
// If anything other than one pipe (ignoring whitespace) was used, it's fine.
if (sizeB > 1) {
// From here on, `sizeB` counts delimiter cells.
sizeB = 0
// To do: check if this works.
// Feel free to interrupt:
self.interrupt = true
effects.exit('tableRow')
effects.enter('lineEnding')
effects.consume(code)
effects.exit('lineEnding')
return headDelimiterStart
}
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code)
}
if (markdownSpace(code)) {
// To do: check if this is fine.
// effects.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok)
// State::Retry(space_or_tab(tokenizer))
return factorySpace(effects, headRowBreak, 'whitespace')(code)
}
// A pipe or the start of a data run.
sizeB += 1
if (seen) {
seen = false
// Header cell count.
size += 1
}
if (code === 124) {
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
// Whether a delimiter was seen.
seen = true
return headRowBreak
}
// Anything else is cell data.
effects.enter('data')
return headRowData(code)
}
/**
* In table head row data.
*
* ```markdown
* > | | a |
*       ^
*   | | - |
*   | | b |
* ```
*
* @type {State}
*/
function headRowData(code) {
// End of file, a pipe, a line ending, or whitespace ends the data run.
if (code === null || code === 124 || markdownLineEndingOrSpace(code)) {
effects.exit('data')
return headRowBreak(code)
}
effects.consume(code)
// `92` is a backslash: the next character may be escaped.
return code === 92 ? headRowEscape : headRowData
}
/**
* In table head row escape.
*
* ```markdown
* > | | a\-b |
*         ^
*   | | ---- |
*   | | c    |
* ```
*
* @type {State}
*/
function headRowEscape(code) {
// Only a backslash or a pipe is consumed here as part of the data; anything
// else is handed back to `headRowData`.
if (code === 92 || code === 124) {
effects.consume(code)
return headRowData
}
return headRowData(code)
}
/**
* Before delimiter row.
*
* ```markdown
*   | | a |
* > | | - |
*     ^
*   | | b |
* ```
*
* @type {State}
*/
function headDelimiterStart(code) {
// Reset `interrupt`.
self.interrupt = false
// Note: in `markdown-rs`, we need to handle piercing here too.
if (self.parser.lazy[self.now().line]) {
return nok(code)
}
effects.enter('tableDelimiterRow')
// Track if we've seen a `:` or `|`.
seen = false
if (markdownSpace(code)) {
// The last argument is `4` unless the `codeIndented` construct is disabled.
// NOTE(review): presumably this caps the size of the prefix; confirm
// against `micromark-factory-space`.
return factorySpace(
effects,
headDelimiterBefore,
'linePrefix',
self.parser.constructs.disable.null.includes('codeIndented')
? undefined
: 4
)(code)
}
return headDelimiterBefore(code)
}
/**
* Before delimiter row, after optional whitespace.
*
* Reused when a `|` is found later, to parse another cell.
*
* ```markdown
*   | | a |
* > | | - |
*     ^
*   | | b |
* ```
*
* @type {State}
*/
function headDelimiterBefore(code) {
// `45` is `-`, `58` is `:`: a cell value starts right away.
if (code === 45 || code === 58) {
return headDelimiterValueBefore(code)
}
if (code === 124) {
seen = true
// If we start with a pipe, we open a cell marker.
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
return headDelimiterCellBefore
}
// More whitespace / empty row not allowed at start.
return headDelimiterNok(code)
}
/**
* After `|`, before delimiter cell.
*
* ```markdown
*   | | a |
* > | | - |
*      ^
* ```
*
* @type {State}
*/
function headDelimiterCellBefore(code) {
if (markdownSpace(code)) {
return factorySpace(effects, headDelimiterValueBefore, 'whitespace')(code)
}
return headDelimiterValueBefore(code)
}
/**
* Before delimiter cell value.
*
* ```markdown
*   | | a |
* > | | - |
*       ^
* ```
*
* @type {State}
*/
function headDelimiterValueBefore(code) {
// Align: left.
if (code === 58) {
// One more delimiter cell.
sizeB += 1
seen = true
effects.enter('tableDelimiterMarker')
effects.consume(code)
effects.exit('tableDelimiterMarker')
return headDelimiterLeftAlignmentAfter
}
// Align: none.
if (code === 45) {
// One more delimiter cell.
sizeB += 1
// To do: seems weird that this *isn't* left aligned, but that state is used?
return headDelimiterLeftAlignmentAfter(code)
}
// End of the row right after a pipe (and optional whitespace).
if (code === null || markdownLineEnding(code)) {
return headDelimiterCellAfter(code)
}
return headDelimiterNok(code)
}
/**
* After delimiter cell left alignment marker.
*
* ```markdown
*   | | a  |
* > | | :- |
*        ^
* ```
*
* @type {State}
*/
function headDelimiterLeftAlignmentAfter(code) {
// At least one dash is required.
if (code === 45) {
effects.enter('tableDelimiterFiller')
return headDelimiterFiller(code)
}
// Anything else is not ok after the left-align colon.
return headDelimiterNok(code)
}
/**
* In delimiter cell filler.
*
* ```markdown
*   | | a |
* > | | - |
*       ^
* ```
*
* @type {State}
*/
function headDelimiterFiller(code) {
if (code === 45) {
effects.consume(code)
return headDelimiterFiller
}
// Align is `center` if it was `left`, `right` otherwise.
if (code === 58) {
seen = true
effects.exit('tableDelimiterFiller')
effects.enter('tableDelimiterMarker')
effects.consume(code)
effects.exit('tableDelimiterMarker')
return headDelimiterRightAlignmentAfter
}
// No closing colon: the filler simply ends here.
effects.exit('tableDelimiterFiller')
return headDelimiterRightAlignmentAfter(code)
}
/**
* After delimiter cell right alignment marker.
*
* ```markdown
*   | |  a |
* > | | -: |
*         ^
* ```
*
* @type {State}
*/
function headDelimiterRightAlignmentAfter(code) {
if (markdownSpace(code)) {
return factorySpace(effects, headDelimiterCellAfter, 'whitespace')(code)
}
return headDelimiterCellAfter(code)
}
/**
* After delimiter cell.
*
* ```markdown
*   | |  a |
* > | | -: |
*          ^
* ```
*
* @type {State}
*/
function headDelimiterCellAfter(code) {
// A pipe: go parse the next cell.
if (code === 124) {
return headDelimiterBefore(code)
}
if (code === null || markdownLineEnding(code)) {
// Exit when:
// * there was no `:` or `|` at all (it's a thematic break or setext
// underline instead)
// * the header cell count is not the delimiter cell count
if (!seen || size !== sizeB) {
return headDelimiterNok(code)
}
// Note: in `markdown-rs`, a reset is needed here.
effects.exit('tableDelimiterRow')
effects.exit('tableHead')
// To do: in `markdown-rs`, resolvers need to be registered manually.
// effects.register_resolver(ResolveName::GfmTable)
return ok(code)
}
return headDelimiterNok(code)
}
/**
* In delimiter row, at a disallowed byte.
*
* ```markdown
*   | | a |
* > | | x |
*       ^
* ```
*
* @type {State}
*/
function headDelimiterNok(code) {
// Note: in `markdown-rs`, we need to reset, in `micromark-js` we don't.
return nok(code)
}
/**
* Before table body row.
*
* ```markdown
*   | | a |
*   | | - |
* > | | b |
*     ^
* ```
*
* @type {State}
*/
function bodyRowStart(code) {
// Note: in `markdown-rs` we need to manually take care of a prefix,
// but in `micromark-js` that is done for us, so if we're here, we're
// never at whitespace.
effects.enter('tableRow')
return bodyRowBreak(code)
}
/**
* At break in table body row.
*
* Unlike the head row, nothing is counted here.
*
* ```markdown
*   | | a |
*   | | - |
* > | | b |
*     ^
*       ^
*         ^
* ```
*
* @type {State}
*/
function bodyRowBreak(code) {
if (code === 124) {
effects.enter('tableCellDivider')
effects.consume(code)
effects.exit('tableCellDivider')
return bodyRowBreak
}
// End of file or end of line: the row is done.
if (code === null || markdownLineEnding(code)) {
effects.exit('tableRow')
return ok(code)
}
if (markdownSpace(code)) {
return factorySpace(effects, bodyRowBreak, 'whitespace')(code)
}
// Anything else is cell content.
effects.enter('data')
return bodyRowData(code)
}
/**
* In table body row data.
*
* ```markdown
*   | | a |
*   | | - |
* > | | b |
*       ^
* ```
*
* @type {State}
*/
function bodyRowData(code) {
// End of file, a pipe, a line ending, or whitespace ends the data run.
if (code === null || code === 124 || markdownLineEndingOrSpace(code)) {
effects.exit('data')
return bodyRowBreak(code)
}
effects.consume(code)
// `92` is a backslash: the next character may be escaped.
return code === 92 ? bodyRowEscape : bodyRowData
}
/**
* In table body row escape.
*
* ```markdown
*   | | a    |
*   | | ---- |
* > | | b\-c |
*         ^
* ```
*
* @type {State}
*/
function bodyRowEscape(code) {
// Only a backslash or a pipe is consumed here as part of the data; anything
// else is handed back to `bodyRowData`.
if (code === 92 || code === 124) {
effects.consume(code)
return bodyRowData
}
return bodyRowData(code)
}
}
/**
 * Resolver that wraps the flat head/row events produced by the tokenizer in
 * `table`, `tableBody`, and cell tokens.
 *
 * All insertions and removals are collected in an `EditMap` and applied to
 * `context.events` in one go; afterwards every `table` enter token gets its
 * `_align` field from `gfmTableAlign`.
 *
 * @type {Resolver}
 */
function resolveTable(events, context) {
  const map = new EditMap()
  /** @type {RowKind} */
  let rowKind = 0
  /** @type {Range} */
  let previousRange = [0, 0, 0, 0]
  /** @type {Range} */
  let currentRange = [0, 0, 0, 0]
  // Whether a pipe found now would be the optional leading pipe of the row.
  let awaitingLeadingPipe = true
  // Whether the next row is the first one after a head.
  let awaitingFirstBodyRow = false
  // Index of the last head/row exit, `0` when no table is pending.
  let pendingTableEnd = 0
  /** @type {Token | undefined} */
  let table
  /** @type {Token | undefined} */
  let body
  /** @type {Token | undefined} */
  let openCell

  /** Flush the previous cell, anchoring its exit at the current cell. */
  const flushPrevious = (rowEnd) => {
    currentRange[0] = currentRange[1]
    openCell = flushCell(map, context, previousRange, rowKind, rowEnd, openCell)
  }

  for (let index = 0; index < events.length; index++) {
    const [kind, token] = events[index]
    const entering = kind === 'enter'
    const {type} = token

    if (type === 'tableHead') {
      if (!entering) {
        awaitingFirstBodyRow = true
        pendingTableEnd = index
        continue
      }

      awaitingFirstBodyRow = false

      // Close the previous table (and its body) first.
      if (pendingTableEnd !== 0) {
        flushTableEnd(map, context, pendingTableEnd, table, body)
        body = undefined
        pendingTableEnd = 0
      }

      // Open the new table; its real end is filled in later.
      table = {
        type: 'table',
        start: Object.assign({}, token.start),
        end: Object.assign({}, token.end)
      }
      map.add(index, 0, [['enter', table, context]])
      continue
    }

    if (type === 'tableRow' || type === 'tableDelimiterRow') {
      if (entering) {
        awaitingLeadingPipe = true
        openCell = undefined
        previousRange = [0, 0, 0, 0]
        currentRange = [0, index + 1, 0, 0]

        // First row after the head: open the body; its real end is filled in
        // later.
        if (awaitingFirstBodyRow) {
          awaitingFirstBodyRow = false
          body = {
            type: 'tableBody',
            start: Object.assign({}, token.start),
            end: Object.assign({}, token.end)
          }
          map.add(index, 0, [['enter', body, context]])
        }

        if (type === 'tableDelimiterRow') {
          rowKind = 2
        } else {
          rowKind = body ? 3 : 1
        }
      } else {
        pendingTableEnd = index

        if (previousRange[1] !== 0) {
          flushPrevious(index)
        } else if (currentRange[1] !== 0) {
          openCell = flushCell(
            map,
            context,
            currentRange,
            rowKind,
            index,
            openCell
          )
        }

        rowKind = 0
      }

      continue
    }

    const isValue =
      type === 'data' ||
      type === 'tableDelimiterMarker' ||
      type === 'tableDelimiterFiller'

    if (isValue) {
      // Values outside of a row are not ours.
      if (!rowKind) continue

      if (!entering) {
        currentRange[3] = index
        continue
      }

      awaitingLeadingPipe = false

      // First value in the cell.
      if (currentRange[2] === 0) {
        if (previousRange[1] !== 0) {
          flushPrevious(undefined)
          previousRange = [0, 0, 0, 0]
        }

        currentRange[2] = index
      }

      continue
    }

    if (entering && type === 'tableCellDivider') {
      if (awaitingLeadingPipe) {
        awaitingLeadingPipe = false
        continue
      }

      if (previousRange[1] !== 0) {
        flushPrevious(undefined)
      }

      previousRange = currentRange
      currentRange = [previousRange[1], index, 0, 0]
    }
  }

  if (pendingTableEnd !== 0) {
    flushTableEnd(map, context, pendingTableEnd, table, body)
  }

  map.consume(context.events)

  // To do: move this into `html`, when events are exposed there.
  // That's what `markdown-rs` does.
  // That needs updates to `mdast-util-gfm-table`.
  for (let at = 0; at < context.events.length; at++) {
    const [kind, token] = context.events[at]

    if (kind === 'enter' && token.type === 'table') {
      token._align = gfmTableAlign(context.events, at)
    }
  }

  return events
}
/**
 * Register the edits for one cell.
 *
 * Depending on `range`, this closes the previous cell, opens this cell, wraps
 * its value events in a `tableContent` token, and, when `rowEnd` is given,
 * closes this cell at the end of the row.
 *
 * @param {EditMap} map
 *   Edit map that receives the insertions and removals.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` the indexes in `range` refer to.
 * @param {Readonly<Range>} range
 *   Indexes of: previous cell exit, cell enter, first value event, last value
 *   event (`0` means “not set”).
 * @param {RowKind} rowKind
 *   Kind of the row the cell is in.
 * @param {number | undefined} rowEnd
 *   Index of the row exit when this is the last cell in the row.
 * @param {Token | undefined} previousCell
 *   Cell that is still open, if any.
 * @returns {Token | undefined}
 *   The cell that was opened and is still open, or `undefined` when it was
 *   closed at the row end.
 */
// eslint-disable-next-line max-params
function flushCell(map, context, range, rowKind, rowEnd, previousCell) {
  const {events} = context
  const [previousExitAt, enterAt, valueStartAt, valueEndAt] = range

  // `markdown-rs` uses:
  // rowKind === 2 ? 'tableDelimiterCell' : 'tableCell'
  let groupName
  switch (rowKind) {
    case 1: {
      groupName = 'tableHeader'
      break
    }

    case 2: {
      groupName = 'tableDelimiter'
      break
    }

    default: {
      groupName = 'tableData'
    }
  }

  // Close the cell before this one, if there is one.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //          ^-- exit
  //           ^^^^-- this cell
  // ```
  if (previousExitAt !== 0) {
    previousCell.end = Object.assign({}, getPoint(events, previousExitAt))
    map.add(previousExitAt, 0, [['exit', previousCell, context]])
  }

  // Open this cell; its real end is filled in later.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //           ^-- enter
  //           ^^^^-- this cell
  // ```
  const enterPoint = getPoint(events, enterAt)
  const cell = {
    type: groupName,
    start: Object.assign({}, enterPoint),
    end: Object.assign({}, enterPoint)
  }
  map.add(enterAt, 0, [['enter', cell, context]])

  // Wrap the value: from the first value event up to the last one, dropping
  // whatever sits between them.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //            ^-- enter
  //              ^-- exit
  //           ^^^^-- this cell
  // ```
  if (valueStartAt !== 0) {
    // `markdown-rs` uses:
    // rowKind === 2 ? 'tableDelimiterCellValue' : 'tableCellText'
    /** @type {Token} */
    const content = {
      type: 'tableContent',
      start: Object.assign({}, getPoint(events, valueStartAt)),
      end: Object.assign({}, getPoint(events, valueEndAt))
    }
    map.add(valueStartAt, 0, [['enter', content, context]])

    if (rowKind !== 2) {
      // Stretch the first value token over the whole value and turn it into
      // a text chunk.
      const firstToken = events[valueStartAt][1]
      const lastToken = events[valueEndAt][1]
      firstToken.end = Object.assign({}, lastToken.end)
      firstToken.type = 'chunkText'
      firstToken.contentType = 'text'

      // Drop the events between the first enter and the last exit.
      const between = valueEndAt - valueStartAt - 1
      if (between > 0) {
        map.add(valueStartAt + 1, between, [])
      }
    }

    map.add(valueEndAt + 1, 0, [['exit', content, context]])
  }

  // Still inside the row: hand the open cell back to the caller.
  if (rowEnd === undefined) {
    return cell
  }

  // Last cell of the row: close it at the row end.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //                    ^-- exit
  //                ^^^^^^-- this cell (the last one contains two “between” parts)
  // ```
  cell.end = Object.assign({}, getPoint(events, rowEnd))
  map.add(rowEnd, 0, [['exit', cell, context]])
  return undefined
}
/**
 * Register the exit of a table, preceded by the exit of its body when there
 * is one.
 *
 * Both tokens get (their own copy of) the point of the event at `index` as
 * their end, and the exits are inserted right after that event.
 *
 * @param {Readonly<EditMap>} map
 *   Edit map that receives the insertion.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` `index` refers to.
 * @param {number} index
 *   Index of the last event that belongs to the table.
 * @param {Token} table
 *   Table token to close.
 * @param {Token | undefined} tableBody
 *   Table body token to close, if a body was opened.
 */
// eslint-disable-next-line max-params
function flushTableEnd(map, context, index, table, tableBody) {
  const endPoint = getPoint(context.events, index)
  // Innermost first: the body (if any) closes before the table.
  const closing = tableBody ? [tableBody, table] : [table]
  /** @type {Array<Event>} */
  const exits = []

  for (const token of closing) {
    token.end = Object.assign({}, endPoint)
    exits.push(['exit', token, context])
  }

  map.add(index + 1, 0, exits)
}
/**
 * Get the point an event sits at: the token’s `start` for an `enter` event,
 * its `end` for anything else.
 *
 * @param {Readonly<Array<Event>>} events
 *   List of events.
 * @param {number} index
 *   Index of the event to look at.
 * @returns {Readonly<Point>}
 *   The point itself (not a copy).
 */
function getPoint(events, index) {
  const [kind, token] = events[index]
  return kind === 'enter' ? token.start : token.end
}