/**
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Extension} Extension
 * @typedef {import('micromark-util-types').Point} Point
 * @typedef {import('micromark-util-types').Resolver} Resolver
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Token} Token
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

/**
 * @typedef {[number, number, number, number]} Range
 *   Cell info, as indexes into the event list:
 *   `[exit of previous cell, enter of this cell, first value enter, last value exit]`.
 *   A slot of `0` means “not set”.
 *
 * @typedef {0 | 1 | 2 | 3} RowKind
 *   Where we are: `0` when not in a row, `1` for head row, `2` for delimiter
 *   row, `3` for body row.
 */

import {ok as assert} from 'devlop'
import {factorySpace} from 'micromark-factory-space'
import {
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {EditMap} from './edit-map.js'
import {gfmTableAlign} from './infer.js'

/**
 * Create a syntax extension for `micromark` to support GitHub tables syntax.
 *
 * The extension registers one flow construct (under the `null` key) made of
 * `tokenizeTable` and the `resolveTable` resolve-all hook.
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable GFM
 *   table syntax.
 */
export function gfmTable() {
  return {
    flow: {null: {tokenize: tokenizeTable, resolveAll: resolveTable}}
  }
}

/**
 * Tokenizer for GFM tables.
 *
 * Depending on the events seen so far it either parses a head row plus
 * delimiter row (a new table), or a single body row (continuing a table).
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeTable(effects, ok, nok) {
  const self = this
  // Number of cells in the head row.
  let size = 0
  // Scratch counter: first counts non-whitespace pieces of the head row (to
  // reject a row that is only one pipe), then is reset and counts the cells of
  // the delimiter row.
  let sizeB = 0
  /** @type {boolean | undefined} */
  let seen

  return start

  /**
   * Start of a GFM table.
   *
   * If there is a valid table row or table head before, then we try to parse
   * another row.
   * Otherwise, we try to parse a head.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   * > | | b |
   *     ^
   * ```
   * @type {State}
   */
  function start(code) {
    let index = self.events.length - 1

    // Walk back over trailing line endings and line prefixes.
    while (index > -1) {
      const type = self.events[index][1].type

      if (
        type === types.lineEnding ||
        // Note: markdown-rs uses `whitespace` instead of `linePrefix`
        type === types.linePrefix
      ) {
        index--
      } else {
        break
      }
    }

    const tail = index > -1 ? self.events[index][1].type : null

    const next =
      tail === 'tableHead' || tail === 'tableRow' ? bodyRowStart : headRowBefore

    // Don’t allow lazy body rows.
    if (next === bodyRowStart && self.parser.lazy[self.now().line]) {
      return nok(code)
    }

    return next(code)
  }

  /**
   * Before table head row.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowBefore(code) {
    effects.enter('tableHead')
    effects.enter('tableRow')
    return headRowStart(code)
  }

  /**
   * Before table head row, after whitespace.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowStart(code) {
    if (code === codes.verticalBar) {
      return headRowBreak(code)
    }

    // To do: micromark-js should let us parse our own whitespace in extensions,
    // like `markdown-rs`:
    //
    // ```js
    // // 4+ spaces.
    // if (markdownSpace(code)) {
    //   return nok(code)
    // }
    // ```

    seen = true
    // Count the first character, that isn’t a pipe, double.
    sizeB += 1
    return headRowBreak(code)
  }

  /**
   * At break in table head row.
   *
   * ```markdown
   * > | | a |
   *     ^
   *       ^
   *         ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowBreak(code) {
    if (code === codes.eof) {
      // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don‘t.
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      // If anything other than one pipe (ignoring whitespace) was used, it’s fine.
      if (sizeB > 1) {
        sizeB = 0
        // To do: check if this works.
        // Feel free to interrupt:
        self.interrupt = true
        effects.exit('tableRow')
        effects.enter(types.lineEnding)
        effects.consume(code)
        effects.exit(types.lineEnding)
        return headDelimiterStart
      }

      // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don‘t.
      return nok(code)
    }

    if (markdownSpace(code)) {
      // To do: check if this is fine.
      // effects.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok)
      // State::Retry(space_or_tab(tokenizer))
      return factorySpace(effects, headRowBreak, types.whitespace)(code)
    }

    sizeB += 1

    if (seen) {
      seen = false
      // Header cell count.
      size += 1
    }

    if (code === codes.verticalBar) {
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      // Whether a delimiter was seen.
      seen = true
      return headRowBreak
    }

    // Anything else is cell data.
    effects.enter(types.data)
    return headRowData(code)
  }

  /**
   * In table head row data.
   *
   * ```markdown
   * > | | a |
   *       ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowData(code) {
    if (
      code === codes.eof ||
      code === codes.verticalBar ||
      markdownLineEndingOrSpace(code)
    ) {
      effects.exit(types.data)
      return headRowBreak(code)
    }

    effects.consume(code)
    return code === codes.backslash ? headRowEscape : headRowData
  }

  /**
   * In table head row escape.
   *
   * ```markdown
   * > | | a\-b |
   *         ^
   *   | | ---- |
   *   | | c    |
   * ```
   *
   * @type {State}
   */
  function headRowEscape(code) {
    if (code === codes.backslash || code === codes.verticalBar) {
      effects.consume(code)
      return headRowData
    }

    return headRowData(code)
  }

  /**
   * Before delimiter row.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *     ^
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headDelimiterStart(code) {
    // Reset `interrupt`.
    self.interrupt = false

    // Note: in `markdown-rs`, we need to handle piercing here too.
    if (self.parser.lazy[self.now().line]) {
      return nok(code)
    }

    effects.enter('tableDelimiterRow')
    // Track if we’ve seen a `:` or `|`.
    seen = false

    if (markdownSpace(code)) {
      assert(self.parser.constructs.disable.null, 'expected `disabled.null`')
      return factorySpace(
        effects,
        headDelimiterBefore,
        types.linePrefix,
        self.parser.constructs.disable.null.includes('codeIndented')
          ? undefined
          : constants.tabSize
      )(code)
    }

    return headDelimiterBefore(code)
  }

  /**
   * Before delimiter row, after optional whitespace.
   *
   * Reused when a `|` is found later, to parse another cell.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *     ^
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headDelimiterBefore(code) {
    if (code === codes.dash || code === codes.colon) {
      return headDelimiterValueBefore(code)
    }

    if (code === codes.verticalBar) {
      seen = true
      // If we start with a pipe, we open a cell marker.
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      return headDelimiterCellBefore
    }

    // More whitespace / empty row not allowed at start.
    return headDelimiterNok(code)
  }

  /**
   * After `|`, before delimiter cell.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *      ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterCellBefore(code) {
    if (markdownSpace(code)) {
      return factorySpace(
        effects,
        headDelimiterValueBefore,
        types.whitespace
      )(code)
    }

    return headDelimiterValueBefore(code)
  }

  /**
   * Before delimiter cell value.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterValueBefore(code) {
    // Align: left.
    if (code === codes.colon) {
      sizeB += 1
      seen = true

      effects.enter('tableDelimiterMarker')
      effects.consume(code)
      effects.exit('tableDelimiterMarker')
      return headDelimiterLeftAlignmentAfter
    }

    // Align: none.
    if (code === codes.dash) {
      sizeB += 1
      // To do: seems weird that this *isn’t* left aligned, but that state is used?
      return headDelimiterLeftAlignmentAfter(code)
    }

    if (code === codes.eof || markdownLineEnding(code)) {
      return headDelimiterCellAfter(code)
    }

    return headDelimiterNok(code)
  }

  /**
   * After delimiter cell left alignment marker.
   *
   * ```markdown
   *   | | a  |
   * > | | :- |
   *        ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterLeftAlignmentAfter(code) {
    if (code === codes.dash) {
      effects.enter('tableDelimiterFiller')
      return headDelimiterFiller(code)
    }

    // Anything else is not ok after the left-align colon.
    return headDelimiterNok(code)
  }

  /**
   * In delimiter cell filler.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterFiller(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return headDelimiterFiller
    }

    // Align is `center` if it was `left`, `right` otherwise.
    if (code === codes.colon) {
      seen = true
      effects.exit('tableDelimiterFiller')
      effects.enter('tableDelimiterMarker')
      effects.consume(code)
      effects.exit('tableDelimiterMarker')
      return headDelimiterRightAlignmentAfter
    }

    effects.exit('tableDelimiterFiller')
    return headDelimiterRightAlignmentAfter(code)
  }

  /**
   * After delimiter cell right alignment marker.
   *
   * ```markdown
   *   | |  a |
   * > | | -: |
   *         ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterRightAlignmentAfter(code) {
    if (markdownSpace(code)) {
      return factorySpace(
        effects,
        headDelimiterCellAfter,
        types.whitespace
      )(code)
    }

    return headDelimiterCellAfter(code)
  }

  /**
   * After delimiter cell.
   *
   * ```markdown
   *   | |  a |
   * > | | -: |
   *          ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterCellAfter(code) {
    if (code === codes.verticalBar) {
      return headDelimiterBefore(code)
    }

    if (code === codes.eof || markdownLineEnding(code)) {
      // Exit when:
      // * there was no `:` or `|` at all (it’s a thematic break or setext
      //   underline instead)
      // * the header cell count is not the delimiter cell count
      if (!seen || size !== sizeB) {
        return headDelimiterNok(code)
      }

      // Note: in markdown-rs`, a reset is needed here.
      effects.exit('tableDelimiterRow')
      effects.exit('tableHead')
      // To do: in `markdown-rs`, resolvers need to be registered manually.
      // effects.register_resolver(ResolveName::GfmTable)
      return ok(code)
    }

    return headDelimiterNok(code)
  }

  /**
   * In delimiter row, at a disallowed byte.
   *
   * ```markdown
   *   | | a |
   * > | | x |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterNok(code) {
    // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don‘t.
    return nok(code)
  }

  /**
   * Before table body row.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *     ^
   * ```
   *
   * @type {State}
   */
  function bodyRowStart(code) {
    // Note: in `markdown-rs` we need to manually take care of a prefix,
    // but in `micromark-js` that is done for us, so if we’re here, we’re
    // never at whitespace.
    effects.enter('tableRow')
    return bodyRowBreak(code)
  }

  /**
   * At break in table body row.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *     ^
   *       ^
   *         ^
   * ```
   *
   * @type {State}
   */
  function bodyRowBreak(code) {
    if (code === codes.verticalBar) {
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      return bodyRowBreak
    }

    if (code === codes.eof || markdownLineEnding(code)) {
      effects.exit('tableRow')
      return ok(code)
    }

    if (markdownSpace(code)) {
      return factorySpace(effects, bodyRowBreak, types.whitespace)(code)
    }

    // Anything else is cell content.
    effects.enter(types.data)
    return bodyRowData(code)
  }

  /**
   * In table body row data.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *       ^
   * ```
   *
   * @type {State}
   */
  function bodyRowData(code) {
    if (
      code === codes.eof ||
      code === codes.verticalBar ||
      markdownLineEndingOrSpace(code)
    ) {
      effects.exit(types.data)
      return bodyRowBreak(code)
    }

    effects.consume(code)
    return code === codes.backslash ? bodyRowEscape : bodyRowData
  }

  /**
   * In table body row escape.
   *
   * ```markdown
   *   | | a    |
   *   | | ---- |
   * > | | b\-c |
   *         ^
   * ```
   *
   * @type {State}
   */
  function bodyRowEscape(code) {
    if (code === codes.backslash || code === codes.verticalBar) {
      effects.consume(code)
      return bodyRowData
    }

    return bodyRowData(code)
  }
}

/**
 * Resolve-all hook for tables.
 *
 * Walks the events once and records, in an `EditMap`, the extra tokens that
 * group the flat tokenizer output: `table`, `tableBody`, one cell token per
 * cell (`tableHeader`, `tableDelimiter`, or `tableData`) and a `tableContent`
 * token around each cell’s value.
 * The edits are then applied to `context.events`, and every `table` enter
 * token gets an `_align` field computed by `gfmTableAlign`.
 *
 * @type {Resolver}
 */
function resolveTable(events, context) {
  let index = -1
  // Whether a `|` seen now would be the leading pipe of the row (which does
  // not close a cell).
  let inFirstCellAwaitingPipe = true
  /** @type {RowKind} */
  let rowKind = 0
  /** @type {Range} */
  let lastCell = [0, 0, 0, 0]
  /** @type {Range} */
  let cell = [0, 0, 0, 0]
  let afterHeadAwaitingFirstBodyRow = false
  // Index of the most recent head/row exit; `0` means no table is pending.
  let lastTableEnd = 0
  /** @type {Token | undefined} */
  let currentTable
  /** @type {Token | undefined} */
  let currentBody
  /** @type {Token | undefined} */
  let currentCell

  const map = new EditMap()

  while (++index < events.length) {
    const event = events[index]
    const token = event[1]

    if (event[0] === 'enter') {
      // Start of head.
      if (token.type === 'tableHead') {
        afterHeadAwaitingFirstBodyRow = false

        // Inject previous (body end and) table end.
        if (lastTableEnd !== 0) {
          assert(currentTable, 'there should be a table opening')
          flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
          currentBody = undefined
          lastTableEnd = 0
        }

        // Inject table start.
        currentTable = {
          type: 'table',
          start: Object.assign({}, token.start),
          // Note: correct end is set later.
          end: Object.assign({}, token.end)
        }
        map.add(index, 0, [['enter', currentTable, context]])
      } else if (
        token.type === 'tableRow' ||
        token.type === 'tableDelimiterRow'
      ) {
        inFirstCellAwaitingPipe = true
        currentCell = undefined
        lastCell = [0, 0, 0, 0]
        cell = [0, index + 1, 0, 0]

        // Inject table body start.
        if (afterHeadAwaitingFirstBodyRow) {
          afterHeadAwaitingFirstBodyRow = false
          currentBody = {
            type: 'tableBody',
            start: Object.assign({}, token.start),
            // Note: correct end is set later.
            end: Object.assign({}, token.end)
          }
          map.add(index, 0, [['enter', currentBody, context]])
        }

        rowKind =
          token.type === 'tableDelimiterRow' ? 2 : currentBody ? 3 : 1
      }
      // Cell data.
      else if (
        rowKind &&
        (token.type === types.data ||
          token.type === 'tableDelimiterMarker' ||
          token.type === 'tableDelimiterFiller')
      ) {
        inFirstCellAwaitingPipe = false

        // First value in cell.
        if (cell[2] === 0) {
          if (lastCell[1] !== 0) {
            cell[0] = cell[1]
            currentCell = flushCell(
              map,
              context,
              lastCell,
              rowKind,
              undefined,
              currentCell
            )
            lastCell = [0, 0, 0, 0]
          }

          cell[2] = index
        }
      } else if (token.type === 'tableCellDivider') {
        if (inFirstCellAwaitingPipe) {
          inFirstCellAwaitingPipe = false
        } else {
          if (lastCell[1] !== 0) {
            cell[0] = cell[1]
            currentCell = flushCell(
              map,
              context,
              lastCell,
              rowKind,
              undefined,
              currentCell
            )
          }

          lastCell = cell
          cell = [lastCell[1], index, 0, 0]
        }
      }
    }
    // Exit events.
    else if (token.type === 'tableHead') {
      afterHeadAwaitingFirstBodyRow = true
      lastTableEnd = index
    } else if (
      token.type === 'tableRow' ||
      token.type === 'tableDelimiterRow'
    ) {
      lastTableEnd = index

      if (lastCell[1] !== 0) {
        cell[0] = cell[1]
        currentCell = flushCell(
          map,
          context,
          lastCell,
          rowKind,
          index,
          currentCell
        )
      } else if (cell[1] !== 0) {
        currentCell = flushCell(map, context, cell, rowKind, index, currentCell)
      }

      rowKind = 0
    } else if (
      rowKind &&
      (token.type === types.data ||
        token.type === 'tableDelimiterMarker' ||
        token.type === 'tableDelimiterFiller')
    ) {
      cell[3] = index
    }
  }

  // Close the last table, if any.
  if (lastTableEnd !== 0) {
    assert(currentTable, 'expected table opening')
    flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
  }

  map.consume(context.events)

  // To do: move this into `html`, when events are exposed there.
  // That’s what `markdown-rs` does.
  // That needs updates to `mdast-util-gfm-table`.
  index = -1
  while (++index < context.events.length) {
    const event = context.events[index]
    if (event[0] === 'enter' && event[1].type === 'table') {
      event[1]._align = gfmTableAlign(context.events, index)
    }
  }

  return events
}
/**
 * Generate a cell.
 *
 * Records edits in `map` that wrap one cell: an optional exit for the cell
 * before it, the enter of this cell, a `tableContent` enter/exit around the
 * cell’s value (if it has one), and — at the end of a row — the exit of this
 * cell.
 *
 * @param {EditMap} map
 *   Edit map to record the insertions and removals in.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` the indexes in `range` refer to.
 * @param {Readonly<Range>} range
 *   Event indexes of the cell; a slot of `0` means “not set”.
 * @param {RowKind} rowKind
 *   Kind of row the cell is in; picks the cell token type.
 * @param {number | undefined} rowEnd
 *   Index of the row’s exit event if this is the last cell in the row.
 * @param {Token | undefined} previousCell
 *   Cell token that is still open, if any.
 * @returns {Token | undefined}
 *   The cell token that was opened and is still open, or `undefined` if it
 *   was closed because `rowEnd` was given.
 */
// eslint-disable-next-line max-params
function flushCell(map, context, range, rowKind, rowEnd, previousCell) {
  // `markdown-rs` uses:
  // rowKind === 2 ? 'tableDelimiterCell' : 'tableCell'
  const groupName =
    rowKind === 1
      ? 'tableHeader'
      : rowKind === 2
        ? 'tableDelimiter'
        : 'tableData'
  // `markdown-rs` uses:
  // rowKind === 2 ? 'tableDelimiterCellValue' : 'tableCellText'
  const valueName = 'tableContent'

  // Insert an exit for the previous cell, if there is one.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //          ^-- exit
  //           ^^^^-- this cell
  // ```
  if (range[0] !== 0) {
    assert(previousCell, 'expected previous cell enter')
    previousCell.end = Object.assign({}, getPoint(context.events, range[0]))
    map.add(range[0], 0, [['exit', previousCell, context]])
  }

  // Insert enter of this cell.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //           ^-- enter
  //           ^^^^-- this cell
  // ```
  const now = getPoint(context.events, range[1])
  /** @type {Token} */
  const currentCell = {
    type: groupName,
    start: Object.assign({}, now),
    // Note: correct end is set later.
    end: Object.assign({}, now)
  }
  map.add(range[1], 0, [['enter', currentCell, context]])

  // Insert text start at first data start and end at last data end, and
  // remove events between.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //            ^-- enter
  //             ^-- exit
  //           ^^^^-- this cell
  // ```
  if (range[2] !== 0) {
    const relatedStart = getPoint(context.events, range[2])
    const relatedEnd = getPoint(context.events, range[3])
    /** @type {Token} */
    const valueToken = {
      type: valueName,
      start: Object.assign({}, relatedStart),
      end: Object.assign({}, relatedEnd)
    }
    map.add(range[2], 0, [['enter', valueToken, context]])
    assert(range[3] !== 0)

    // Delimiter rows keep their marker/filler events as they are.
    if (rowKind !== 2) {
      // Fix positional info on remaining events
      const start = context.events[range[2]]
      const end = context.events[range[3]]
      start[1].end = Object.assign({}, end[1].end)
      start[1].type = types.chunkText
      start[1].contentType = constants.contentTypeText

      // Remove if needed.
      if (range[3] > range[2] + 1) {
        const a = range[2] + 1
        const b = range[3] - range[2] - 1
        map.add(a, b, [])
      }
    }

    map.add(range[3] + 1, 0, [['exit', valueToken, context]])
  }

  // Insert an exit for the last cell, if at the row end.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //                    ^-- exit
  //               ^^^^^^-- this cell (the last one contains two “between” parts)
  // ```
  if (rowEnd !== undefined) {
    currentCell.end = Object.assign({}, getPoint(context.events, rowEnd))
    map.add(rowEnd, 0, [['exit', currentCell, context]])
    return undefined
  }

  return currentCell
}
/**
 * Generate table end (and table body end).
 *
 * Sets the `end` of `table` (and of `tableBody`, when given) to a copy of the
 * point of the event at `index`, and records their exit events in `map` to be
 * inserted right after that event (body exit first, then table exit).
 *
 * @param {Readonly<EditMap>} map
 *   Edit map to record the insertion in.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` `index` refers to.
 * @param {number} index
 *   Index of the last event that belongs to the table.
 * @param {Token} table
 *   Open table token.
 * @param {Token | undefined} tableBody
 *   Open table body token, if the table has a body.
 */
// eslint-disable-next-line max-params
function flushTableEnd(map, context, index, table, tableBody) {
  /** @type {Array<Event>} */
  const exits = []
  const related = getPoint(context.events, index)

  if (tableBody) {
    tableBody.end = Object.assign({}, related)
    exits.push(['exit', tableBody, context])
  }

  table.end = Object.assign({}, related)
  exits.push(['exit', table, context])

  map.add(index + 1, 0, exits)
}

/**
 * Get the point an event sits at: the token’s `start` for an enter event,
 * its `end` for any other event.
 *
 * The point is returned as-is (not copied).
 *
 * @param {Readonly<Array<Event>>} events
 *   List of events.
 * @param {number} index
 *   Index of the event in `events`.
 * @returns {Readonly<Point>}
 *   Point of that event.
 */
function getPoint(events, index) {
  const event = events[index]
  const side = event[0] === 'enter' ? 'start' : 'end'
  return event[1][side]
}