site/node_modules/micromark-extension-gfm-table/dev/lib/syntax.js

/**
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Extension} Extension
 * @typedef {import('micromark-util-types').Point} Point
 * @typedef {import('micromark-util-types').Resolver} Resolver
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').Token} Token
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

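/**
 * @typedef {[number, number, number, number]} Range
 *   Cell info, as four event indices: where the previous cell is exited,
 *   where this cell is entered, where its first value is entered, and where
 *   its last value is exited (`0` means “not set”).
 *
 * @typedef {0 | 1 | 2 | 3} RowKind
 *   Where we are: `0` for not in a row, `1` for head row, `2` for delimiter
 *   row, `3` for body row.
 */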

import {ok as assert} from 'devlop'
import {factorySpace} from 'micromark-factory-space'
import {
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {EditMap} from './edit-map.js'
import {gfmTableAlign} from './infer.js'

/**
 * Create a syntax extension for `micromark` that enables GFM table syntax.
 *
 * The extension registers one flow construct: `tokenizeTable` parses the
 * rows and `resolveTable` runs afterwards (as `resolveAll`) to group the
 * resulting events into tables, bodies, and cells.
 *
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable GFM
 *   table syntax.
 */
export function gfmTable() {
  const tableConstruct = {tokenize: tokenizeTable, resolveAll: resolveTable}

  return {flow: {null: tableConstruct}}
}

/**
 * Tokenize a GFM table.
 *
 * Depending on the events that precede the current position, this either
 * parses a table head (a head row followed by a delimiter row) or one more
 * body row of a table that was already opened.
 * Only flat tokens are produced here (`tableHead`, `tableRow`,
 * `tableDelimiterRow`, `tableCellDivider`, data, whitespace, and so on).
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeTable(effects, ok, nok) {
  const self = this
  // Number of cells counted in the head row.
  let size = 0
  // Scratch counter with two uses.
  // In the head row it counts every pipe and every start of cell data (plus
  // one extra when the row does not start with a pipe), so that a row made of
  // nothing but a single pipe can be rejected.
  // It is reset to `0` at the end of the head row and then counts the cells
  // of the delimiter row, to be compared against `size`.
  let sizeB = 0
  // In the head row: whether the next pipe or data starts a new cell.
  // In the delimiter row: whether a `:` or `|` was seen.
  /** @type {boolean | undefined} */
  let seen

  return start

  /**
   * Start of a GFM table.
   *
   * If there is a valid table row or table head before, then we try to parse
   * another row.
   * Otherwise, we try to parse a head.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   * > | | b |
   *     ^
   * ```
   * @type {State}
   */
  function start(code) {
    let index = self.events.length - 1

    // Walk back over trailing line endings and line prefixes to find the last
    // meaningful event.
    while (index > -1) {
      const type = self.events[index][1].type
      if (
        type === types.lineEnding ||
        // Note: markdown-rs uses `whitespace` instead of `linePrefix`
        type === types.linePrefix
      )
        index--
      else break
    }

    const tail = index > -1 ? self.events[index][1].type : null

    // A preceding head or row means we continue that table with a body row.
    const next =
      tail === 'tableHead' || tail === 'tableRow' ? bodyRowStart : headRowBefore

    // Don’t allow lazy body rows.
    if (next === bodyRowStart && self.parser.lazy[self.now().line]) {
      return nok(code)
    }

    return next(code)
  }

  /**
   * Before table head row.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowBefore(code) {
    effects.enter('tableHead')
    effects.enter('tableRow')
    return headRowStart(code)
  }

  /**
   * Before table head row, after whitespace.
   *
   * ```markdown
   * > | | a |
   *     ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowStart(code) {
    if (code === codes.verticalBar) {
      return headRowBreak(code)
    }

    // To do: micromark-js should let us parse our own whitespace in extensions,
    // like `markdown-rs`:
    //
    // ```js
    // // 4+ spaces.
    // if (markdownSpace(code)) {
    //   return nok(code)
    // }
    // ```

    // No leading pipe: the first cell starts right here.
    seen = true
    // Count the first character, that isn’t a pipe, double.
    sizeB += 1
    return headRowBreak(code)
  }

  /**
   * At break in table head row.
   *
   * ```markdown
   * > | | a |
   *     ^
   *       ^
   *         ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowBreak(code) {
    if (code === codes.eof) {
      // A head row must be followed by a delimiter row, so EOF is never fine.
      // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don’t.
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      // If anything other than one pipe (ignoring whitespace) was used, it’s fine.
      if (sizeB > 1) {
        // Reuse the counter for the delimiter row.
        sizeB = 0
        // To do: check if this works.
        // Feel free to interrupt:
        self.interrupt = true
        effects.exit('tableRow')
        effects.enter(types.lineEnding)
        effects.consume(code)
        effects.exit(types.lineEnding)
        return headDelimiterStart
      }

      // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don’t.
      return nok(code)
    }

    if (markdownSpace(code)) {
      // To do: check if this is fine.
      // effects.attempt(State::Next(StateName::GfmTableHeadRowBreak), State::Nok)
      // State::Retry(space_or_tab(tokenizer))
      return factorySpace(effects, headRowBreak, types.whitespace)(code)
    }

    // A pipe or the start of cell data.
    sizeB += 1

    if (seen) {
      seen = false
      // Header cell count.
      size += 1
    }

    if (code === codes.verticalBar) {
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      // Whether a delimiter was seen.
      seen = true
      return headRowBreak
    }

    // Anything else is cell data.
    effects.enter(types.data)
    return headRowData(code)
  }

  /**
   * In table head row data.
   *
   * ```markdown
   * > | | a |
   *       ^
   *   | | - |
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headRowData(code) {
    if (
      code === codes.eof ||
      code === codes.verticalBar ||
      markdownLineEndingOrSpace(code)
    ) {
      effects.exit(types.data)
      return headRowBreak(code)
    }

    effects.consume(code)
    // After a backslash, the next character may be escaped.
    return code === codes.backslash ? headRowEscape : headRowData
  }

  /**
   * In table head row escape.
   *
   * ```markdown
   * > | | a\-b |
   *         ^
   *   | | ---- |
   *   | | c    |
   * ```
   *
   * @type {State}
   */
  function headRowEscape(code) {
    // Only `\` and `|` are consumed as part of the escape, so that an escaped
    // pipe does not end the cell.
    if (code === codes.backslash || code === codes.verticalBar) {
      effects.consume(code)
      return headRowData
    }

    return headRowData(code)
  }

  /**
   * Before delimiter row.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *     ^
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headDelimiterStart(code) {
    // Reset `interrupt`.
    self.interrupt = false

    // Note: in `markdown-rs`, we need to handle piercing here too.
    if (self.parser.lazy[self.now().line]) {
      return nok(code)
    }

    effects.enter('tableDelimiterRow')
    // Track if we’ve seen a `:` or `|`.
    seen = false

    if (markdownSpace(code)) {
      assert(self.parser.constructs.disable.null, 'expected `disabled.null`')
      // The prefix is limited to `constants.tabSize` unless `codeIndented` is
      // listed in the disabled constructs, in which case no limit is passed.
      return factorySpace(
        effects,
        headDelimiterBefore,
        types.linePrefix,
        self.parser.constructs.disable.null.includes('codeIndented')
          ? undefined
          : constants.tabSize
      )(code)
    }

    return headDelimiterBefore(code)
  }

  /**
   * Before delimiter row, after optional whitespace.
   *
   * Reused when a `|` is found later, to parse another cell.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *     ^
   *   | | b |
   * ```
   *
   * @type {State}
   */
  function headDelimiterBefore(code) {
    if (code === codes.dash || code === codes.colon) {
      return headDelimiterValueBefore(code)
    }

    if (code === codes.verticalBar) {
      seen = true
      // If we start with a pipe, we open a cell marker.
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      return headDelimiterCellBefore
    }

    // More whitespace / empty row not allowed at start.
    return headDelimiterNok(code)
  }

  /**
   * After `|`, before delimiter cell.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *      ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterCellBefore(code) {
    if (markdownSpace(code)) {
      return factorySpace(
        effects,
        headDelimiterValueBefore,
        types.whitespace
      )(code)
    }

    return headDelimiterValueBefore(code)
  }

  /**
   * Before delimiter cell value.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterValueBefore(code) {
    // Align: left.
    if (code === codes.colon) {
      // One more delimiter cell.
      sizeB += 1
      seen = true

      effects.enter('tableDelimiterMarker')
      effects.consume(code)
      effects.exit('tableDelimiterMarker')
      return headDelimiterLeftAlignmentAfter
    }

    // Align: none.
    if (code === codes.dash) {
      // One more delimiter cell.
      sizeB += 1
      // To do: seems weird that this *isn’t* left aligned, but that state is used?
      return headDelimiterLeftAlignmentAfter(code)
    }

    // A trailing pipe (and optional whitespace) right before the end of the
    // row: there is no further cell.
    if (code === codes.eof || markdownLineEnding(code)) {
      return headDelimiterCellAfter(code)
    }

    return headDelimiterNok(code)
  }

  /**
   * After delimiter cell left alignment marker.
   *
   * ```markdown
   *   | | a  |
   * > | | :- |
   *        ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterLeftAlignmentAfter(code) {
    if (code === codes.dash) {
      effects.enter('tableDelimiterFiller')
      return headDelimiterFiller(code)
    }

    // Anything else is not ok after the left-align colon.
    return headDelimiterNok(code)
  }

  /**
   * In delimiter cell filler.
   *
   * ```markdown
   *   | | a |
   * > | | - |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterFiller(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return headDelimiterFiller
    }

    // Align is `center` if it was `left`, `right` otherwise.
    if (code === codes.colon) {
      seen = true
      effects.exit('tableDelimiterFiller')
      effects.enter('tableDelimiterMarker')
      effects.consume(code)
      effects.exit('tableDelimiterMarker')
      return headDelimiterRightAlignmentAfter
    }

    effects.exit('tableDelimiterFiller')
    return headDelimiterRightAlignmentAfter(code)
  }

  /**
   * After delimiter cell right alignment marker.
   *
   * ```markdown
   *   | |  a |
   * > | | -: |
   *         ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterRightAlignmentAfter(code) {
    if (markdownSpace(code)) {
      return factorySpace(
        effects,
        headDelimiterCellAfter,
        types.whitespace
      )(code)
    }

    return headDelimiterCellAfter(code)
  }

  /**
   * After delimiter cell.
   *
   * ```markdown
   *   | |  a |
   * > | | -: |
   *          ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterCellAfter(code) {
    if (code === codes.verticalBar) {
      return headDelimiterBefore(code)
    }

    if (code === codes.eof || markdownLineEnding(code)) {
      // Exit when:
      // * there was no `:` or `|` at all (it’s a thematic break or setext
      //   underline instead)
      // * the header cell count is not the delimiter cell count
      if (!seen || size !== sizeB) {
        return headDelimiterNok(code)
      }

      // Note: in `markdown-rs`, a reset is needed here.
      effects.exit('tableDelimiterRow')
      effects.exit('tableHead')
      // To do: in `markdown-rs`, resolvers need to be registered manually.
      // effects.register_resolver(ResolveName::GfmTable)
      return ok(code)
    }

    return headDelimiterNok(code)
  }

  /**
   * In delimiter row, at a disallowed byte.
   *
   * ```markdown
   *   | | a |
   * > | | x |
   *       ^
   * ```
   *
   * @type {State}
   */
  function headDelimiterNok(code) {
    // Note: in `markdown-rs`, we need to reset, in `micromark-js` we don’t.
    return nok(code)
  }

  /**
   * Before table body row.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *     ^
   * ```
   *
   * @type {State}
   */
  function bodyRowStart(code) {
    // Note: in `markdown-rs` we need to manually take care of a prefix,
    // but in `micromark-js` that is done for us, so if we’re here, we’re
    // never at whitespace.
    effects.enter('tableRow')
    return bodyRowBreak(code)
  }

  /**
   * At break in table body row.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *     ^
   *       ^
   *         ^
   * ```
   *
   * @type {State}
   */
  function bodyRowBreak(code) {
    if (code === codes.verticalBar) {
      effects.enter('tableCellDivider')
      effects.consume(code)
      effects.exit('tableCellDivider')
      return bodyRowBreak
    }

    // Unlike the head row, a body row is complete at EOF or a line ending; no
    // cell counting is done here.
    if (code === codes.eof || markdownLineEnding(code)) {
      effects.exit('tableRow')
      return ok(code)
    }

    if (markdownSpace(code)) {
      return factorySpace(effects, bodyRowBreak, types.whitespace)(code)
    }

    // Anything else is cell content.
    effects.enter(types.data)
    return bodyRowData(code)
  }

  /**
   * In table body row data.
   *
   * ```markdown
   *   | | a |
   *   | | - |
   * > | | b |
   *       ^
   * ```
   *
   * @type {State}
   */
  function bodyRowData(code) {
    if (
      code === codes.eof ||
      code === codes.verticalBar ||
      markdownLineEndingOrSpace(code)
    ) {
      effects.exit(types.data)
      return bodyRowBreak(code)
    }

    effects.consume(code)
    // After a backslash, the next character may be escaped.
    return code === codes.backslash ? bodyRowEscape : bodyRowData
  }

  /**
   * In table body row escape.
   *
   * ```markdown
   *   | | a    |
   *   | | ---- |
   * > | | b\-c |
   *         ^
   * ```
   *
   * @type {State}
   */
  function bodyRowEscape(code) {
    // Only `\` and `|` are consumed as part of the escape, so that an escaped
    // pipe does not end the cell.
    if (code === codes.backslash || code === codes.verticalBar) {
      effects.consume(code)
      return bodyRowData
    }

    return bodyRowData(code)
  }
}

/**
 * Turn the flat events produced by the tokenizer into table structure.
 *
 * Walks all events once and records edits in an `EditMap`: `table` and
 * `tableBody` enters and exits, and (through `flushCell`) cell and cell
 * content enters and exits.
 * The edits are then applied to `context.events`, after which every `table`
 * enter token gets an `_align` field computed by `gfmTableAlign`.
 *
 * @type {Resolver}
 */

function resolveTable(events, context) {
  let index = -1
  // Whether we are at the start of a row, where a leading pipe opens the first
  // cell instead of separating two cells.
  let inFirstCellAwaitingPipe = true
  /** @type {RowKind} */
  let rowKind = 0
  // Cell before the current one that still has to be flushed (`[1]` is `0`
  // when there is none).
  /** @type {Range} */
  let lastCell = [0, 0, 0, 0]
  // Cell that is currently being collected.
  /** @type {Range} */
  let cell = [0, 0, 0, 0]
  // Set when a head was exited: the next row opens the table body.
  let afterHeadAwaitingFirstBodyRow = false
  // Index of the last head or row exit seen; `0` when no table end is pending.
  let lastTableEnd = 0
  /** @type {Token | undefined} */
  let currentTable
  /** @type {Token | undefined} */
  let currentBody
  /** @type {Token | undefined} */
  let currentCell

  const map = new EditMap()

  while (++index < events.length) {
    const event = events[index]
    const token = event[1]

    if (event[0] === 'enter') {
      // Start of head.
      if (token.type === 'tableHead') {
        afterHeadAwaitingFirstBodyRow = false

        // Inject previous (body end and) table end.
        if (lastTableEnd !== 0) {
          assert(currentTable, 'there should be a table opening')
          flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
          currentBody = undefined
          lastTableEnd = 0
        }

        // Inject table start.
        currentTable = {
          type: 'table',
          start: Object.assign({}, token.start),
          // Note: correct end is set later.
          end: Object.assign({}, token.end)
        }
        map.add(index, 0, [['enter', currentTable, context]])
      } else if (
        token.type === 'tableRow' ||
        token.type === 'tableDelimiterRow'
      ) {
        // Start of a row: reset the cell tracking; the first cell is entered
        // right after the row enter.
        inFirstCellAwaitingPipe = true
        currentCell = undefined
        lastCell = [0, 0, 0, 0]
        cell = [0, index + 1, 0, 0]

        // Inject table body start.
        if (afterHeadAwaitingFirstBodyRow) {
          afterHeadAwaitingFirstBodyRow = false
          currentBody = {
            type: 'tableBody',
            start: Object.assign({}, token.start),
            // Note: correct end is set later.
            end: Object.assign({}, token.end)
          }
          map.add(index, 0, [['enter', currentBody, context]])
        }

        // A plain row is a body row once a body was opened, a head row
        // otherwise.
        rowKind = token.type === 'tableDelimiterRow' ? 2 : currentBody ? 3 : 1
      }
      // Cell data.
      else if (
        rowKind &&
        (token.type === types.data ||
          token.type === 'tableDelimiterMarker' ||
          token.type === 'tableDelimiterFiller')
      ) {
        inFirstCellAwaitingPipe = false

        // First value in cell.
        if (cell[2] === 0) {
          if (lastCell[1] !== 0) {
            // The cell flushed now is exited where the current cell enters.
            cell[0] = cell[1]
            currentCell = flushCell(
              map,
              context,
              lastCell,
              rowKind,
              undefined,
              currentCell
            )
            lastCell = [0, 0, 0, 0]
          }

          cell[2] = index
        }
      } else if (token.type === 'tableCellDivider') {
        if (inFirstCellAwaitingPipe) {
          // Leading pipe of the row: it does not close a cell.
          inFirstCellAwaitingPipe = false
        } else {
          if (lastCell[1] !== 0) {
            // The cell flushed now is exited where the current cell enters.
            cell[0] = cell[1]
            currentCell = flushCell(
              map,
              context,
              lastCell,
              rowKind,
              undefined,
              currentCell
            )
          }

          // The divider closes the current cell, which is kept around until
          // we know whether another cell follows; a new one starts at this
          // divider.
          lastCell = cell
          cell = [lastCell[1], index, 0, 0]
        }
      }
    }
    // Exit events.
    else if (token.type === 'tableHead') {
      afterHeadAwaitingFirstBodyRow = true
      lastTableEnd = index
    } else if (
      token.type === 'tableRow' ||
      token.type === 'tableDelimiterRow'
    ) {
      lastTableEnd = index

      // Flush the final cell of the row, passing the row exit index so that
      // the cell is closed there too.
      if (lastCell[1] !== 0) {
        cell[0] = cell[1]
        currentCell = flushCell(
          map,
          context,
          lastCell,
          rowKind,
          index,
          currentCell
        )
      } else if (cell[1] !== 0) {
        currentCell = flushCell(map, context, cell, rowKind, index, currentCell)
      }

      rowKind = 0
    } else if (
      rowKind &&
      (token.type === types.data ||
        token.type === 'tableDelimiterMarker' ||
        token.type === 'tableDelimiterFiller')
    ) {
      // Remember the exit of the last value seen in the current cell.
      cell[3] = index
    }
  }

  // Close the last table, if there was one.
  if (lastTableEnd !== 0) {
    assert(currentTable, 'expected table opening')
    flushTableEnd(map, context, lastTableEnd, currentTable, currentBody)
  }

  // Apply all recorded edits.
  // NOTE(review): the indices recorded above come from `events`, while the
  // edits are applied to `context.events`; presumably both are the same array
  // — confirm against the caller.
  map.consume(context.events)

  // To do: move this into `html`, when events are exposed there.
  // That’s what `markdown-rs` does.
  // That needs updates to `mdast-util-gfm-table`.
  index = -1
  while (++index < context.events.length) {
    const event = context.events[index]
    if (event[0] === 'enter' && event[1].type === 'table') {
      event[1]._align = gfmTableAlign(context.events, index)
    }
  }

  return events
}

/**
 * Record the edits needed to wrap one cell.
 *
 * Up to four things are registered on `map`, always in this order: the exit
 * of the previous cell (when `range[0]` is set), the enter of this cell, the
 * enter/exit of its content (when `range[2]` is set), and the exit of this
 * cell (when `rowEnd` is given).
 *
 * @param {EditMap} map
 *   Edit map to add to.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` the indices in `range` refer to.
 * @param {Readonly<Range>} range
 *   Event indices of the cell (`0` means “not set”).
 * @param {RowKind} rowKind
 *   Kind of row the cell is in.
 * @param {number | undefined} rowEnd
 *   Index of the row exit, if this is the last cell of the row.
 * @param {Token | undefined} previousCell
 *   Token of the cell that is still open, if any.
 * @returns {Token | undefined}
 *   Token of this cell when it is still open, `undefined` when it was closed
 *   at `rowEnd`.
 */
// eslint-disable-next-line max-params
function flushCell(map, context, range, rowKind, rowEnd, previousCell) {
  const [previousExitAt, enterAt, valueEnterAt, valueExitAt] = range
  const {events} = context
  /**
   * Shallow copy of a point.
   *
   * @param {Readonly<Point>} point
   * @returns {Point}
   */
  const clone = (point) => Object.assign({}, point)

  // `markdown-rs` uses `tableDelimiterCell` for delimiter rows and `tableCell`
  // for everything else; here each row kind has its own name.
  let cellType = 'tableData'

  if (rowKind === 1) {
    cellType = 'tableHeader'
  } else if (rowKind === 2) {
    cellType = 'tableDelimiter'
  }

  // Close the cell before this one, if it is still open.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //          ^-- exit
  //           ^^^^-- this cell
  // ```
  if (previousExitAt !== 0) {
    assert(previousCell, 'expected previous cell enter')
    previousCell.end = clone(getPoint(events, previousExitAt))
    map.add(previousExitAt, 0, [['exit', previousCell, context]])
  }

  // Open this cell.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //           ^-- enter
  //           ^^^^-- this cell
  // ```
  const enterPoint = getPoint(events, enterAt)
  /** @type {Token} */
  const cellToken = {
    type: cellType,
    start: clone(enterPoint),
    // Note: the real end is filled in when the cell is exited.
    end: clone(enterPoint)
  }
  map.add(enterAt, 0, [['enter', cellToken, context]])

  // Wrap everything from the first value to the last value in one content
  // token, and drop the events in between.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //            ^-- enter
  //             ^-- exit
  //           ^^^^-- this cell
  // ```
  if (valueEnterAt !== 0) {
    /** @type {Token} */
    const contentToken = {
      // `markdown-rs` uses `tableDelimiterCellValue` / `tableCellText`.
      type: 'tableContent',
      start: clone(getPoint(events, valueEnterAt)),
      end: clone(getPoint(events, valueExitAt))
    }
    map.add(valueEnterAt, 0, [['enter', contentToken, context]])
    assert(valueExitAt !== 0)

    // Outside delimiter rows, the first value token is stretched over the
    // whole content and turned into a text chunk.
    if (rowKind !== 2) {
      const firstValue = events[valueEnterAt][1]
      const lastValue = events[valueExitAt][1]
      firstValue.end = clone(lastValue.end)
      firstValue.type = types.chunkText
      firstValue.contentType = constants.contentTypeText

      // Events strictly between the first enter and the last exit go away.
      const between = valueExitAt - valueEnterAt - 1

      if (between > 0) {
        map.add(valueEnterAt + 1, between, [])
      }
    }

    map.add(valueExitAt + 1, 0, [['exit', contentToken, context]])
  }

  // Not at the end of the row: the cell stays open for the next call.
  if (rowEnd === undefined) {
    return cellToken
  }

  // Close the last cell of the row at the row exit.
  //
  // ```markdown
  // > | | aa | bb | cc |
  //                    ^-- exit
  //               ^^^^^^-- this cell (the last one contains two “between” parts)
  // ```
  cellToken.end = clone(getPoint(events, rowEnd))
  map.add(rowEnd, 0, [['exit', cellToken, context]])
  return undefined
}

/**
 * Record the exit of a table (and of its body, if it has one).
 *
 * Both tokens get the point of the event at `index` as their end, and the
 * exits are added right after that event, body first.
 *
 * @param {Readonly<EditMap>} map
 *   Edit map to add to.
 * @param {Readonly<TokenizeContext>} context
 *   Context whose `events` `index` refers to.
 * @param {number} index
 *   Index of the last event that belongs to the table.
 * @param {Token} table
 *   Table token to close.
 * @param {Token | undefined} tableBody
 *   Table body token to close, if any.
 */
// eslint-disable-next-line max-params
function flushTableEnd(map, context, index, table, tableBody) {
  const endPoint = getPoint(context.events, index)
  /** @type {Array<Event>} */
  const closing = []

  // The body is nested in the table, so it has to be exited first.
  for (const token of [tableBody, table]) {
    if (!token) continue
    token.end = Object.assign({}, endPoint)
    closing.push(['exit', token, context])
  }

  map.add(index + 1, 0, closing)
}

/**
 * Get the point at which an event happens: the start of its token for an
 * enter, the end of its token for anything else.
 *
 * @param {Readonly<Array<Event>>} events
 *   List of events.
 * @param {number} index
 *   Index of the event to look at.
 * @returns {Readonly<Point>}
 *   The point itself (not a copy).
 */
function getPoint(events, index) {
  const [kind, token] = events[index]
  return kind === 'enter' ? token.start : token.end
}