site/node_modules/micromark-core-commonmark/dev/lib/character-reference.js
2024-10-14 08:09:33 +02:00

163 lines
3.8 KiB
JavaScript

/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').Construct} Construct
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {decodeNamedCharacterReference} from 'decode-named-character-reference'
import {
asciiAlphanumeric,
asciiDigit,
asciiHexDigit
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'
/**
 * Construct describing character references.
 *
 * It is registered under the name `characterReference` and delegates all
 * parsing to `tokenizeCharacterReference`.
 *
 * @type {Construct}
 */
export const characterReference = {
name: 'characterReference',
tokenize: tokenizeCharacterReference
}
/**
 * Tokenizer for character references.
 *
 * Handles the three kinds that the states below distinguish: named
 * (`&amp;`), decimal (`&#123;`), and hexadecimal (`&#x9;`).
 * Returns the initial state; every failure path goes through `nok`, and a
 * complete reference (closing `;` consumed) yields `ok`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const context = this
  // Number of value characters accepted so far.
  let consumed = 0
  /** @type {number} */
  let limit
  /** @type {(code: Code) => boolean} */
  let accepts

  return start

  /**
   * Start of character reference.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.ampersand, 'expected `&`')
    effects.enter(types.characterReference)
    consumeMarker(types.characterReferenceMarker, code)
    return open
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code !== codes.numberSign) {
      // Anything but `#` is treated as the first character of a named value.
      beginValue(constants.characterReferenceNamedSizeMax, asciiAlphanumeric)
      return value(code)
    }

    consumeMarker(types.characterReferenceMarkerNumeric, code)
    return numeric
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    const isHexadecimal =
      code === codes.uppercaseX || code === codes.lowercaseX

    if (isHexadecimal) {
      consumeMarker(types.characterReferenceMarkerHexadecimal, code)
      beginValue(constants.characterReferenceHexadecimalSizeMax, asciiHexDigit)
      // The `x` is consumed, so the value starts at the next code.
      return value
    }

    beginValue(constants.characterReferenceDecimalSizeMax, asciiDigit)
    // The current code was not consumed: it is already part of the value.
    return value(code)
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // A `;` only closes the reference when the value is non-empty.
    const atEnd = code === codes.semicolon && consumed > 0

    if (!atEnd) {
      // The counter is bumped only for codes that pass the kind's check.
      if (accepts(code) && consumed++ < limit) {
        effects.consume(code)
        return value
      }

      return nok(code)
    }

    const token = effects.exit(types.characterReferenceValue)
    const isNamed = accepts === asciiAlphanumeric

    // Named references must decode to something; numeric ones are not
    // looked up here.
    if (
      isNamed &&
      !decodeNamedCharacterReference(context.sliceSerialize(token))
    ) {
      return nok(code)
    }

    // To do: `markdown-rs` uses a different name:
    // `CharacterReferenceMarkerSemi`.
    consumeMarker(types.characterReferenceMarker, code)
    effects.exit(types.characterReference)
    return ok
  }

  /**
   * Consume `code` as a single-character token of the given `type`.
   *
   * @param {string} type
   * @param {Code} code
   * @returns {undefined}
   */
  function consumeMarker(type, code) {
    effects.enter(type)
    effects.consume(code)
    effects.exit(type)
  }

  /**
   * Open the value token and record the size limit and the character check
   * for the kind of reference being parsed.
   *
   * @param {number} sizeMax
   * @param {(code: Code) => boolean} predicate
   * @returns {undefined}
   */
  function beginValue(sizeMax, predicate) {
    effects.enter(types.characterReferenceValue)
    limit = sizeMax
    accepts = predicate
  }
}