// site/node_modules/micromark-core-commonmark/lib/character-reference.js
// 2024-10-14 08:09:33 +02:00
//
// 153 lines
// 3.4 KiB
// JavaScript

/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').Construct} Construct
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {decodeNamedCharacterReference} from 'decode-named-character-reference'
import {
asciiAlphanumeric,
asciiDigit,
asciiHexDigit
} from 'micromark-util-character'
/**
 * Construct for character references: named (`&amp;`), decimal (`&#123;`),
 * and hexadecimal (`&#x9;`).
 *
 * Pairs the construct name with `tokenizeCharacterReference`, the tokenizer
 * that recognizes such references.
 *
 * @type {Construct}
 */
export const characterReference = {
name: 'characterReference',
tokenize: tokenizeCharacterReference
}
/**
 * Tokenize a character reference: `&name;`, `&#digits;`, or `&#xhex;`.
 *
 * Returns the initial state of a small state machine. Each state receives
 * one character code and either returns the next state, returns `ok` once
 * the closing `;` was consumed, or returns the result of `nok(code)` when
 * the input cannot be a character reference.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  // Character codes that steer the state machine.
  const numberSign = 35 // `#`
  const semicolon = 59 // `;`
  const uppercaseX = 88 // `X`
  const lowercaseX = 120 // `x`

  // Maximum number of value characters allowed per kind of reference.
  const namedSizeMax = 31
  const decimalSizeMax = 7
  const hexadecimalSizeMax = 6

  const context = this
  /** Number of value characters seen so far. */
  let length = 0
  /**
   * Most value characters the current kind of reference may have.
   *
   * @type {number}
   */
  let limit
  /**
   * Check deciding whether a code may be part of the value.
   *
   * @type {(code: Code) => boolean}
   */
  let accepts

  return start

  /**
   * Start of character reference.
   *
   * The given code is consumed as the opening marker without being checked;
   * presumably the caller only dispatches here on `&`.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter('characterReference')
    consumeAs('characterReferenceMarker', code)
    return open
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code !== numberSign) {
      // Named reference: this code is already the first value character.
      return beginValue(asciiAlphanumeric, namedSizeMax)(code)
    }

    consumeAs('characterReferenceMarkerNumeric', code)
    return numeric
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    const isHexadecimal = code === uppercaseX || code === lowercaseX

    if (isHexadecimal) {
      // The `x` is a marker of its own; the value starts at the next code.
      consumeAs('characterReferenceMarkerHexadecimal', code)
      return beginValue(asciiHexDigit, hexadecimalSizeMax)
    }

    // Decimal reference: this code is already the first value character.
    return beginValue(asciiDigit, decimalSizeMax)(code)
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // A `;` only closes the reference when the value is not empty.
    if (length > 0 && code === semicolon) {
      return close(code)
    }

    if (!accepts(code)) {
      return nok(code)
    }

    length += 1

    if (length > limit) {
      return nok(code)
    }

    effects.consume(code)
    return value
  }

  /**
   * At the `;` that follows a non-empty value.
   *
   * Closes the value token, rejects named references whose name does not
   * decode, and otherwise consumes `;` and finishes the whole reference.
   *
   * @type {State}
   */
  function close(code) {
    const token = effects.exit('characterReferenceValue')

    // Only named references need a lookup; numeric values are not validated
    // here.
    if (accepts === asciiAlphanumeric) {
      const name = context.sliceSerialize(token)

      if (!decodeNamedCharacterReference(name)) {
        return nok(code)
      }
    }

    // To do: `markdown-rs` uses a different name for this token:
    // `CharacterReferenceMarkerSemi`.
    consumeAs('characterReferenceMarker', code)
    effects.exit('characterReference')
    return ok
  }

  /**
   * Consume one code wrapped in its own token of the given type.
   *
   * @param {string} type
   *   Token type to enter before and exit after consuming.
   * @param {Code} code
   *   Code to consume.
   * @returns {undefined}
   *   Nothing.
   */
  function consumeAs(type, code) {
    effects.enter(type)
    effects.consume(code)
    effects.exit(type)
  }

  /**
   * Open the value token and configure which characters, and how many of
   * them, the value may contain.
   *
   * @param {(code: Code) => boolean} check
   *   Check for a single value character.
   * @param {number} sizeMax
   *   Maximum number of value characters.
   * @returns {State}
   *   The `value` state.
   */
  function beginValue(check, sizeMax) {
    effects.enter('characterReferenceValue')
    limit = sizeMax
    accepts = check
    return value
  }
}