/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {decodeNamedCharacterReference} from 'decode-named-character-reference'
|
||
|
import {
|
||
|
asciiAlphanumeric,
|
||
|
asciiDigit,
|
||
|
asciiHexDigit
|
||
|
} from 'micromark-util-character'
|
||
|
/**
 * Construct describing character references.
 *
 * Pairs the construct name with the tokenizer that recognises the reference
 * (`tokenizeCharacterReference`).
 *
 * @type {Construct}
 */
export const characterReference = {
  name: 'characterReference',
  tokenize: tokenizeCharacterReference
}

/**
 * Tokenize a character reference: named (`&amp;`), decimal (`&#123;`), or
 * hexadecimal (`&#x9;`).
 *
 * The tokenizer is a small state machine.
 * Each state receives the current character code and returns the next state.
 * On success the `ok` state is returned (after the closing `;` is consumed);
 * on failure the result of calling `nok` with the offending code is returned.
 *
 * `effects` is used through `enter`, `consume`, and `exit`; `this` is used
 * through `sliceSerialize` to read back the text of the value token.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeCharacterReference(effects, ok, nok) {
  const self = this
  // Number of value characters seen so far.
  let size = 0
  // Maximum number of value characters for the detected kind of reference.
  /** @type {number} */
  let max
  // Predicate deciding whether a code may appear in the value.
  /** @type {(code: Code) => boolean} */
  let test

  return start

  /**
   * Start of character reference.
   *
   * ```markdown
   * > | a&amp;b
   *      ^
   * > | a&#123;b
   *      ^
   * > | a&#x9;b
   *      ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter('characterReference')
    effects.enter('characterReferenceMarker')
    effects.consume(code)
    effects.exit('characterReferenceMarker')
    return open
  }

  /**
   * After `&`, at `#` for numeric references or alphanumeric for named
   * references.
   *
   * ```markdown
   * > | a&amp;b
   *       ^
   * > | a&#123;b
   *       ^
   * > | a&#x9;b
   *       ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    // 35 is `#`.
    if (code === 35) {
      effects.enter('characterReferenceMarkerNumeric')
      effects.consume(code)
      effects.exit('characterReferenceMarkerNumeric')
      return numeric
    }

    // Named reference: at most 31 alphanumerics (presumably sized for the
    // longest HTML name, `CounterClockwiseContourIntegral`).
    effects.enter('characterReferenceValue')
    max = 31
    test = asciiAlphanumeric
    // The current code is already part of the value, so hand it on directly.
    return value(code)
  }

  /**
   * After `#`, at `x` for hexadecimals or digit for decimals.
   *
   * ```markdown
   * > | a&#123;b
   *        ^
   * > | a&#x9;b
   *        ^
   * ```
   *
   * @type {State}
   */
  function numeric(code) {
    // 88 is `X`, 120 is `x`.
    if (code === 88 || code === 120) {
      effects.enter('characterReferenceMarkerHexadecimal')
      effects.consume(code)
      effects.exit('characterReferenceMarkerHexadecimal')
      // Hexadecimal reference: at most 6 hexadecimal digits.
      effects.enter('characterReferenceValue')
      max = 6
      test = asciiHexDigit
      return value
    }

    // Decimal reference: at most 7 digits.
    effects.enter('characterReferenceValue')
    max = 7
    test = asciiDigit
    // The current code is already part of the value, so hand it on directly.
    return value(code)
  }

  /**
   * After markers (`&#x`, `&#`, or `&`), in value, before `;`.
   *
   * The character reference kind defines what and how many characters are
   * allowed.
   *
   * ```markdown
   * > | a&amp;b
   *       ^^^
   * > | a&#123;b
   *        ^^^
   * > | a&#x9;b
   *         ^
   * ```
   *
   * @type {State}
   */
  function value(code) {
    // 59 is `;`: it only closes the reference when the value is non-empty.
    if (code === 59 && size) {
      const token = effects.exit('characterReferenceValue')

      // Named references must be known names; numeric ones need no lookup.
      if (
        test === asciiAlphanumeric &&
        !decodeNamedCharacterReference(self.sliceSerialize(token))
      ) {
        return nok(code)
      }

      // To do: `markdown-rs` uses a different name:
      // `CharacterReferenceMarkerSemi`.
      effects.enter('characterReferenceMarker')
      effects.consume(code)
      effects.exit('characterReferenceMarker')
      effects.exit('characterReference')
      return ok
    }

    // `size++ < max` accepts exactly `max` value characters; one more fails.
    if (test(code) && size++ < max) {
      effects.consume(code)
      return value
    }

    return nok(code)
  }
}