714 lines
13 KiB
JavaScript
714 lines
13 KiB
JavaScript
/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {factorySpace} from 'micromark-factory-space'
|
||
import {
|
||
asciiAlpha,
|
||
asciiAlphanumeric,
|
||
markdownLineEnding,
|
||
markdownLineEndingOrSpace,
|
||
markdownSpace
|
||
} from 'micromark-util-character'
|
||
/**
 * Construct for HTML (text): pairs the construct name `htmlText` with the
 * tokenizer that recognizes it.
 *
 * @type {Construct}
 */
export const htmlText = {
  name: 'htmlText',
  tokenize: tokenizeHtmlText
}

/**
 * Tokenizer for HTML (text).
 *
 * Implemented as a state machine: every nested function below is a state
 * that receives one character code, optionally consumes it through
 * `effects.consume`, and returns the next state.
 * The whole match is wrapped in an `htmlText` token whose content is emitted
 * as `htmlTextData` tokens; line endings inside the construct are emitted as
 * separate `lineEnding` tokens (see `lineEndingBefore`).
 *
 * `ok` is returned as the next state once the closing `>` is consumed; the
 * result of `nok(code)` is returned as soon as the input cannot match.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const self = this
  /**
   * Quote (`"` or `'`) that opened the current quoted attribute value.
   *
   * @type {NonNullable<Code> | undefined}
   */
  let marker
  /**
   * Number of characters of `CDATA[` matched so far.
   *
   * @type {number}
   */
  let index
  /**
   * State to resume in after a line ending (and optional line prefix).
   *
   * @type {State}
   */
  let returnState

  return start

  /**
   * Start of HTML (text).
   *
   * ```markdown
   * > | a <b> c
   *       ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter('htmlText')
    effects.enter('htmlTextData')
    effects.consume(code)
    return open
  }

  /**
   * After `<`, at tag name or other stuff.
   *
   * ```markdown
   * > | a <b> c
   *        ^
   * > | a <!doctype> c
   *        ^
   * > | a <!--b--> c
   *        ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    // `!`
    if (code === 33) {
      effects.consume(code)
      return declarationOpen
    }

    // `/`
    if (code === 47) {
      effects.consume(code)
      return tagCloseStart
    }

    // `?`
    if (code === 63) {
      effects.consume(code)
      return instruction
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      return tagOpen
    }

    return nok(code)
  }

  /**
   * After `<!`, at declaration, comment, or CDATA.
   *
   * ```markdown
   * > | a <!doctype> c
   *         ^
   * > | a <!--b--> c
   *         ^
   * > | a <![CDATA[>&<]]> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      return commentOpenInside
    }

    // `[`
    if (code === 91) {
      effects.consume(code)
      // Restart matching of the `CDATA[` opening sequence.
      index = 0
      return cdataOpenInside
    }

    if (asciiAlpha(code)) {
      effects.consume(code)
      return declaration
    }

    return nok(code)
  }

  /**
   * In a comment, after `<!-`, at another `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      return commentEnd
    }

    return nok(code)
  }

  /**
   * In comment.
   *
   * ```markdown
   * > | a <!--b--> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function comment(code) {
    // End of input: the comment was never closed.
    if (code === null) {
      return nok(code)
    }

    // `-`
    if (code === 45) {
      effects.consume(code)
      return commentClose
    }

    if (markdownLineEnding(code)) {
      returnState = comment
      return lineEndingBefore(code)
    }

    effects.consume(code)
    return comment
  }

  /**
   * In comment, after `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *             ^
   * ```
   *
   * @type {State}
   */
  function commentClose(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      return commentEnd
    }

    return comment(code)
  }

  /**
   * In comment, after `--`.
   *
   * ```markdown
   * > | a <!--b--> c
   *              ^
   * ```
   *
   * @type {State}
   */
  function commentEnd(code) {
    // `>` closes the comment; another `-` keeps us in the closing sequence;
    // anything else is regular comment content.
    return code === 62
      ? end(code)
      : code === 45
        ? commentClose(code)
        : comment(code)
  }

  /**
   * After `<![`, in CDATA, expecting `CDATA[`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *          ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const value = 'CDATA['

    if (code === value.charCodeAt(index++)) {
      effects.consume(code)
      return index === value.length ? cdata : cdataOpenInside
    }

    return nok(code)
  }

  /**
   * In CDATA.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                ^^^
   * ```
   *
   * @type {State}
   */
  function cdata(code) {
    // End of input: the CDATA section was never closed.
    if (code === null) {
      return nok(code)
    }

    // `]`
    if (code === 93) {
      effects.consume(code)
      return cdataClose
    }

    if (markdownLineEnding(code)) {
      returnState = cdata
      return lineEndingBefore(code)
    }

    effects.consume(code)
    return cdata
  }

  /**
   * In CDATA, after `]`, at another `]`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                    ^
   * ```
   *
   * @type {State}
   */
  function cdataClose(code) {
    // `]`
    if (code === 93) {
      effects.consume(code)
      return cdataEnd
    }

    return cdata(code)
  }

  /**
   * In CDATA, after `]]`, at `>`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                     ^
   * ```
   *
   * @type {State}
   */
  function cdataEnd(code) {
    // `>`
    if (code === 62) {
      return end(code)
    }

    // `]`: stay here, so that any run of `]` followed by `>` closes.
    if (code === 93) {
      effects.consume(code)
      return cdataEnd
    }

    return cdata(code)
  }

  /**
   * In declaration.
   *
   * ```markdown
   * > | a <!b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declaration(code) {
    // `end` accepts `>` and rejects everything else, including end of input.
    if (code === null || code === 62) {
      return end(code)
    }

    if (markdownLineEnding(code)) {
      returnState = declaration
      return lineEndingBefore(code)
    }

    effects.consume(code)
    return declaration
  }

  /**
   * In instruction.
   *
   * ```markdown
   * > | a <?b?> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function instruction(code) {
    // End of input: the instruction was never closed.
    if (code === null) {
      return nok(code)
    }

    // `?`
    if (code === 63) {
      effects.consume(code)
      return instructionClose
    }

    if (markdownLineEnding(code)) {
      returnState = instruction
      return lineEndingBefore(code)
    }

    effects.consume(code)
    return instruction
  }

  /**
   * In instruction, after `?`, at `>`.
   *
   * ```markdown
   * > | a <?b?> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function instructionClose(code) {
    // `>` closes the instruction; anything else is instruction content.
    return code === 62 ? end(code) : instruction(code)
  }

  /**
   * After `</`, in closing tag, at tag name.
   *
   * ```markdown
   * > | a </b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      return tagClose
    }

    return nok(code)
  }

  /**
   * After `</x`, in a tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagClose(code) {
    // ASCII alphanumerical and `-`.
    if (code === 45 || asciiAlphanumeric(code)) {
      effects.consume(code)
      return tagClose
    }

    return tagCloseBetween(code)
  }

  /**
   * In closing tag, after tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      returnState = tagCloseBetween
      return lineEndingBefore(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagCloseBetween
    }

    return end(code)
  }

  /**
   * After `<x`, in opening tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpen(code) {
    // ASCII alphanumerical and `-`.
    if (code === 45 || asciiAlphanumeric(code)) {
      effects.consume(code)
      return tagOpen
    }

    // `/`, `>`, or whitespace ends the tag name.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code)
    }

    return nok(code)
  }

  /**
   * In opening tag, after tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    // `/`: self-closing, only `>` may follow.
    if (code === 47) {
      effects.consume(code)
      return end
    }

    // ASCII alphabetical and `:` and `_`.
    if (code === 58 || code === 95 || asciiAlpha(code)) {
      effects.consume(code)
      return tagOpenAttributeName
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenBetween
      return lineEndingBefore(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenBetween
    }

    return end(code)
  }

  /**
   * In attribute name.
   *
   * ```markdown
   * > | a <b c> d
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    if (
      code === 45 ||
      code === 46 ||
      code === 58 ||
      code === 95 ||
      asciiAlphanumeric(code)
    ) {
      effects.consume(code)
      return tagOpenAttributeName
    }

    return tagOpenAttributeNameAfter(code)
  }

  /**
   * After attribute name, before initializer, the end of the tag, or
   * whitespace.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    // `=`
    if (code === 61) {
      effects.consume(code)
      return tagOpenAttributeValueBefore
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeNameAfter
      return lineEndingBefore(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenAttributeNameAfter
    }

    return tagOpenBetween(code)
  }

  /**
   * Before unquoted, double quoted, or single quoted attribute value, allowing
   * whitespace.
   *
   * ```markdown
   * > | a <b c=d> e
   *            ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    // End of input, `<`, `=`, `>`, and `` ` `` cannot start a value.
    if (
      code === null ||
      code === 60 ||
      code === 61 ||
      code === 62 ||
      code === 96
    ) {
      return nok(code)
    }

    // `"` or `'`: remember which quote opened the value.
    if (code === 34 || code === 39) {
      effects.consume(code)
      marker = code
      return tagOpenAttributeValueQuoted
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueBefore
      return lineEndingBefore(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return tagOpenAttributeValueBefore
    }

    effects.consume(code)
    return tagOpenAttributeValueUnquoted
  }

  /**
   * In double or single quoted attribute value.
   *
   * ```markdown
   * > | a <b c="d"> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    // Only the quote that opened the value closes it.
    if (code === marker) {
      effects.consume(code)
      marker = undefined
      return tagOpenAttributeValueQuotedAfter
    }

    // End of input: the value was never closed.
    if (code === null) {
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      returnState = tagOpenAttributeValueQuoted
      return lineEndingBefore(code)
    }

    effects.consume(code)
    return tagOpenAttributeValueQuoted
  }

  /**
   * In unquoted attribute value.
   *
   * ```markdown
   * > | a <b c=d> e
   *            ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    // End of input, `"`, `'`, `<`, `=`, and `` ` `` are not allowed in an
    // unquoted value.
    if (
      code === null ||
      code === 34 ||
      code === 39 ||
      code === 60 ||
      code === 61 ||
      code === 96
    ) {
      return nok(code)
    }

    // `/`, `>`, or whitespace ends the value.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code)
    }

    effects.consume(code)
    return tagOpenAttributeValueUnquoted
  }

  /**
   * After double or single quoted attribute value, before whitespace or the end
   * of the tag.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    // `/`, `>`, or whitespace must follow a quoted value.
    if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
      return tagOpenBetween(code)
    }

    return nok(code)
  }

  /**
   * In certain circumstances of a tag where only an `>` is allowed.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function end(code) {
    // `>`
    if (code === 62) {
      effects.consume(code)
      effects.exit('htmlTextData')
      effects.exit('htmlText')
      return ok
    }

    return nok(code)
  }

  /**
   * At eol.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   * > | a <!--a
   *            ^
   *   | b-->
   * ```
   *
   * @type {State}
   */
  function lineEndingBefore(code) {
    // Close the current data token so the line ending gets its own token;
    // `lineEndingAfterPrefix` opens a new data token afterwards.
    effects.exit('htmlTextData')
    effects.enter('lineEnding')
    effects.consume(code)
    effects.exit('lineEnding')
    return lineEndingAfter
  }

  /**
   * After eol, at optional whitespace.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfter(code) {
    // `disable.null` is always populated by defaults.
    // Leading whitespace is handed to `factorySpace` as a `linePrefix`; the
    // size argument is `4` unless the `codeIndented` construct is disabled,
    // in which case no size is passed.
    return markdownSpace(code)
      ? factorySpace(
          effects,
          lineEndingAfterPrefix,
          'linePrefix',
          self.parser.constructs.disable.null.includes('codeIndented')
            ? undefined
            : 4
        )(code)
      : lineEndingAfterPrefix(code)
  }

  /**
   * After eol, after optional whitespace.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfterPrefix(code) {
    // Reopen the data token and continue in the state that hit the eol.
    effects.enter('htmlTextData')
    return returnState(code)
  }
}