site/node_modules/micromark-core-commonmark/lib/html-text.js

714 lines
13 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').Construct} Construct
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {factorySpace} from 'micromark-factory-space'
import {
asciiAlpha,
asciiAlphanumeric,
markdownLineEnding,
markdownLineEndingOrSpace,
markdownSpace
} from 'micromark-util-character'
/** @type {Construct} */
export const htmlText = {
name: 'htmlText',
tokenize: tokenizeHtmlText
}
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeHtmlText(effects, ok, nok) {
const self = this
/** @type {NonNullable<Code> | undefined} */
let marker
/** @type {number} */
let index
/** @type {State} */
let returnState
return start
/**
* Start of HTML (text).
*
* ```markdown
* > | a <b> c
* ^
* ```
*
* @type {State}
*/
function start(code) {
effects.enter('htmlText')
effects.enter('htmlTextData')
effects.consume(code)
return open
}
/**
* After `<`, at tag name or other stuff.
*
* ```markdown
* > | a <b> c
* ^
* > | a <!doctype> c
* ^
* > | a <!--b--> c
* ^
* ```
*
* @type {State}
*/
function open(code) {
if (code === 33) {
effects.consume(code)
return declarationOpen
}
if (code === 47) {
effects.consume(code)
return tagCloseStart
}
if (code === 63) {
effects.consume(code)
return instruction
}
// ASCII alphabetical.
if (asciiAlpha(code)) {
effects.consume(code)
return tagOpen
}
return nok(code)
}
/**
* After `<!`, at declaration, comment, or CDATA.
*
* ```markdown
* > | a <!doctype> c
* ^
* > | a <!--b--> c
* ^
* > | a <![CDATA[>&<]]> c
* ^
* ```
*
* @type {State}
*/
function declarationOpen(code) {
if (code === 45) {
effects.consume(code)
return commentOpenInside
}
if (code === 91) {
effects.consume(code)
index = 0
return cdataOpenInside
}
if (asciiAlpha(code)) {
effects.consume(code)
return declaration
}
return nok(code)
}
/**
* In a comment, after `<!-`, at another `-`.
*
* ```markdown
* > | a <!--b--> c
* ^
* ```
*
* @type {State}
*/
function commentOpenInside(code) {
if (code === 45) {
effects.consume(code)
return commentEnd
}
return nok(code)
}
/**
* In comment.
*
* ```markdown
* > | a <!--b--> c
* ^
* ```
*
* @type {State}
*/
function comment(code) {
if (code === null) {
return nok(code)
}
if (code === 45) {
effects.consume(code)
return commentClose
}
if (markdownLineEnding(code)) {
returnState = comment
return lineEndingBefore(code)
}
effects.consume(code)
return comment
}
/**
* In comment, after `-`.
*
* ```markdown
* > | a <!--b--> c
* ^
* ```
*
* @type {State}
*/
function commentClose(code) {
if (code === 45) {
effects.consume(code)
return commentEnd
}
return comment(code)
}
/**
* In comment, after `--`.
*
* ```markdown
* > | a <!--b--> c
* ^
* ```
*
* @type {State}
*/
function commentEnd(code) {
return code === 62
? end(code)
: code === 45
? commentClose(code)
: comment(code)
}
/**
* After `<![`, in CDATA, expecting `CDATA[`.
*
* ```markdown
* > | a <![CDATA[>&<]]> b
* ^^^^^^
* ```
*
* @type {State}
*/
function cdataOpenInside(code) {
const value = 'CDATA['
if (code === value.charCodeAt(index++)) {
effects.consume(code)
return index === value.length ? cdata : cdataOpenInside
}
return nok(code)
}
/**
* In CDATA.
*
* ```markdown
* > | a <![CDATA[>&<]]> b
* ^^^
* ```
*
* @type {State}
*/
function cdata(code) {
if (code === null) {
return nok(code)
}
if (code === 93) {
effects.consume(code)
return cdataClose
}
if (markdownLineEnding(code)) {
returnState = cdata
return lineEndingBefore(code)
}
effects.consume(code)
return cdata
}
/**
* In CDATA, after `]`, at another `]`.
*
* ```markdown
* > | a <![CDATA[>&<]]> b
* ^
* ```
*
* @type {State}
*/
function cdataClose(code) {
if (code === 93) {
effects.consume(code)
return cdataEnd
}
return cdata(code)
}
/**
* In CDATA, after `]]`, at `>`.
*
* ```markdown
* > | a <![CDATA[>&<]]> b
* ^
* ```
*
* @type {State}
*/
function cdataEnd(code) {
if (code === 62) {
return end(code)
}
if (code === 93) {
effects.consume(code)
return cdataEnd
}
return cdata(code)
}
/**
* In declaration.
*
* ```markdown
* > | a <!b> c
* ^
* ```
*
* @type {State}
*/
function declaration(code) {
if (code === null || code === 62) {
return end(code)
}
if (markdownLineEnding(code)) {
returnState = declaration
return lineEndingBefore(code)
}
effects.consume(code)
return declaration
}
/**
* In instruction.
*
* ```markdown
* > | a <?b?> c
* ^
* ```
*
* @type {State}
*/
function instruction(code) {
if (code === null) {
return nok(code)
}
if (code === 63) {
effects.consume(code)
return instructionClose
}
if (markdownLineEnding(code)) {
returnState = instruction
return lineEndingBefore(code)
}
effects.consume(code)
return instruction
}
/**
* In instruction, after `?`, at `>`.
*
* ```markdown
* > | a <?b?> c
* ^
* ```
*
* @type {State}
*/
function instructionClose(code) {
return code === 62 ? end(code) : instruction(code)
}
/**
* After `</`, in closing tag, at tag name.
*
* ```markdown
* > | a </b> c
* ^
* ```
*
* @type {State}
*/
function tagCloseStart(code) {
// ASCII alphabetical.
if (asciiAlpha(code)) {
effects.consume(code)
return tagClose
}
return nok(code)
}
/**
* After `</x`, in a tag name.
*
* ```markdown
* > | a </b> c
* ^
* ```
*
* @type {State}
*/
function tagClose(code) {
// ASCII alphanumerical and `-`.
if (code === 45 || asciiAlphanumeric(code)) {
effects.consume(code)
return tagClose
}
return tagCloseBetween(code)
}
/**
* In closing tag, after tag name.
*
* ```markdown
* > | a </b> c
* ^
* ```
*
* @type {State}
*/
function tagCloseBetween(code) {
if (markdownLineEnding(code)) {
returnState = tagCloseBetween
return lineEndingBefore(code)
}
if (markdownSpace(code)) {
effects.consume(code)
return tagCloseBetween
}
return end(code)
}
/**
* After `<x`, in opening tag name.
*
* ```markdown
* > | a <b> c
* ^
* ```
*
* @type {State}
*/
function tagOpen(code) {
// ASCII alphanumerical and `-`.
if (code === 45 || asciiAlphanumeric(code)) {
effects.consume(code)
return tagOpen
}
if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
return tagOpenBetween(code)
}
return nok(code)
}
/**
* In opening tag, after tag name.
*
* ```markdown
* > | a <b> c
* ^
* ```
*
* @type {State}
*/
function tagOpenBetween(code) {
if (code === 47) {
effects.consume(code)
return end
}
// ASCII alphabetical and `:` and `_`.
if (code === 58 || code === 95 || asciiAlpha(code)) {
effects.consume(code)
return tagOpenAttributeName
}
if (markdownLineEnding(code)) {
returnState = tagOpenBetween
return lineEndingBefore(code)
}
if (markdownSpace(code)) {
effects.consume(code)
return tagOpenBetween
}
return end(code)
}
/**
* In attribute name.
*
* ```markdown
* > | a <b c> d
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeName(code) {
// ASCII alphabetical and `-`, `.`, `:`, and `_`.
if (
code === 45 ||
code === 46 ||
code === 58 ||
code === 95 ||
asciiAlphanumeric(code)
) {
effects.consume(code)
return tagOpenAttributeName
}
return tagOpenAttributeNameAfter(code)
}
/**
* After attribute name, before initializer, the end of the tag, or
* whitespace.
*
* ```markdown
* > | a <b c> d
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeNameAfter(code) {
if (code === 61) {
effects.consume(code)
return tagOpenAttributeValueBefore
}
if (markdownLineEnding(code)) {
returnState = tagOpenAttributeNameAfter
return lineEndingBefore(code)
}
if (markdownSpace(code)) {
effects.consume(code)
return tagOpenAttributeNameAfter
}
return tagOpenBetween(code)
}
/**
* Before unquoted, double quoted, or single quoted attribute value, allowing
* whitespace.
*
* ```markdown
* > | a <b c=d> e
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeValueBefore(code) {
if (
code === null ||
code === 60 ||
code === 61 ||
code === 62 ||
code === 96
) {
return nok(code)
}
if (code === 34 || code === 39) {
effects.consume(code)
marker = code
return tagOpenAttributeValueQuoted
}
if (markdownLineEnding(code)) {
returnState = tagOpenAttributeValueBefore
return lineEndingBefore(code)
}
if (markdownSpace(code)) {
effects.consume(code)
return tagOpenAttributeValueBefore
}
effects.consume(code)
return tagOpenAttributeValueUnquoted
}
/**
* In double or single quoted attribute value.
*
* ```markdown
* > | a <b c="d"> e
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeValueQuoted(code) {
if (code === marker) {
effects.consume(code)
marker = undefined
return tagOpenAttributeValueQuotedAfter
}
if (code === null) {
return nok(code)
}
if (markdownLineEnding(code)) {
returnState = tagOpenAttributeValueQuoted
return lineEndingBefore(code)
}
effects.consume(code)
return tagOpenAttributeValueQuoted
}
/**
* In unquoted attribute value.
*
* ```markdown
* > | a <b c=d> e
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeValueUnquoted(code) {
if (
code === null ||
code === 34 ||
code === 39 ||
code === 60 ||
code === 61 ||
code === 96
) {
return nok(code)
}
if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
return tagOpenBetween(code)
}
effects.consume(code)
return tagOpenAttributeValueUnquoted
}
/**
* After double or single quoted attribute value, before whitespace or the end
* of the tag.
*
* ```markdown
* > | a <b c="d"> e
* ^
* ```
*
* @type {State}
*/
function tagOpenAttributeValueQuotedAfter(code) {
if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) {
return tagOpenBetween(code)
}
return nok(code)
}
/**
* In certain circumstances of a tag where only an `>` is allowed.
*
* ```markdown
* > | a <b c="d"> e
* ^
* ```
*
* @type {State}
*/
function end(code) {
if (code === 62) {
effects.consume(code)
effects.exit('htmlTextData')
effects.exit('htmlText')
return ok
}
return nok(code)
}
/**
* At eol.
*
* > 👉 **Note**: we cant have blank lines in text, so no need to worry about
* > empty tokens.
*
* ```markdown
* > | a <!--a
* ^
* | b-->
* ```
*
* @type {State}
*/
function lineEndingBefore(code) {
effects.exit('htmlTextData')
effects.enter('lineEnding')
effects.consume(code)
effects.exit('lineEnding')
return lineEndingAfter
}
/**
* After eol, at optional whitespace.
*
* > 👉 **Note**: we cant have blank lines in text, so no need to worry about
* > empty tokens.
*
* ```markdown
* | a <!--a
* > | b-->
* ^
* ```
*
* @type {State}
*/
function lineEndingAfter(code) {
// Always populated by defaults.
return markdownSpace(code)
? factorySpace(
effects,
lineEndingAfterPrefix,
'linePrefix',
self.parser.constructs.disable.null.includes('codeIndented')
? undefined
: 4
)(code)
: lineEndingAfterPrefix(code)
}
/**
* After eol, after optional whitespace.
*
* > 👉 **Note**: we cant have blank lines in text, so no need to worry about
* > empty tokens.
*
* ```markdown
* | a <!--a
* > | b-->
* ^
* ```
*
* @type {State}
*/
function lineEndingAfterPrefix(code) {
effects.enter('htmlTextData')
return returnState(code)
}
}