/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {factorySpace} from 'micromark-factory-space'
import {
  asciiAlpha,
  asciiAlphanumeric,
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {codes, constants, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'

/**
 * Construct for HTML in text (inline): tags, comments, processing
 * instructions, declarations, and CDATA sections.
 *
 * @type {Construct}
 */
export const htmlText = {name: 'htmlText', tokenize: tokenizeHtmlText}

/**
 * Tokenizer for HTML (text).
 *
 * Returns the initial state of a state machine.
 * Every state receives one character code and either consumes it and returns
 * the next state, delegates to another state with the same code, or bails
 * out through `nok`.
 * On success, one `htmlText` token is produced which wraps `htmlTextData`
 * tokens, interleaved with `lineEnding` (and optional `linePrefix`) tokens
 * when the HTML spans several lines.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeHtmlText(effects, ok, nok) {
  const self = this
  /**
   * Quote (`"` or `'`) that opened the current attribute value, if any.
   *
   * @type {NonNullable<Code> | undefined}
   */
  let marker
  /**
   * Position in `constants.cdataOpeningString` while matching it.
   *
   * @type {number}
   */
  let index
  /**
   * State to resume in after a line ending (and its optional prefix).
   *
   * @type {State}
   */
  let returnState

  return start

  /**
   * Consume `code` and continue in `next`.
   *
   * @param {Code} code
   *   Current character code.
   * @param {State} next
   *   State that handles the following code.
   * @returns {State}
   *   `next`.
   */
  function advance(code, next) {
    effects.consume(code)
    return next
  }

  /**
   * Remember `resume` as the state to return to, then handle the line ending
   * at `code`.
   *
   * @param {Code} code
   *   Current character code (a line ending).
   * @param {State} resume
   *   State to continue in on the next line.
   * @returns {State | undefined}
   *   Result of `lineEndingBefore`.
   */
  function breakLine(code, resume) {
    returnState = resume
    return lineEndingBefore(code)
  }

  /**
   * Check whether `code` can follow a tag name or an attribute value in an
   * opening tag: `/`, `>`, a line ending, or a space.
   *
   * @param {Code} code
   *   Current character code.
   * @returns {boolean}
   *   Whether `code` is such a boundary.
   */
  function isTagBoundary(code) {
    return (
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownLineEndingOrSpace(code)
    )
  }

  /**
   * Start of HTML (text), at `<`.
   *
   * Opens the `htmlText` and `htmlTextData` tokens.
   *
   * ```markdown
   * > | a <b> c
   *       ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(code === codes.lessThan, 'expected `<`')
    effects.enter(types.htmlText)
    effects.enter(types.htmlTextData)
    return advance(code, open)
  }

  /**
   * After `<`, at a tag name, `!`, `/`, or `?`.
   *
   * ```markdown
   * > | a <b> c
   *        ^
   * > | a <!doctype> c
   *        ^
   * > | a </b> c
   *        ^
   * > | a <?b?> c
   *        ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    switch (code) {
      case codes.exclamationMark: {
        return advance(code, declarationOpen)
      }

      case codes.slash: {
        return advance(code, tagCloseStart)
      }

      case codes.questionMark: {
        return advance(code, instruction)
      }

      default: {
        // An opening tag name starts with an ASCII letter.
        return asciiAlpha(code) ? advance(code, tagOpen) : nok(code)
      }
    }
  }

  /**
   * After `<!`, at a declaration, comment, or CDATA section.
   *
   * ```markdown
   * > | a <!doctype> c
   *         ^
   * > | a <!--b--> c
   *         ^
   * > | a <![CDATA[>&<]]> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    switch (code) {
      case codes.dash: {
        return advance(code, commentOpenInside)
      }

      case codes.leftSquareBracket: {
        effects.consume(code)
        // Start matching the CDATA opening sequence from its first character.
        index = 0
        return cdataOpenInside
      }

      default: {
        return asciiAlpha(code) ? advance(code, declaration) : nok(code)
      }
    }
  }

  /**
   * After `<!-`, at the second `-` that opens a comment.
   *
   * ```markdown
   * > | a <!--b--> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    // Continue in `commentEnd`, so that a `>` directly after `<!--` closes
    // the comment.
    return code === codes.dash ? advance(code, commentEnd) : nok(code)
  }

  /**
   * In a comment.
   *
   * ```markdown
   * > | a <!--b--> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function comment(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.dash) {
      return advance(code, commentClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, comment)
    }

    return advance(code, comment)
  }

  /**
   * In a comment, after `-`.
   *
   * ```markdown
   * > | a <!--b--> c
   *             ^
   * ```
   *
   * @type {State}
   */
  function commentClose(code) {
    return code === codes.dash ? advance(code, commentEnd) : comment(code)
  }

  /**
   * In a comment, after `--`.
   *
   * ```markdown
   * > | a <!--b--> c
   *              ^
   * ```
   *
   * @type {State}
   */
  function commentEnd(code) {
    if (code === codes.greaterThan) {
      return end(code)
    }

    if (code === codes.dash) {
      // Another dash: stay "after `--`" by going through `commentClose`.
      return commentClose(code)
    }

    return comment(code)
  }

  /**
   * After `<![`, in the rest of the CDATA opening (`CDATA[`).
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *          ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const opening = constants.cdataOpeningString
    const expected = opening.charCodeAt(index)

    index++

    if (code !== expected) {
      return nok(code)
    }

    effects.consume(code)
    return index === opening.length ? cdata : cdataOpenInside
  }

  /**
   * In CDATA.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                ^^^
   * ```
   *
   * @type {State}
   */
  function cdata(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.rightSquareBracket) {
      return advance(code, cdataClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, cdata)
    }

    return advance(code, cdata)
  }

  /**
   * In CDATA, after `]`, at another `]`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                    ^
   * ```
   *
   * @type {State}
   */
  function cdataClose(code) {
    return code === codes.rightSquareBracket
      ? advance(code, cdataEnd)
      : cdata(code)
  }

  /**
   * In CDATA, after `]]`, at `>`.
   *
   * ```markdown
   * > | a <![CDATA[>&<]]> b
   *                     ^
   * ```
   *
   * @type {State}
   */
  function cdataEnd(code) {
    switch (code) {
      case codes.greaterThan: {
        return end(code)
      }

      case codes.rightSquareBracket: {
        // Extra `]`: the last two brackets still count as `]]`.
        return advance(code, cdataEnd)
      }

      default: {
        return cdata(code)
      }
    }
  }

  /**
   * In a declaration.
   *
   * ```markdown
   * > | a <!b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function declaration(code) {
    // `end` accepts `>` and rejects everything else, including EOF.
    if (code === codes.eof || code === codes.greaterThan) {
      return end(code)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, declaration)
    }

    return advance(code, declaration)
  }

  /**
   * In an instruction.
   *
   * ```markdown
   * > | a <?b?> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function instruction(code) {
    if (code === codes.eof) {
      return nok(code)
    }

    if (code === codes.questionMark) {
      return advance(code, instructionClose)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, instruction)
    }

    return advance(code, instruction)
  }

  /**
   * In an instruction, after `?`, at `>`.
   *
   * ```markdown
   * > | a <?b?> c
   *           ^
   * ```
   *
   * @type {State}
   */
  function instructionClose(code) {
    if (code === codes.greaterThan) {
      return end(code)
    }

    return instruction(code)
  }

  /**
   * After `</`, at the start of a closing tag name.
   *
   * ```markdown
   * > | a </b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    // A closing tag name starts with an ASCII letter.
    return asciiAlpha(code) ? advance(code, tagClose) : nok(code)
  }

  /**
   * After `</x`, in a closing tag name.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagClose(code) {
    // Names continue with ASCII alphanumerics and `-`.
    const inName = code === codes.dash || asciiAlphanumeric(code)

    return inName ? advance(code, tagClose) : tagCloseBetween(code)
  }

  /**
   * In a closing tag, after the tag name: optional whitespace, then `>`.
   *
   * ```markdown
   * > | a </b> c
   *          ^
   * ```
   *
   * @type {State}
   */
  function tagCloseBetween(code) {
    if (markdownLineEnding(code)) {
      return breakLine(code, tagCloseBetween)
    }

    return markdownSpace(code) ? advance(code, tagCloseBetween) : end(code)
  }

  /**
   * After `<x`, in an opening tag name.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpen(code) {
    // Names continue with ASCII alphanumerics and `-`.
    if (code === codes.dash || asciiAlphanumeric(code)) {
      return advance(code, tagOpen)
    }

    return isTagBoundary(code) ? tagOpenBetween(code) : nok(code)
  }

  /**
   * In an opening tag, after the tag name or after an attribute.
   *
   * ```markdown
   * > | a <b> c
   *         ^
   * ```
   *
   * @type {State}
   */
  function tagOpenBetween(code) {
    if (code === codes.slash) {
      // Self-closing: only `>` may follow.
      return advance(code, end)
    }

    // Attribute names start with an ASCII letter, `:`, or `_`.
    if (code === codes.colon || code === codes.underscore || asciiAlpha(code)) {
      return advance(code, tagOpenAttributeName)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenBetween)
    }

    return markdownSpace(code) ? advance(code, tagOpenBetween) : end(code)
  }

  /**
   * In an attribute name.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeName(code) {
    // Attribute names continue with ASCII alphanumerics, `-`, `.`, `:`,
    // and `_`.
    const inName =
      code === codes.dash ||
      code === codes.dot ||
      code === codes.colon ||
      code === codes.underscore ||
      asciiAlphanumeric(code)

    return inName
      ? advance(code, tagOpenAttributeName)
      : tagOpenAttributeNameAfter(code)
  }

  /**
   * After an attribute name, before an initializer (`=`), whitespace, or the
   * end of the tag.
   *
   * ```markdown
   * > | a <b c> d
   *           ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeNameAfter(code) {
    if (code === codes.equalsTo) {
      return advance(code, tagOpenAttributeValueBefore)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenAttributeNameAfter)
    }

    return markdownSpace(code)
      ? advance(code, tagOpenAttributeNameAfter)
      : tagOpenBetween(code)
  }

  /**
   * After `=`, before an unquoted, double quoted, or single quoted attribute
   * value, allowing whitespace.
   *
   * ```markdown
   * > | a <b c=d> e
   *            ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueBefore(code) {
    switch (code) {
      // Characters that cannot start an attribute value.
      case codes.eof:
      case codes.lessThan:
      case codes.equalsTo:
      case codes.greaterThan:
      case codes.graveAccent: {
        return nok(code)
      }

      case codes.quotationMark:
      case codes.apostrophe: {
        effects.consume(code)
        // Remember which quote has to close the value.
        marker = code
        return tagOpenAttributeValueQuoted
      }

      default: {
        if (markdownLineEnding(code)) {
          return breakLine(code, tagOpenAttributeValueBefore)
        }

        return advance(
          code,
          markdownSpace(code)
            ? tagOpenAttributeValueBefore
            : tagOpenAttributeValueUnquoted
        )
      }
    }
  }

  /**
   * In a double or single quoted attribute value.
   *
   * ```markdown
   * > | a <b c="d"> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuoted(code) {
    if (code === marker) {
      effects.consume(code)
      marker = undefined
      return tagOpenAttributeValueQuotedAfter
    }

    if (code === codes.eof) {
      return nok(code)
    }

    if (markdownLineEnding(code)) {
      return breakLine(code, tagOpenAttributeValueQuoted)
    }

    return advance(code, tagOpenAttributeValueQuoted)
  }

  /**
   * In an unquoted attribute value.
   *
   * ```markdown
   * > | a <b c=d> e
   *             ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueUnquoted(code) {
    switch (code) {
      // Characters that are not allowed in an unquoted value.
      case codes.eof:
      case codes.quotationMark:
      case codes.apostrophe:
      case codes.lessThan:
      case codes.equalsTo:
      case codes.graveAccent: {
        return nok(code)
      }

      default: {
        return isTagBoundary(code)
          ? tagOpenBetween(code)
          : advance(code, tagOpenAttributeValueUnquoted)
      }
    }
  }

  /**
   * After a double or single quoted attribute value, before whitespace or the
   * end of the tag.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function tagOpenAttributeValueQuotedAfter(code) {
    return isTagBoundary(code) ? tagOpenBetween(code) : nok(code)
  }

  /**
   * At a place where only `>` is allowed.
   *
   * Closes the `htmlTextData` and `htmlText` tokens on success.
   *
   * ```markdown
   * > | a <b c="d"> e
   *               ^
   * ```
   *
   * @type {State}
   */
  function end(code) {
    if (code !== codes.greaterThan) {
      return nok(code)
    }

    effects.consume(code)
    effects.exit(types.htmlTextData)
    effects.exit(types.htmlText)
    return ok
  }

  /**
   * At a line ending inside the HTML.
   *
   * Closes the current `htmlTextData` token and emits the line ending as its
   * own `lineEnding` token.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   * > | a <!--a
   *            ^
   *   | b-->
   * ```
   *
   * @type {State}
   */
  function lineEndingBefore(code) {
    assert(returnState, 'expected return state')
    assert(markdownLineEnding(code), 'expected eol')
    effects.exit(types.htmlTextData)
    effects.enter(types.lineEnding)
    effects.consume(code)
    effects.exit(types.lineEnding)
    return lineEndingAfter
  }

  /**
   * After a line ending, at optional whitespace.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfter(code) {
    // Always populated by defaults.
    assert(
      self.parser.constructs.disable.null,
      'expected `disable.null` to be populated'
    )

    if (!markdownSpace(code)) {
      return lineEndingAfterPrefix(code)
    }

    // Passed as the last argument of `factorySpace` (presumably the size
    // limit of the prefix — confirm against `micromark-factory-space`):
    // no value when `codeIndented` is disabled, otherwise the tab size.
    const limit = self.parser.constructs.disable.null.includes('codeIndented')
      ? undefined
      : constants.tabSize

    return factorySpace(
      effects,
      lineEndingAfterPrefix,
      types.linePrefix,
      limit
    )(code)
  }

  /**
   * After a line ending, after optional whitespace.
   *
   * Reopens `htmlTextData` and resumes in the state that was interrupted by
   * the line ending.
   *
   * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about
   * > empty tokens.
   *
   * ```markdown
   *   | a <!--a
   * > | b-->
   *     ^
   * ```
   *
   * @type {State}
   */
  function lineEndingAfterPrefix(code) {
    effects.enter(types.htmlTextData)
    return returnState(code)
  }
}