/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').Resolver} Resolver
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {
  asciiAlpha,
  asciiAlphanumeric,
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {htmlBlockNames, htmlRawNames} from 'micromark-util-html-tag-name'
import {codes, constants, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'
import {blankLine} from './blank-line.js'

/**
 * HTML (flow) construct.
 *
 * Pairs the tokenizer below with a resolver that folds a preceding line
 * prefix into the HTML token, and is flagged `concrete`.
 *
 * @type {Construct}
 */
export const htmlFlow = {
  name: 'htmlFlow',
  tokenize: tokenizeHtmlFlow,
  resolveTo: resolveToHtmlFlow,
  concrete: true
}

/**
 * Partial construct: a line ending followed by a blank line.
 *
 * Used with `effects.check` in `continuation` for basic and complete HTML.
 *
 * @type {Construct}
 */
const blankLineBefore = {tokenize: tokenizeBlankLineBefore, partial: true}
/**
 * Partial construct: a line ending followed by a line that is not flagged as
 * lazy in `self.parser.lazy`.
 *
 * Used with `effects.check` in `continuationStart`.
 *
 * @type {Construct}
 */
const nonLazyContinuationStart = {
  tokenize: tokenizeNonLazyContinuationStart,
  partial: true
}

/**
 * Merge a line prefix that directly precedes HTML (flow) into the HTML tokens.
 *
 * Scans `events` backwards for the last `enter` event of an `htmlFlow` token.
 * If the event two positions before it belongs to a `linePrefix` token, the
 * start of the `htmlFlow` token and of the token in the event right after it
 * are moved back to the prefix start, and the two prefix events are removed.
 *
 * The `events` list is modified in place and returned.
 *
 * @type {Resolver}
 */
function resolveToHtmlFlow(events) {
  let index = events.length

  // Find the last `enter` of `htmlFlow`; `index` ends at `-1` if there is none.
  while (index--) {
    if (
      events[index][0] === 'enter' &&
      events[index][1].type === types.htmlFlow
    ) {
      break
    }
  }

  // `index > 1` guarantees `events[index - 2]` exists.
  if (index > 1 && events[index - 2][1].type === types.linePrefix) {
    // Add the prefix start to the HTML token.
    events[index][1].start = events[index - 2][1].start
    // Add the prefix start to the HTML line token.
    events[index + 1][1].start = events[index - 2][1].start
    // Remove the line prefix.
    events.splice(index - 2, 2)
  }

  return events
}

/**
 * Tokenizer for HTML (flow).
 *
 * Implemented as a state machine: each state receives the current character
 * code and either consumes it and returns the next state, or delegates to
 * another state, `ok`, or `nok`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeHtmlFlow(effects, ok, nok) {
  const self = this
  /**
   * Kind of HTML being parsed; one of the `constants.html*` values assigned
   * in the opening states, and read by the continuation states to find the
   * matching end condition.
   *
   * @type {number}
   */
  let marker
  /**
   * Set to `true` when the tag opens with `</`; otherwise left unset.
   *
   * @type {boolean}
   */
  let closingTag
  /**
   * Tag name collected so far (opening tag name, or raw end tag name).
   *
   * @type {string}
   */
  let buffer
  /**
   * Position in `constants.cdataOpeningString` while matching a CDATA opening.
   *
   * @type {number}
   */
  let index
  /**
   * Quote character that opened the current quoted attribute value; reset to
   * `null` when the matching quote is consumed.
   *
   * @type {Code}
   */
  let markerB

  return start

  /**
   * Start of HTML (flow).
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // To do: parse indent like `markdown-rs`.
    return before(code)
  }

  /**
   * At `<`, after optional whitespace.
   *
   * Opens the `htmlFlow` and `htmlFlowData` tokens and consumes the `<`.
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function before(code) {
    assert(code === codes.lessThan, 'expected `<`')
    effects.enter(types.htmlFlow)
    effects.enter(types.htmlFlowData)
    effects.consume(code)
    return open
  }

  /**
   * After `<`, at tag name or other stuff.
   *
   * ```markdown
   * > | <x />
   *      ^
   * > | <!doctype>
   *      ^
   * > | <!--xxx-->
   *      ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    if (code === codes.exclamationMark) {
      effects.consume(code)
      return declarationOpen
    }

    if (code === codes.slash) {
      effects.consume(code)
      closingTag = true
      return tagCloseStart
    }

    if (code === codes.questionMark) {
      effects.consume(code)
      marker = constants.htmlInstruction
      // To do:
      // tokenizer.concrete = true
      // To do: use `markdown-rs` style interrupt.
      // While we’re in an instruction instead of a declaration, we’re on a `?`
      // right now, so we do need to search for `>`, similar to declarations.
      return self.interrupt ? ok : continuationDeclarationInside
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer = String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * After `<!`, at declaration, comment, or CDATA.
   *
   * ```markdown
   * > | <!doctype>
   *       ^
   * > | <!--xxx-->
   *       ^
   * > | <![CDATA[>&<]]>
   *       ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    if (code === codes.dash) {
      effects.consume(code)
      marker = constants.htmlComment
      return commentOpenInside
    }

    if (code === codes.leftSquareBracket) {
      effects.consume(code)
      marker = constants.htmlCdata
      // Start matching `constants.cdataOpeningString` from its first character.
      index = 0
      return cdataOpenInside
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      marker = constants.htmlDeclaration
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuationDeclarationInside
    }

    return nok(code)
  }

  /**
   * After `<!-`, inside a comment, at another `-`.
   *
   * ```markdown
   * > | <!--xxx-->
   *        ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    if (code === codes.dash) {
      effects.consume(code)
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuationDeclarationInside
    }

    return nok(code)
  }

  /**
   * After `<![`, inside CDATA, expecting `CDATA[`.
   *
   * Matches one character of `constants.cdataOpeningString` per call.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *        ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const value = constants.cdataOpeningString

    if (code === value.charCodeAt(index++)) {
      effects.consume(code)

      if (index === value.length) {
        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok : continuation
      }

      return cdataOpenInside
    }

    return nok(code)
  }

  /**
   * After `</`, in closing tag, at tag name.
   *
   * ```markdown
   * > | </x>
   *       ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    if (asciiAlpha(code)) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer = String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * In tag name.
   *
   * When the name ends, it is classified (case-insensitively) as raw, basic,
   * or complete HTML, which decides how the rest is parsed.
   *
   * ```markdown
   * > | <ab>
   *      ^^
   * > | </ab>
   *       ^^
   * ```
   *
   * @type {State}
   */
  function tagName(code) {
    if (
      code === codes.eof ||
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownLineEndingOrSpace(code)
    ) {
      const slash = code === codes.slash
      const name = buffer.toLowerCase()

      // Raw names only count in an opening tag that is not followed by `/`.
      if (!slash && !closingTag && htmlRawNames.includes(name)) {
        marker = constants.htmlRaw
        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok(code) : continuation(code)
      }

      if (htmlBlockNames.includes(buffer.toLowerCase())) {
        marker = constants.htmlBasic

        if (slash) {
          effects.consume(code)
          return basicSelfClosing
        }

        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok(code) : continuation(code)
      }

      marker = constants.htmlComplete
      // Do not support complete HTML when interrupting (unless the current
      // line is flagged as lazy).
      return self.interrupt && !self.parser.lazy[self.now().line]
        ? nok(code)
        : closingTag
        ? completeClosingTagAfter(code)
        : completeAttributeNameBefore(code)
    }

    // ASCII alphanumerical and `-`.
    if (code === codes.dash || asciiAlphanumeric(code)) {
      effects.consume(code)
      buffer += String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * After closing slash of a basic tag name.
   *
   * ```markdown
   * > | <div/>
   *          ^
   * ```
   *
   * @type {State}
   */
  function basicSelfClosing(code) {
    if (code === codes.greaterThan) {
      effects.consume(code)
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuation
    }

    return nok(code)
  }

  /**
   * After the tag name of a complete closing tag, at optional whitespace
   * before the `>`.
   *
   * ```markdown
   * > | </x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeClosingTagAfter(code) {
    if (markdownSpace(code)) {
      effects.consume(code)
      return completeClosingTagAfter
    }

    return completeEnd(code)
  }

  /**
   * At an attribute name.
   *
   * At first, this state is used after a complete tag name, after whitespace,
   * where it expects optional attributes or the end of the tag.
   * It is also reused after attributes, when expecting more optional
   * attributes.
   *
   * ```markdown
   * > | <a />
   *        ^
   * > | <a :b>
   *        ^
   * > | <a _b>
   *        ^
   * > | <a b>
   *        ^
   * > | <a >
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameBefore(code) {
    if (code === codes.slash) {
      effects.consume(code)
      return completeEnd
    }

    // ASCII alphabetical and `:` and `_`.
    if (code === codes.colon || code === codes.underscore || asciiAlpha(code)) {
      effects.consume(code)
      return completeAttributeName
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeNameBefore
    }

    return completeEnd(code)
  }

  /**
   * In attribute name.
   *
   * ```markdown
   * > | <a :b>
   *         ^
   * > | <a _b>
   *         ^
   * > | <a b>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    if (
      code === codes.dash ||
      code === codes.dot ||
      code === codes.colon ||
      code === codes.underscore ||
      asciiAlphanumeric(code)
    ) {
      effects.consume(code)
      return completeAttributeName
    }

    return completeAttributeNameAfter(code)
  }

  /**
   * After attribute name, at an optional initializer, the end of the tag, or
   * whitespace.
   *
   * ```markdown
   * > | <a b>
   *         ^
   * > | <a b=c>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameAfter(code) {
    if (code === codes.equalsTo) {
      effects.consume(code)
      return completeAttributeValueBefore
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeNameAfter
    }

    return completeAttributeNameBefore(code)
  }

  /**
   * Before unquoted, double quoted, or single quoted attribute value, allowing
   * whitespace.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * > | <a b="c">
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueBefore(code) {
    if (
      code === codes.eof ||
      code === codes.lessThan ||
      code === codes.equalsTo ||
      code === codes.greaterThan ||
      code === codes.graveAccent
    ) {
      return nok(code)
    }

    if (code === codes.quotationMark || code === codes.apostrophe) {
      effects.consume(code)
      // Remember which quote opened the value so the same one closes it.
      markerB = code
      return completeAttributeValueQuoted
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeValueBefore
    }

    return completeAttributeValueUnquoted(code)
  }

  /**
   * In double or single quoted attribute value.
   *
   * ```markdown
   * > | <a b="c">
   *           ^
   * > | <a b='c'>
   *           ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuoted(code) {
    if (code === markerB) {
      effects.consume(code)
      markerB = null
      return completeAttributeValueQuotedAfter
    }

    // A quoted value may not run past the end of the line.
    if (code === codes.eof || markdownLineEnding(code)) {
      return nok(code)
    }

    effects.consume(code)
    return completeAttributeValueQuoted
  }

  /**
   * In unquoted attribute value.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueUnquoted(code) {
    if (
      code === codes.eof ||
      code === codes.quotationMark ||
      code === codes.apostrophe ||
      code === codes.slash ||
      code === codes.lessThan ||
      code === codes.equalsTo ||
      code === codes.greaterThan ||
      code === codes.graveAccent ||
      markdownLineEndingOrSpace(code)
    ) {
      return completeAttributeNameAfter(code)
    }

    effects.consume(code)
    return completeAttributeValueUnquoted
  }

  /**
   * After double or single quoted attribute value, before whitespace or the
   * end of the tag.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuotedAfter(code) {
    if (
      code === codes.slash ||
      code === codes.greaterThan ||
      markdownSpace(code)
    ) {
      return completeAttributeNameBefore(code)
    }

    return nok(code)
  }

  /**
   * In certain circumstances of a complete tag where only an `>` is allowed.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeEnd(code) {
    if (code === codes.greaterThan) {
      effects.consume(code)
      return completeAfter
    }

    return nok(code)
  }

  /**
   * After `>` in a complete tag.
   *
   * Only whitespace may follow up to the end of the line.
   *
   * ```markdown
   * > | <x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAfter(code) {
    if (code === codes.eof || markdownLineEnding(code)) {
      // // Do not form containers.
      // tokenizer.concrete = true
      return continuation(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAfter
    }

    return nok(code)
  }

  /**
   * In continuation of any HTML kind.
   *
   * Looks for the first character of the end condition that belongs to the
   * current `marker`; anything else is consumed as data.
   *
   * ```markdown
   * > | <!--xxx-->
   *          ^
   * ```
   *
   * @type {State}
   */
  function continuation(code) {
    if (code === codes.dash && marker === constants.htmlComment) {
      effects.consume(code)
      return continuationCommentInside
    }

    if (code === codes.lessThan && marker === constants.htmlRaw) {
      effects.consume(code)
      return continuationRawTagOpen
    }

    if (code === codes.greaterThan && marker === constants.htmlDeclaration) {
      effects.consume(code)
      return continuationClose
    }

    if (code === codes.questionMark && marker === constants.htmlInstruction) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    if (code === codes.rightSquareBracket && marker === constants.htmlCdata) {
      effects.consume(code)
      return continuationCdataInside
    }

    // Basic and complete HTML end when the line ending is followed by a blank
    // line.
    if (
      markdownLineEnding(code) &&
      (marker === constants.htmlBasic || marker === constants.htmlComplete)
    ) {
      effects.exit(types.htmlFlowData)
      return effects.check(
        blankLineBefore,
        continuationAfter,
        continuationStart
      )(code)
    }

    if (code === codes.eof || markdownLineEnding(code)) {
      effects.exit(types.htmlFlowData)
      return continuationStart(code)
    }

    effects.consume(code)
    return continuation
  }

  /**
   * In continuation, at eol.
   *
   * Continues only when `nonLazyContinuationStart` matches; otherwise the
   * HTML ends here.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStart(code) {
    return effects.check(
      nonLazyContinuationStart,
      continuationStartNonLazy,
      continuationAfter
    )(code)
  }

  /**
   * In continuation, at eol, before non-lazy content.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStartNonLazy(code) {
    assert(markdownLineEnding(code))
    effects.enter(types.lineEnding)
    effects.consume(code)
    effects.exit(types.lineEnding)
    return continuationBefore
  }

  /**
   * In continuation, before non-lazy content.
   *
   * ```markdown
   *   | <x>
   * > | asd
   *     ^
   * ```
   *
   * @type {State}
   */
  function continuationBefore(code) {
    // Empty line (or end of input): go straight to the eol handling without
    // opening an empty data token.
    if (code === codes.eof || markdownLineEnding(code)) {
      return continuationStart(code)
    }

    effects.enter(types.htmlFlowData)
    return continuation(code)
  }

  /**
   * In comment continuation, after one `-`, expecting another.
   *
   * ```markdown
   * > | <!--xxx-->
   *             ^
   * ```
   *
   * @type {State}
   */
  function continuationCommentInside(code) {
    if (code === codes.dash) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In raw continuation, after `<`, at `/`.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                            ^
   * ```
   *
   * @type {State}
   */
  function continuationRawTagOpen(code) {
    if (code === codes.slash) {
      effects.consume(code)
      // Start collecting the end tag name from scratch.
      buffer = ''
      return continuationRawEndTag
    }

    return continuation(code)
  }

  /**
   * In raw continuation, after `</`, in a raw tag name.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                             ^^^^^^
   * ```
   *
   * @type {State}
   */
  function continuationRawEndTag(code) {
    if (code === codes.greaterThan) {
      const name = buffer.toLowerCase()

      if (htmlRawNames.includes(name)) {
        effects.consume(code)
        return continuationClose
      }

      return continuation(code)
    }

    // Stop collecting once the name reaches `constants.htmlRawSizeMax`.
    if (asciiAlpha(code) && buffer.length < constants.htmlRawSizeMax) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer += String.fromCharCode(code)
      return continuationRawEndTag
    }

    return continuation(code)
  }

  /**
   * In cdata continuation, after `]`, expecting `]>`.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *                  ^
   * ```
   *
   * @type {State}
   */
  function continuationCdataInside(code) {
    if (code === codes.rightSquareBracket) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In declaration or instruction continuation, at `>`.
   *
   * ```markdown
   * > | <!-->
   *         ^
   * > | <?>
   *       ^
   * > | <!q>
   *        ^
   * > | <!--ab-->
   *             ^
   * > | <![CDATA[>&<]]>
   *                   ^
   * ```
   *
   * @type {State}
   */
  function continuationDeclarationInside(code) {
    if (code === codes.greaterThan) {
      effects.consume(code)
      return continuationClose
    }

    // More dashes.
    if (code === codes.dash && marker === constants.htmlComment) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In closed continuation: everything we get until the eol/eof is part of it.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationClose(code) {
    if (code === codes.eof || markdownLineEnding(code)) {
      effects.exit(types.htmlFlowData)
      return continuationAfter(code)
    }

    effects.consume(code)
    return continuationClose
  }

  /**
   * Done.
   *
   * Closes the `htmlFlow` token and reports success.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationAfter(code) {
    effects.exit(types.htmlFlow)
    // // Feel free to interrupt.
    // tokenizer.interrupt = false
    // // No longer concrete.
    // tokenizer.concrete = false
    return ok(code)
  }
}

/**
 * Tokenizer for the `nonLazyContinuationStart` partial construct.
 *
 * Consumes a line ending, then succeeds only when the line that follows is
 * not flagged in `self.parser.lazy`.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeNonLazyContinuationStart(effects, ok, nok) {
  const self = this

  return start

  /**
   * At eol, before continuation.
   *
   * ```markdown
   * > | * ```js
   *            ^
   *   | b
   * ```
   *
   * @type {State}
   */
  function start(code) {
    if (markdownLineEnding(code)) {
      effects.enter(types.lineEnding)
      effects.consume(code)
      effects.exit(types.lineEnding)
      return after
    }

    return nok(code)
  }

  /**
   * A continuation.
   *
   * ```markdown
   *   | * ```js
   * > | b
   *     ^
   * ```
   *
   * @type {State}
   */
  function after(code) {
    return self.parser.lazy[self.now().line] ? nok(code) : ok(code)
  }
}

/**
 * Tokenizer for the `blankLineBefore` partial construct.
 *
 * Consumes a line ending, then attempts the `blankLine` construct on what
 * follows.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeBlankLineBefore(effects, ok, nok) {
  return start

  /**
   * Before eol, expecting blank line.
   *
   * ```markdown
   * > | <div>
   *          ^
   *   |
   * ```
   *
   * @type {State}
   */
  function start(code) {
    assert(markdownLineEnding(code), 'expected a line ending')
    effects.enter(types.lineEnding)
    effects.consume(code)
    effects.exit(types.lineEnding)
    return effects.attempt(blankLine, ok, nok)
  }
}