site/node_modules/hast-util-from-html/lib/index.js
2024-10-14 08:09:33 +02:00

241 lines
6.4 KiB
JavaScript

/**
* @typedef {import('hast').Root} Root
* @typedef {import('parse5').ParserError} ParserError
* @typedef {import('vfile').Value} Value
*/
/**
* @typedef {keyof errors} ErrorCode
* Known names of parse errors.
*
* @typedef {Partial<Record<ErrorCode, ErrorSeverity | null | undefined>>} ErrorOptions
* Options that define the severity of errors.
*
* @typedef {boolean | 0 | 1 | 2} ErrorSeverity
* Error severity:
*
* * `0` or `false`
* — turn the parse error off
* * `1` or `true`
* — turn the parse error into a warning
* * `2`
* — turn the parse error into an actual error: processing stops.
*
* @typedef ExtraOptions
* Options that define how to parse HTML.
* @property {boolean | null | undefined} [fragment=false]
* Specify whether to parse a fragment, instead of a complete document
* (default: `false`).
*
* In document mode, unopened `html`, `head`, and `body` elements are opened
* in just the right places.
* @property {OnError | null | undefined} [onerror]
* Call `onerror` with parse errors while parsing (optional).
*
* > 👉 **Note**: parse errors are currently being added to HTML.
* > Not all errors emitted by parse5 (or us) are specced yet.
* > Some documentation may still be missing.
*
* Specific rules can be turned off by setting them to `false` (or `0`).
* The default, when `emitParseErrors: true`, is `true` (or `1`), and means
* that rules emit as warnings.
* Rules can also be configured with `2`, to turn them into fatal errors.
*
* @typedef {Omit<import('hast-util-from-parse5').Options, 'file'>} FromParse5Options
* Options that can be passed through to `hast-util-from-parse5`.
*
* @callback OnError
* Handle parse errors.
* @param {VFileMessage} error
* Message.
* @returns {undefined | void}
* Nothing.
*
* Note: `void` included until TS infers `undefined` nicely.
*
* @typedef {FromParse5Options & ErrorOptions & ExtraOptions} Options
* Configuration.
*/
import {ok as assert} from 'devlop'
import {fromParse5} from 'hast-util-from-parse5'
import {parse, parseFragment} from 'parse5'
import {VFile} from 'vfile'
import {VFileMessage} from 'vfile-message'
import {errors} from './errors.js'
const base = 'https://html.spec.whatwg.org/multipage/parsing.html#parse-error-'
const dashToCamelRe = /-[a-z]/g
const formatCRe = /%c(?:([-+])(\d+))?/g
const formatXRe = /%x/g
const fatalities = {2: true, 1: false, 0: null}
/** @type {Readonly<Options>} */
const emptyOptions = {}
/**
* Turn serialized HTML into a hast tree.
*
* @param {VFile | Value} value
* Serialized HTML to parse.
* @param {Readonly<Options> | null | undefined} [options]
* Configuration (optional).
* @returns {Root}
* Tree.
*/
export function fromHtml(value, options) {
const settings = options || emptyOptions
const onerror = settings.onerror
const file = value instanceof VFile ? value : new VFile(value)
const fn = settings.fragment ? parseFragment : parse
const doc = String(file)
const p5doc = fn(doc, {
sourceCodeLocationInfo: true,
// Note `parse5` types currently do not allow `undefined`.
onParseError: settings.onerror ? internalOnerror : null,
scriptingEnabled: false
})
// `parse5` returns document which are always mapped to roots.
return /** @type {Root} */ (
fromParse5(p5doc, {
file,
space: settings.space,
verbose: settings.verbose
})
)
/**
* Handle a parse error.
*
* @param {ParserError} error
* Parse5 error.
* @returns {undefined}
* Nothing.
*/
function internalOnerror(error) {
const code = error.code
const name = camelcase(code)
const setting = settings[name]
const config = setting === null || setting === undefined ? true : setting
const level = typeof config === 'number' ? config : config ? 1 : 0
if (level) {
const info = errors[name]
assert(info, 'expected known error from `parse5`')
const message = new VFileMessage(format(info.reason), {
place: {
start: {
line: error.startLine,
column: error.startCol,
offset: error.startOffset
},
end: {
line: error.endLine,
column: error.endCol,
offset: error.endOffset
}
},
ruleId: code,
source: 'hast-util-from-html'
})
if (file.path) {
message.file = file.path
message.name = file.path + ':' + message.name
}
message.fatal = fatalities[level]
message.note = format(info.description)
message.url = info.url === false ? undefined : base + code
assert(onerror, '`internalOnerror` is not passed if `onerror` is not set')
onerror(message)
}
/**
* Format a human readable string about an error.
*
* @param {string} value
* Value to format.
* @returns {string}
* Formatted.
*/
function format(value) {
return value.replace(formatCRe, formatC).replace(formatXRe, formatX)
/**
* Format the character.
*
* @param {string} _
* Match.
* @param {string} $1
* Sign (`-` or `+`, optional).
* @param {string} $2
* Offset.
* @returns {string}
* Formatted.
*/
function formatC(_, $1, $2) {
const offset =
($2 ? Number.parseInt($2, 10) : 0) * ($1 === '-' ? -1 : 1)
const char = doc.charAt(error.startOffset + offset)
return visualizeCharacter(char)
}
/**
* Format the character code.
*
* @returns {string}
* Formatted.
*/
function formatX() {
return visualizeCharacterCode(doc.charCodeAt(error.startOffset))
}
}
}
}
/**
* @param {string} value
* Error code in dash case.
* @returns {ErrorCode}
* Error code in camelcase.
*/
function camelcase(value) {
// This should match an error code.
return /** @type {ErrorCode} */ (value.replace(dashToCamelRe, dashToCamel))
}
/**
* @param {string} $0
* Match.
* @returns {string}
* Camelcased.
*/
function dashToCamel($0) {
return $0.charAt(1).toUpperCase()
}
/**
* @param {string} char
* Character.
* @returns {string}
* Formatted.
*/
function visualizeCharacter(char) {
return char === '`' ? '` ` `' : char
}
/**
* @param {number} charCode
* Character code.
* @returns {string}
* Formatted.
*/
function visualizeCharacterCode(charCode) {
return '0x' + charCode.toString(16).toUpperCase()
}