/** * @typedef {import('micromark-util-types').Code} Code */ import {codes} from 'micromark-util-symbol' const unicodePunctuationInternal = regexCheck(/\p{P}/u) /** * Check whether the character code represents an ASCII alpha (`a` through `z`, * case insensitive). * * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha. * * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`) * to U+005A (`Z`). * * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`) * to U+007A (`z`). * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiAlpha = regexCheck(/[A-Za-z]/) /** * Check whether the character code represents an ASCII alphanumeric (`a` * through `z`, case insensitive, or `0` through `9`). * * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha * (see `asciiAlpha`). * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/) /** * Check whether the character code represents an ASCII atext. * * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`), * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE * (`{`) to U+007E TILDE (`~`). * * See: * **\[RFC5322]**: * [Internet Message Format](https://tools.ietf.org/html/rfc5322). * P. Resnick. * IETF. * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/) /** * Check whether a character code is an ASCII control character. * * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL) * to U+001F (US), or U+007F (DEL). * * @param {Code} code * Code. * @returns {boolean} * Whether it matches. */ export function asciiControl(code) { return ( // Special whitespace codes (which have negative values), C0 and Control // character DEL code !== null && (code < codes.space || code === codes.del) ) } /** * Check whether the character code represents an ASCII digit (`0` through `9`). * * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to * U+0039 (`9`). * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiDigit = regexCheck(/\d/) /** * Check whether the character code represents an ASCII hex digit (`a` through * `f`, case insensitive, or `0` through `9`). * * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex * digit, or an ASCII lower hex digit. * * An **ASCII upper hex digit** is a character in the inclusive range U+0041 * (`A`) to U+0046 (`F`). * * An **ASCII lower hex digit** is a character in the inclusive range U+0061 * (`a`) to U+0066 (`f`). * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/) /** * Check whether the character code represents ASCII punctuation. * * An **ASCII punctuation** is a character in the inclusive ranges U+0021 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`). * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/) /** * Check whether a character code is a markdown line ending. * * A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN * LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR). * * In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE * RETURN (CR) are replaced by these virtual characters depending on whether * they occurred together. * * @param {Code} code * Code. * @returns {boolean} * Whether it matches. */ export function markdownLineEnding(code) { return code !== null && code < codes.horizontalTab } /** * Check whether a character code is a markdown line ending (see * `markdownLineEnding`) or markdown space (see `markdownSpace`). * * @param {Code} code * Code. * @returns {boolean} * Whether it matches. */ export function markdownLineEndingOrSpace(code) { return code !== null && (code < codes.nul || code === codes.space) } /** * Check whether a character code is a markdown space. * * A **markdown space** is the concrete character U+0020 SPACE (SP) and the * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT). * * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL * SPACE (VS) characters, depending on the column at which the tab occurred. * * @param {Code} code * Code. * @returns {boolean} * Whether it matches. */ export function markdownSpace(code) { return ( code === codes.horizontalTab || code === codes.virtualSpace || code === codes.space ) } // Size note: removing ASCII from the regex and using `asciiPunctuation` here // In fact adds to the bundle size. /** * Check whether the character code represents Unicode punctuation. * * A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation, * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf` * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po` * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII * punctuation (see `asciiPunctuation`). * * See: * **\[UNICODE]**: * [The Unicode Standard](https://www.unicode.org/versions/). * Unicode Consortium. * * @param {Code} code * Code. * @returns {boolean} * Whether it matches. */ export function unicodePunctuation(code) { return asciiPunctuation(code) || unicodePunctuationInternal(code) } /** * Check whether the character code represents Unicode whitespace. * * Note that this does handle micromark specific markdown whitespace characters. * See `markdownLineEndingOrSpace` to check that. * * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator, * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF), * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**). * * See: * **\[UNICODE]**: * [The Unicode Standard](https://www.unicode.org/versions/). * Unicode Consortium. * * @param code * Code. * @returns {boolean} * Whether it matches. */ export const unicodeWhitespace = regexCheck(/\s/) /** * Create a code check from a regex. * * @param {RegExp} regex * @returns {(code: Code) => boolean} */ function regexCheck(regex) { return check /** * Check whether a code matches the bound regex. * * @param {Code} code * Character code. * @returns {boolean} * Whether the character code matches the bound regex. */ function check(code) { return code !== null && code > -1 && regex.test(String.fromCharCode(code)) } }