site/node_modules/micromark-util-character/dev/index.js

/**
 * @typedef {import('micromark-util-types').Code} Code
 */

import {codes} from 'micromark-util-symbol'

// Code check for characters in any Unicode punctuation (`P*`) general
// category; combined with `asciiPunctuation` by `unicodePunctuation` below.
// `regexCheck` is a function declaration further down the file, so it is
// hoisted and can already be called here.
const unicodePunctuationInternal = regexCheck(/\p{P}/u)

/**
 * Check whether the character code represents an ASCII alpha (`a` through `z`,
 * case insensitive).
 *
 * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
 *
 * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
 * to U+005A (`Z`).
 *
 * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
 * to U+007A (`z`).
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/)

/**
 * Check whether the character code represents an ASCII alphanumeric (`a`
 * through `z`, case insensitive, or `0` through `9`).
 *
 * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
 * (see `asciiAlpha`).
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)

/**
 * Check whether the character code represents an ASCII atext.
 *
 * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
 * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
 * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
 * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
 * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
 * (`{`) to U+007E TILDE (`~`).
 *
 * The pattern expresses `-`, `/`, and the digits as one range from U+002D
 * (`-`) to U+0039 (`9`), so U+002E FULL STOP (`.`) matches as well.
 *
 * NOTE(review): RFC 5322 `atext` also lists U+0021 EXCLAMATION MARK (`!`),
 * which this pattern does not match; confirm whether that is intended.
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)

/**
 * Tell whether a character code is an ASCII control character.
 *
 * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
 * to U+001F (US), or U+007F (DEL).
 *
 * Because the test is "less than `codes.space`", the special whitespace codes
 * (which have negative values) are reported as controls too.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` for DEL and for every non-`null` code below `codes.space`;
 *   `false` otherwise, including for `null`.
 */
export function asciiControl(code) {
  // `null` is never a control character.
  if (code === null) {
    return false
  }

  // DEL sits apart from the rest; everything else that qualifies (C0 and the
  // negative special whitespace codes) lies below SPACE.
  return code === codes.del || code < codes.space
}

/**
 * Check whether the character code represents an ASCII digit (`0` through `9`).
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiDigit = regexCheck(/\d/)

/**
 * Check whether the character code represents an ASCII hex digit (`a` through
 * `f`, case insensitive, or `0` through `9`).
 *
 * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
 * digit, or an ASCII lower hex digit.
 *
 * An **ASCII upper hex digit** is a character in the inclusive range U+0041
 * (`A`) to U+0046 (`F`).
 *
 * An **ASCII lower hex digit** is a character in the inclusive range U+0061
 * (`a`) to U+0066 (`f`).
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)

/**
 * Check whether the character code represents ASCII punctuation.
 *
 * An **ASCII punctuation** is a character in the inclusive ranges U+0021
 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
 * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
 *
 * The check is created with `regexCheck`, so `null` and negative codes never
 * match.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)

/**
 * Tell whether a character code is a markdown line ending.
 *
 * A **markdown line ending** is one of the virtual characters M-0003 CARRIAGE
 * RETURN LINE FEED (CRLF), M-0004 LINE FEED (LF), or M-0005 CARRIAGE RETURN
 * (CR).
 *
 * In micromark, the actual characters U+000A LINE FEED (LF) and U+000D
 * CARRIAGE RETURN (CR) are replaced by these virtual characters depending on
 * whether they occurred together.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` for every non-`null` code below `codes.horizontalTab`; `false`
 *   otherwise.
 */
export function markdownLineEnding(code) {
  // `null` is never a line ending.
  if (code === null) {
    return false
  }

  // The line ending codes are the ones ordered before the virtual tab.
  return code < codes.horizontalTab
}

/**
 * Tell whether a character code is a markdown line ending (see
 * `markdownLineEnding`) or a markdown space (see `markdownSpace`).
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` for `codes.space` and for every non-`null` code below `codes.nul`;
 *   `false` otherwise.
 */
export function markdownLineEndingOrSpace(code) {
  // `null` is neither a line ending nor a space.
  if (code === null) {
    return false
  }

  // A real space, or any of the virtual codes, which all sort below NUL.
  return code === codes.space || code < codes.nul
}

/**
 * Tell whether a character code is a markdown space.
 *
 * A **markdown space** is the concrete character U+0020 SPACE (SP) or one of
 * the virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB
 * (HT).
 *
 * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
 * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
 * SPACE (VS) characters, depending on the column at which the tab occurred.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   `true` when `code` is strictly equal to one of the three space codes.
 */
export function markdownSpace(code) {
  // `switch` compares with strict equality, so `null` falls to the default.
  switch (code) {
    case codes.horizontalTab:
    case codes.virtualSpace:
    case codes.space: {
      return true
    }

    default: {
      return false
    }
  }
}

// `unicodePunctuationInternal` only covers the Unicode punctuation (`P*`)
// categories, while several ASCII punctuation characters (such as `$`, `+`,
// `<`, and `~`) are classified as symbols, so `asciiPunctuation` is checked
// too.
/**
 * Tell whether the character code represents Unicode punctuation.
 *
 * A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
 * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
 * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
 * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
 * punctuation (see `asciiPunctuation`).
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Character code to test.
 * @returns {boolean}
 *   Whether either the ASCII check or the Unicode category check matches.
 */
export function unicodePunctuation(code) {
  if (asciiPunctuation(code)) {
    return true
  }

  return unicodePunctuationInternal(code)
}

/**
 * Check whether the character code represents Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: the check is created with `regexCheck`, which rejects `null` and
 * negative codes.
 * See `markdownLineEndingOrSpace` to check that.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * The underlying `\s` pattern additionally matches U+000B (VT), U+2028 LINE
 * SEPARATOR, U+2029 PARAGRAPH SEPARATOR, and U+FEFF.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Character code to test.
 * @returns {boolean}
 *   Whether it matches.
 */
export const unicodeWhitespace = regexCheck(/\s/)

/**
 * Build a predicate that tests a single character code against `regex`.
 *
 * @param {RegExp} regex
 *   Pattern applied to the one-character string made from the code.
 * @returns {(code: Code) => boolean}
 *   Predicate bound to `regex`.
 */
function regexCheck(regex) {
  /**
   * Test one code against the bound regex.
   *
   * @param {Code} code
   *   Character code.
   * @returns {boolean}
   *   `false` for `null` and for codes that are not greater than `-1`;
   *   otherwise whether the character for that code matches.
   */
  return function check(code) {
    // Only non-negative codes stand for real characters; `null` and the
    // negative virtual codes can never match.
    if (code === null || !(code > -1)) {
      return false
    }

    const character = String.fromCharCode(code)
    return regex.test(character)
  }
}
updated shit 2024-10-14 06:09:33 +00:00			`/**`
			`* @typedef {import('micromark-util-types').Code} Code`
			`*/`

			`import {codes} from 'micromark-util-symbol'`

			`const unicodePunctuationInternal = regexCheck(/\p{P}/u)`

			`/**`
			* Check whether the character code represents an ASCII alpha (`a` through `z`,
			`* case insensitive).`
			`*`
			`* An ASCII alpha is an ASCII upper alpha or ASCII lower alpha.`
			`*`
			* An ASCII upper alpha is a character in the inclusive range U+0041 (`A`)
			* to U+005A (`Z`).
			`*`
			* An ASCII lower alpha is a character in the inclusive range U+0061 (`a`)
			* to U+007A (`z`).
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const asciiAlpha = regexCheck(/[A-Za-z]/)`

			`/**`
			* Check whether the character code represents an ASCII alphanumeric (`a`
			* through `z`, case insensitive, or `0` through `9`).
			`*`
			* An ASCII alphanumeric is an ASCII digit (see `asciiDigit`) or ASCII alpha
			* (see `asciiAlpha`).
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)`

			`/**`
			`* Check whether the character code represents an ASCII atext.`
			`*`
			* atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
			* the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
			* U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
			* SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
			* CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
			* (`{`) to U+007E TILDE (`~`).
			`*`
			`* See:`
			`* \[RFC5322]:`
			`* [Internet Message Format](https://tools.ietf.org/html/rfc5322).`
			`* P. Resnick.`
			`* IETF.`
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)`

			`/**`
			`* Check whether a character code is an ASCII control character.`
			`*`
			`* An ASCII control is a character in the inclusive range U+0000 NULL (NUL)`
			`* to U+001F (US), or U+007F (DEL).`
			`*`
			`* @param {Code} code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export function asciiControl(code) {`
			`return (`
			`// Special whitespace codes (which have negative values), C0 and Control`
			`// character DEL`
			`code !== null && (code < codes.space \|\| code === codes.del)`
			`)`
			`}`

			`/**`
			* Check whether the character code represents an ASCII digit (`0` through `9`).
			`*`
			* An ASCII digit is a character in the inclusive range U+0030 (`0`) to
			* U+0039 (`9`).
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const asciiDigit = regexCheck(/\d/)`

			`/**`
			* Check whether the character code represents an ASCII hex digit (`a` through
			* `f`, case insensitive, or `0` through `9`).
			`*`
			* An ASCII hex digit is an ASCII digit (see `asciiDigit`), ASCII upper hex
			`* digit, or an ASCII lower hex digit.`
			`*`
			`* An ASCII upper hex digit is a character in the inclusive range U+0041`
			* (`A`) to U+0046 (`F`).
			`*`
			`* An ASCII lower hex digit is a character in the inclusive range U+0061`
			* (`a`) to U+0066 (`f`).
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)`

			`/**`
			`* Check whether the character code represents ASCII punctuation.`
			`*`
			`* An ASCII punctuation is a character in the inclusive ranges U+0021`
			* EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
			* SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
			* (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)

			`/**`
			`* Check whether a character code is a markdown line ending.`
			`*`
			`* A markdown line ending is the virtual characters M-0003 CARRIAGE RETURN`
			`* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).`
			`*`
			`* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE`
			`* RETURN (CR) are replaced by these virtual characters depending on whether`
			`* they occurred together.`
			`*`
			`* @param {Code} code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export function markdownLineEnding(code) {`
			`return code !== null && code < codes.horizontalTab`
			`}`

			`/**`
			`* Check whether a character code is a markdown line ending (see`
			* `markdownLineEnding`) or markdown space (see `markdownSpace`).
			`*`
			`* @param {Code} code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export function markdownLineEndingOrSpace(code) {`
			`return code !== null && (code < codes.nul \|\| code === codes.space)`
			`}`

			`/**`
			`* Check whether a character code is a markdown space.`
			`*`
			`* A markdown space is the concrete character U+0020 SPACE (SP) and the`
			`* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).`
			`*`
			`* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is`
			`* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL`
			`* SPACE (VS) characters, depending on the column at which the tab occurred.`
			`*`
			`* @param {Code} code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export function markdownSpace(code) {`
			`return (`
			`code === codes.horizontalTab \|\|`
			`code === codes.virtualSpace \|\|`
			`code === codes.space`
			`)`
			`}`

			// Size note: removing ASCII from the regex and using `asciiPunctuation` here
			`// In fact adds to the bundle size.`
			`/**`
			`* Check whether the character code represents Unicode punctuation.`
			`*`
			* A Unicode punctuation is a character in the Unicode `Pc` (Punctuation,
			* Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
			* (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
			* (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
			* punctuation (see `asciiPunctuation`).
			`*`
			`* See:`
			`* \[UNICODE]:`
			`* [The Unicode Standard](https://www.unicode.org/versions/).`
			`* Unicode Consortium.`
			`*`
			`* @param {Code} code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export function unicodePunctuation(code) {`
			`return asciiPunctuation(code) \|\| unicodePunctuationInternal(code)`
			`}`

			`/**`
			`* Check whether the character code represents Unicode whitespace.`
			`*`
			`* Note that this does handle micromark specific markdown whitespace characters.`
			* See `markdownLineEndingOrSpace` to check that.
			`*`
			* A Unicode whitespace is a character in the Unicode `Zs` (Separator,
			`* Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),`
			`* U+000C (FF), or U+000D CARRIAGE RETURN (CR) (\[UNICODE]).`
			`*`
			`* See:`
			`* \[UNICODE]:`
			`* [The Unicode Standard](https://www.unicode.org/versions/).`
			`* Unicode Consortium.`
			`*`
			`* @param code`
			`* Code.`
			`* @returns {boolean}`
			`* Whether it matches.`
			`*/`
			`export const unicodeWhitespace = regexCheck(/\s/)`

			`/**`
			`* Create a code check from a regex.`
			`*`
			`* @param {RegExp} regex`
			`* @returns {(code: Code) => boolean}`
			`*/`
			`function regexCheck(regex) {`
			`return check`

			`/**`
			`* Check whether a code matches the bound regex.`
			`*`
			`* @param {Code} code`
			`* Character code.`
			`* @returns {boolean}`
			`* Whether the character code matches the bound regex.`
			`*/`
			`function check(code) {`
			`return code !== null && code > -1 && regex.test(String.fromCharCode(code))`
			`}`
			`}`