/**
 * @typedef {import('micromark-util-types').Code} Code
 */

import {codes} from 'micromark-util-symbol'
/**
 * Check whether the character code is in a Unicode `P` (Punctuation) general
 * category.
 *
 * Internal building block of `unicodePunctuation`, which additionally accepts
 * everything matched by `asciiPunctuation`.
 */
const unicodePunctuationInternal = regexCheck(/\p{P}/u)
/**
 * Check whether the character code represents an ASCII alpha (`a` through `z`,
 * case insensitive).
 *
 * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
 *
 * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
 * to U+005A (`Z`).
 *
 * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
 * to U+007A (`z`).
 *
 * `null` and negative codes never match (they are rejected by `regexCheck`
 * before the pattern is tested).
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/)
/**
 * Check whether the character code represents an ASCII alphanumeric (`a`
 * through `z`, case insensitive, or `0` through `9`).
 *
 * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
 * (see `asciiAlpha`).
 *
 * `null` and negative codes never match (they are rejected by `regexCheck`
 * before the pattern is tested).
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)
/**
 * Check whether the character code represents an ASCII atext.
 *
 * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
 * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
 * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`) to U+002F
 * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
 * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
 * (`{`) to U+007E TILDE (`~`).
 *
 * Note that the `-` to `/` range means U+002E DOT (`.`) matches as well, and
 * that U+0021 EXCLAMATION MARK (`!`) does not match.
 * NOTE(review): RFC 5322 `atext` itself excludes `.` and includes `!`; confirm
 * that this difference is intended.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)
/**
 * Check whether a character code is an ASCII control character.
 *
 * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
 * to U+001F (US), or U+007F (DEL).
 *
 * Every code below `codes.space` matches, so negative codes match too;
 * `null` never matches.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function asciiControl(code) {
  return (
    // Special whitespace codes (which have negative values), C0 and Control
    // character DEL
    code !== null && (code < codes.space || code === codes.del)
  )
}
/**
 * Check whether the character code represents an ASCII digit (`0` through `9`).
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * `null` and negative codes never match (they are rejected by `regexCheck`
 * before the pattern is tested).
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiDigit = regexCheck(/\d/)
/**
 * Check whether the character code represents an ASCII hex digit (`a` through
 * `f`, case insensitive, or `0` through `9`).
 *
 * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
 * digit, or an ASCII lower hex digit.
 *
 * An **ASCII upper hex digit** is a character in the inclusive range U+0041
 * (`A`) to U+0046 (`F`).
 *
 * An **ASCII lower hex digit** is a character in the inclusive range U+0061
 * (`a`) to U+0066 (`f`).
 *
 * `null` and negative codes never match (they are rejected by `regexCheck`
 * before the pattern is tested).
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)
/**
 * Check whether the character code represents ASCII punctuation.
 *
 * An **ASCII punctuation** is a character in the inclusive ranges U+0021
 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
 * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
 *
 * `null` and negative codes never match (they are rejected by `regexCheck`
 * before the pattern is tested).
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)
/**
 * Check whether a character code is a markdown line ending.
 *
 * A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
 * LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
 *
 * In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
 * RETURN (CR) are replaced by these virtual characters depending on whether
 * they occurred together.
 *
 * The check itself accepts every non-`null` code that is lower than
 * `codes.horizontalTab`.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEnding(code) {
  return code !== null && code < codes.horizontalTab
}
/**
 * Check whether a character code is a markdown line ending (see
 * `markdownLineEnding`) or markdown space (see `markdownSpace`).
 *
 * The check itself accepts every non-`null` code that is lower than
 * `codes.nul`, plus `codes.space`.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEndingOrSpace(code) {
  return code !== null && (code < codes.nul || code === codes.space)
}
/**
 * Check whether a character code is a markdown space.
 *
 * A **markdown space** is the concrete character U+0020 SPACE (SP) and the
 * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
 *
 * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
 * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
 * SPACE (VS) characters, depending on the column at which the tab occurred.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownSpace(code) {
  return (
    code === codes.horizontalTab ||
    code === codes.virtualSpace ||
    code === codes.space
  )
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// in fact adds to the bundle size.
// NOTE(review): the function below does call `asciiPunctuation`; confirm
// whether this size note still applies.
/**
 * Check whether the character code represents Unicode punctuation.
 *
 * A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
 * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
 * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
 * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
 * punctuation (see `asciiPunctuation`).
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function unicodePunctuation(code) {
  // ASCII punctuation is checked first; the Unicode `P` categories are only
  // consulted when that fails.
  return asciiPunctuation(code) || unicodePunctuationInternal(code)
}
/**
 * Check whether the character code represents Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: their codes are negative, and negative codes never match here.
 * See `markdownLineEndingOrSpace` to check that.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * The check is implemented with the regex `\s`, which in JavaScript also
 * matches U+000B (VT), U+2028, U+2029, and U+FEFF.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const unicodeWhitespace = regexCheck(/\s/)
/**
 * Create a code check from a regex.
 *
 * The returned check never matches `null` or negative codes.
 * Any other code is turned into a string with `String.fromCharCode` (so it is
 * interpreted as a single UTF-16 code unit) and tested against `regex`.
 *
 * @param {RegExp} regex
 *   Expression to test a single character against.
 *   It should not have the `g` or `y` flag, as those make `test` stateful.
 * @returns {(code: Code) => boolean}
 *   Check bound to `regex`.
 */
function regexCheck(regex) {
  return check

  /**
   * Check whether a code matches the bound regex.
   *
   * @param {Code} code
   *   Character code.
   * @returns {boolean}
   *   Whether the character code matches the bound regex.
   */
  function check(code) {
    // `null` and negative codes have no character to test, so they are
    // rejected before the regex is consulted.
    return code !== null && code > -1 && regex.test(String.fromCharCode(code))
  }
}