site/node_modules/mdast-util-gfm-autolink-literal/lib/index.js

289 lines
6.8 KiB
JavaScript
Raw Permalink Normal View History

2024-10-14 06:09:33 +00:00
/**
* @typedef {import('mdast').Link} Link
* @typedef {import('mdast').PhrasingContent} PhrasingContent
*
* @typedef {import('mdast-util-from-markdown').CompileContext} CompileContext
* @typedef {import('mdast-util-from-markdown').Extension} FromMarkdownExtension
* @typedef {import('mdast-util-from-markdown').Handle} FromMarkdownHandle
* @typedef {import('mdast-util-from-markdown').Transform} FromMarkdownTransform
*
* @typedef {import('mdast-util-to-markdown').ConstructName} ConstructName
* @typedef {import('mdast-util-to-markdown').Options} ToMarkdownExtension
*
* @typedef {import('mdast-util-find-and-replace').RegExpMatchObject} RegExpMatchObject
* @typedef {import('mdast-util-find-and-replace').ReplaceFunction} ReplaceFunction
*/
import {ccount} from 'ccount'
import {ok as assert} from 'devlop'
import {unicodePunctuation, unicodeWhitespace} from 'micromark-util-character'
import {findAndReplace} from 'mdast-util-find-and-replace'
/**
 * Construct name placed in the `inConstruct` field of every unsafe pattern
 * returned by `gfmAutolinkLiteralToMarkdown`.
 *
 * @type {ConstructName}
 */
const inConstruct = 'phrasing'
/**
 * Construct names placed in the `notInConstruct` field of every unsafe
 * pattern returned by `gfmAutolinkLiteralToMarkdown`. The same array instance
 * is shared by all of those patterns.
 *
 * @type {Array<ConstructName>}
 */
const notInConstruct = ['autolink', 'link', 'image', 'label']
/**
 * Create an extension for `mdast-util-from-markdown` to enable GFM autolink
 * literals in markdown.
 *
 * The extension registers one tree transform plus enter/exit handlers for the
 * `literalAutolink`, `literalAutolinkEmail`, `literalAutolinkHttp`, and
 * `literalAutolinkWww` token types. A fresh object is built on every call.
 *
 * @returns {FromMarkdownExtension}
 *   Extension for `mdast-util-from-markdown` to enable GFM autolink literals.
 */
export function gfmAutolinkLiteralFromMarkdown() {
  // The three value tokens (email, http, www) share one enter handler.
  const enter = {
    literalAutolink: enterLiteralAutolink,
    literalAutolinkEmail: enterLiteralAutolinkValue,
    literalAutolinkHttp: enterLiteralAutolinkValue,
    literalAutolinkWww: enterLiteralAutolinkValue
  }

  // Each value token has its own exit handler.
  const exit = {
    literalAutolink: exitLiteralAutolink,
    literalAutolinkEmail: exitLiteralAutolinkEmail,
    literalAutolinkHttp: exitLiteralAutolinkHttp,
    literalAutolinkWww: exitLiteralAutolinkWww
  }

  return {transforms: [transformGfmAutolinkLiterals], enter, exit}
}
/**
 * Create an extension for `mdast-util-to-markdown` to enable GFM autolink
 * literals in markdown.
 *
 * The extension consists solely of `unsafe` patterns for three characters:
 * `@` surrounded by email-like characters, `.` following `w`/`W`, and `:`
 * following `p`/`s` and followed by `/`. Every pattern carries the
 * module-level `inConstruct` and `notInConstruct` values.
 *
 * @returns {ToMarkdownExtension}
 *   Extension for `mdast-util-to-markdown` to enable GFM autolink literals.
 */
export function gfmAutolinkLiteralToMarkdown() {
  // One row per pattern: [character, before, after].
  /** @type {Array<[string, string, string]>} */
  const rows = [
    ['@', '[+\\-.\\w]', '[\\-.\\w]'],
    ['.', '[Ww]', '[\\-.\\w]'],
    [':', '[ps]', '\\/']
  ]

  return {
    unsafe: rows.map(([character, before, after]) => ({
      character,
      before,
      after,
      inConstruct,
      notInConstruct
    }))
  }
}
/**
 * Enter handler for the `literalAutolink` token: passes a new, empty `link`
 * node (no title, empty URL, no children) to `this.enter` together with the
 * token.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function enterLiteralAutolink(token) {
  /** @type {Link} */
  const link = {type: 'link', title: null, url: '', children: []}
  this.enter(link, token)
}
/**
 * Enter handler shared by the `literalAutolinkEmail`, `literalAutolinkHttp`,
 * and `literalAutolinkWww` tokens: delegates to the `autolinkProtocol` enter
 * handler found on `this.config.enter`, keeping `this` as the context.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function enterLiteralAutolinkValue(token) {
  const {autolinkProtocol} = this.config.enter
  autolinkProtocol.call(this, token)
}
/**
 * Exit handler for the `literalAutolinkHttp` token: delegates to the
 * `autolinkProtocol` exit handler found on `this.config.exit`, keeping `this`
 * as the context.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkHttp(token) {
  const {autolinkProtocol} = this.config.exit
  autolinkProtocol.call(this, token)
}
/**
 * Exit handler for the `literalAutolinkWww` token.
 *
 * First delegates to the `data` exit handler on `this.config.exit`, then
 * overwrites the URL of the node on top of `this.stack` (asserted to be a
 * `link`) with `http://` followed by the serialized token.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkWww(token) {
  const {data: exitData} = this.config.exit
  exitData.call(this, token)

  // Read the stack only after the data handler has run.
  const {stack} = this
  const current = stack[stack.length - 1]
  assert(current.type === 'link')
  current.url = `http://${this.sliceSerialize(token)}`
}
/**
 * Exit handler for the `literalAutolinkEmail` token: delegates to the
 * `autolinkEmail` exit handler found on `this.config.exit`, keeping `this` as
 * the context.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolinkEmail(token) {
  const {autolinkEmail} = this.config.exit
  autolinkEmail.call(this, token)
}
/**
 * Exit handler for the `literalAutolink` token: forwards the token to
 * `this.exit` and returns nothing.
 *
 * NOTE(review): presumably this closes the `link` node that
 * `enterLiteralAutolink` opened via `this.enter` — confirm against the
 * `CompileContext` documentation.
 *
 * @this {CompileContext}
 * @type {FromMarkdownHandle}
 */
function exitLiteralAutolink(token) {
this.exit(token)
}
/**
 * Tree transform: runs `findAndReplace` over `tree` with two patterns, one
 * for URLs (`http://`, `https://`, or `www.` prefixes, case-insensitive) and
 * one for email addresses. Matches are handed to `findUrl` and `findEmail`
 * respectively; `link` and `linkReference` nodes are ignored.
 *
 * @type {FromMarkdownTransform}
 */
function transformGfmAutolinkLiterals(tree) {
  // Groups: protocol (or `www`), domain, rest up to the next whitespace.
  const urlPattern = /(https?:\/\/|www(?=\.))([-.\w]+)([^ \t\r\n]*)/gi
  // Groups: local part, domain containing at least one dot.
  const emailPattern = /([-.\w+]+)@([-\w]+(?:\.[-\w]+)+)/g

  /** @type {Array<[RegExp, ReplaceFunction]>} */
  const replacements = [
    [urlPattern, findUrl],
    [emailPattern, findEmail]
  ]
  const options = {ignore: ['link', 'linkReference']}

  findAndReplace(tree, replacements, options)
}
/**
 * Replace callback for URL matches.
 *
 * Rejects the match (returns `false`) when the preceding character is not
 * acceptable according to `previous`, when the domain fails
 * `isCorrectDomain`, or when nothing is left of the URL after `splitUrl` has
 * removed the trailing part. Otherwise returns a `link` node, followed by a
 * `text` node holding the split-off trail when that trail is non-empty.
 *
 * A `www` match has no protocol of its own: `www` becomes part of the domain
 * and `http://` is prepended to the link URL only, not to the visible text.
 *
 * @type {ReplaceFunction}
 * @param {string} _
 *   Whole match (unused).
 * @param {string} protocol
 *   First capture: `http://`, `https://`, or `www` (any casing).
 * @param {string} domain
 *   Second capture: domain characters following the protocol.
 * @param {string} path
 *   Third capture: remaining non-whitespace characters.
 * @param {RegExpMatchObject} match
 *   Match info; passed to `previous`.
 * @returns {Array<PhrasingContent> | Link | false}
 *   Replacement node(s), or `false` to leave the text untouched.
 */
// eslint-disable-next-line max-params
function findUrl(_, protocol, domain, path, match) {
  // Not an expected previous character.
  if (!previous(match)) {
    return false
  }

  // Treat `www` as part of the domain.
  const startsWithWww = /^w/i.test(protocol)
  const scheme = startsWithWww ? '' : protocol
  const host = startsWithWww ? protocol + domain : domain
  const urlPrefix = startsWithWww ? 'http://' : ''

  if (!isCorrectDomain(host)) {
    return false
  }

  const [target, trail] = splitUrl(host + path)

  if (!target) {
    return false
  }

  const visible = scheme + target

  /** @type {Link} */
  const link = {
    type: 'link',
    title: null,
    url: urlPrefix + visible,
    children: [{type: 'text', value: visible}]
  }

  return trail ? [link, {type: 'text', value: trail}] : link
}
/**
 * Replace callback for email matches.
 *
 * Rejects the match (returns `false`) when `previous(match, true)` reports an
 * unacceptable preceding character, or when the label ends in a dash, digit,
 * or underscore. Otherwise returns a `link` node whose URL is the address
 * prefixed with `mailto:` and whose text is the address itself.
 *
 * @type {ReplaceFunction}
 * @param {string} _
 *   Whole match (unused).
 * @param {string} atext
 *   First capture: the part before `@`.
 * @param {string} label
 *   Second capture: the part after `@`.
 * @param {RegExpMatchObject} match
 *   Match info; passed to `previous`.
 * @returns {Link | false}
 *   Replacement link, or `false` to leave the text untouched.
 */
function findEmail(_, atext, label, match) {
  // Not an expected previous character.
  if (!previous(match, true)) {
    return false
  }

  // Label ends in not allowed character.
  if (/[-\d_]$/.test(label)) {
    return false
  }

  const address = `${atext}@${label}`

  return {
    type: 'link',
    title: null,
    url: `mailto:${address}`,
    children: [{type: 'text', value: address}]
  }
}
/**
 * Check whether a matched domain is acceptable.
 *
 * The domain is split on `.`. It is rejected when there are fewer than two
 * parts, or when either of the last two parts is non-empty and contains an
 * underscore or lacks any ASCII letter or digit. Earlier parts are not
 * inspected, and empty parts are accepted.
 *
 * @param {string} domain
 *   Domain to check.
 * @returns {boolean}
 *   Whether the domain is acceptable.
 */
function isCorrectDomain(domain) {
  const labels = domain.split('.')

  if (labels.length < 2) {
    return false
  }

  // Only the final two labels are validated.
  for (const label of labels.slice(-2)) {
    if (label && (/_/.test(label) || !/[a-zA-Z\d]/.test(label))) {
      return false
    }
  }

  return true
}
/**
 * Split trailing punctuation off a URL.
 *
 * A run of the characters ``!"&'),.:;<>?]}`` at the end of `url` is treated as
 * a trail and removed. Closing parentheses inside that trail are moved back
 * onto the URL (together with any trail characters before them) for as long
 * as the URL has more `(` than `)`.
 *
 * @param {string} url
 *   Candidate URL text.
 * @returns {[string, string | undefined]}
 *   The URL and its trail. The trail is `undefined` when no trailing
 *   punctuation was found and may be an empty string when all of it was moved
 *   back onto the URL.
 */
function splitUrl(url) {
  const trailing = /[!"&'),.:;<>?\]}]+$/.exec(url)

  if (trailing === null) {
    return [url, undefined]
  }

  let head = url.slice(0, trailing.index)
  let tail = trailing[0]
  // Number of `(` in the head that do not yet have a matching `)`.
  let unmatched = ccount(head, '(') - ccount(head, ')')
  let closeAt = tail.indexOf(')')

  while (closeAt !== -1 && unmatched > 0) {
    const cut = closeAt + 1
    head += tail.slice(0, cut)
    tail = tail.slice(cut)
    closeAt = tail.indexOf(')')
    unmatched--
  }

  return [head, tail]
}
/**
 * Check whether the character before a match allows an autolink to start.
 *
 * The match qualifies when it is at the very start of the input, or when the
 * preceding character code passes `unicodeWhitespace` or
 * `unicodePunctuation`. For emails, a preceding slash (code 47) disqualifies
 * the match.
 *
 * @param {RegExpMatchObject} match
 *   Match info; `input` and `index` are read.
 * @param {boolean | null | undefined} [email=false]
 *   Whether the match is an email address.
 * @returns {boolean}
 *   Whether the previous character is acceptable.
 */
function previous(match, email) {
  const {input, index} = match
  // `NaN` when the match starts at index 0.
  const code = input.charCodeAt(index - 1)
  const atBoundary =
    index === 0 || unicodeWhitespace(code) || unicodePunctuation(code)

  // An email must not directly follow a slash.
  return atBoundary && !(email && code === 47)
}