125 lines
3.2 KiB
JavaScript
125 lines
3.2 KiB
JavaScript
|
import {asciiAlphanumeric} from 'micromark-util-character'
|
|||
|
import {encode} from 'micromark-util-encode'
|
|||
|
import {codes, values} from 'micromark-util-symbol'
|
|||
|
|
|||
|
/**
 * Make a value safe for injection as a URL.
 *
 * This encodes unsafe characters with percent-encoding and skips already
 * encoded sequences (see `normalizeUri`).
 * Further unsafe characters are encoded as character references (see
 * `micromark-util-encode`).
 *
 * A regex of allowed protocols can be given, in which case the URL is
 * sanitized.
 * For example, `/^(https?|ircs?|mailto|xmpp)$/i` can be used for `a[href]`, or
 * `/^https?$/i` for `img[src]` (this is what `github.com` allows).
 * If the URL includes an unknown protocol (one not matched by `protocol`, such
 * as a dangerous example, `javascript:`), the value is ignored.
 *
 * The text before the first `:` is what gets tested against `protocol`.
 * A `:` that comes after the first `/`, `?`, or `#` is not treated as a
 * protocol separator, so such values are returned as-is.
 *
 * Regexes with the `g` or `y` flag are supported: their `lastIndex` is reset
 * before testing so that repeated calls give the same answer.
 *
 * @param {string | null | undefined} url
 *   URI to sanitize.
 * @param {RegExp | null | undefined} [protocol]
 *   Allowed protocols.
 * @returns {string}
 *   Sanitized URI, or an empty string if the protocol is not allowed.
 */
export function sanitizeUri(url, protocol) {
  const value = encode(normalizeUri(url || ''))

  // Without an allow-list there is nothing to check.
  if (!protocol) {
    return value
  }

  const colon = value.indexOf(':')
  const questionMark = value.indexOf('?')
  const numberSign = value.indexOf('#')
  const slash = value.indexOf('/')

  if (
    // If there is no protocol, it’s relative.
    colon < 0 ||
    // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
    (slash > -1 && colon > slash) ||
    (questionMark > -1 && colon > questionMark) ||
    (numberSign > -1 && colon > numberSign)
  ) {
    return value
  }

  // `test` on a global or sticky regex starts at `lastIndex` and updates it,
  // so without this reset a reused regex would reject every other valid URL.
  // Other regexes ignore `lastIndex`, so they are left untouched.
  if (protocol.global || protocol.sticky) {
    protocol.lastIndex = 0
  }

  // It is a protocol, it should be allowed.
  return protocol.test(value.slice(0, colon)) ? value : ''
}
|
|||
|
|
|||
|
/**
 * Normalize a URL.
 *
 * Encode unsafe characters with percent-encoding, skipping already encoded
 * sequences.
 *
 * The value is walked one UTF-16 code unit at a time.
 * Runs of characters that can stay as they are get copied verbatim; anything
 * else is passed through `encodeURIComponent`.
 * A lone surrogate is replaced with `values.replacementCharacter` before
 * encoding.
 *
 * @param {string} value
 *   URI to normalize.
 * @returns {string}
 *   Normalized URI.
 */
export function normalizeUri(value) {
  // ASCII characters that are kept as-is; built once instead of per character.
  const safeAscii = /[!#$&-;=?-Z_a-z~]/
  /** @type {Array<string>} */
  const result = []
  let index = -1
  // Start of the pending run of characters that need no encoding.
  let start = 0

  while (++index < value.length) {
    const code = value.charCodeAt(index)
    /**
     * Text to percent-encode in place of the current character(s); empty
     * means “keep as-is”.
     *
     * @type {string}
     */
    let replace = ''
    // Number of extra code units (after the current one) consumed this step.
    let skip = 0

    if (
      code === codes.percentSign &&
      asciiAlphanumeric(value.charCodeAt(index + 1)) &&
      asciiAlphanumeric(value.charCodeAt(index + 2))
    ) {
      // Treated as an already percent-encoded value: keep `%` and the next two
      // characters untouched.
      // NOTE(review): presumably `asciiAlphanumeric` accepts any ASCII letter
      // or digit, not only hex digits, so e.g. `%zz` would be kept — confirm.
      skip = 2
    } else if (code < 128) {
      // ASCII: encode only what is outside the allow-list.
      const character = String.fromCharCode(code)

      if (!safeAscii.test(character)) {
        replace = character
      }
    } else if (code > 55_295 && code < 57_344) {
      // Surrogate range (U+D800 to U+DFFF).
      const next = value.charCodeAt(index + 1)

      if (code < 56_320 && next > 56_319 && next < 57_344) {
        // A high surrogate followed by a low surrogate: encode the pair as one
        // character.
        replace = String.fromCharCode(code, next)
        skip = 1
      } else {
        // Lone surrogate: `encodeURIComponent` would throw on it, so
        // substitute the replacement character.
        replace = values.replacementCharacter
      }
    } else {
      // Any other non-ASCII character.
      replace = String.fromCharCode(code)
    }

    if (replace) {
      // Flush the untouched run before this character, then the encoded form.
      result.push(value.slice(start, index), encodeURIComponent(replace))
      start = index + skip + 1
    }

    index += skip
  }

  return result.join('') + value.slice(start)
}
|