import {asciiAlphanumeric} from 'micromark-util-character'
import {encode} from 'micromark-util-encode'

/**
 * Make a value safe for injection as a URL.
 *
 * This encodes unsafe characters with percent-encoding and skips already
 * encoded sequences (see `normalizeUri`).
 * Further unsafe characters are encoded as character references (see
 * `micromark-util-encode`).
 *
 * A regex of allowed protocols can be given, in which case the URL is
 * sanitized.
 * For example, `/^(https?|ircs?|mailto|xmpp)$/i` can be used for `a[href]`, or
 * `/^https?$/i` for `img[src]` (this is what `github.com` allows).
 * If the URL includes an unknown protocol (one not matched by `protocol`, such
 * as a dangerous example, `javascript:`), the value is ignored and an empty
 * string is returned.
 *
 * The protocol handed to `protocol.test` is everything before the first `:`,
 * without the colon itself.
 * Regexes with the `g` or `y` flag are matched from the start on every call
 * (their `lastIndex` is reset first), so reusing one regex across calls gives
 * consistent results.
 *
 * @param {string | null | undefined} url
 *   URI to sanitize; `null`, `undefined`, and `''` are all treated as `''`.
 * @param {RegExp | null | undefined} [protocol]
 *   Allowed protocols; when omitted, no protocol check is done.
 * @returns {string}
 *   Sanitized URI, or `''` if its protocol is not allowed.
 */
export function sanitizeUri(url, protocol) {
  const value = encode(normalizeUri(url || ''))

  if (!protocol) {
    return value
  }

  const colon = value.indexOf(':')
  const questionMark = value.indexOf('?')
  const numberSign = value.indexOf('#')
  const slash = value.indexOf('/')

  if (
    // If there is no protocol, it’s relative.
    colon < 0 ||
    // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
    (slash > -1 && colon > slash) ||
    (questionMark > -1 && colon > questionMark) ||
    (numberSign > -1 && colon > numberSign)
  ) {
    return value
  }

  // `RegExp#test` resumes from `lastIndex` for global and sticky regexes, so
  // a reused regex could reject an allowed protocol on alternate calls.
  // Other regexes ignore `lastIndex`, so they are left untouched (which also
  // avoids writing to a frozen regex).
  if (protocol.global || protocol.sticky) {
    protocol.lastIndex = 0
  }

  // It is a protocol, it should be allowed.
  return protocol.test(value.slice(0, colon)) ? value : ''
}

/**
 * Normalize a URL.
 *
 * Encode unsafe characters with percent-encoding, skipping already encoded
 * sequences.
 *
 * Walks the string one UTF-16 code unit at a time:
 *
 * - `%` followed by two characters accepted by `asciiAlphanumeric` is kept
 *   as is (treated as an already encoded sequence)
 * - other ASCII characters are kept if they match the safe set below,
 *   otherwise they are percent-encoded (so a bare `%` becomes `%25`)
 * - a valid surrogate pair is percent-encoded as one character
 * - a lone surrogate is replaced by U+FFFD (encoded as `%EF%BF%BD`)
 * - every other non-ASCII character is percent-encoded
 *
 * @param {string} value
 *   URI to normalize.
 * @returns {string}
 *   Normalized URI.
 */
export function normalizeUri(value) {
  // ASCII characters that may stay unencoded: `!`, `#`, `$`, `&` through `;`
  // (which includes digits and `'()*+,-./:`), `=`, `?` through `Z` (which
  // includes `@` and uppercase letters), `_`, lowercase letters, and `~`.
  // Not global or sticky, so it is safe to reuse across iterations.
  const safeAscii = /[!#$&-;=?-Z_a-z~]/
  /** @type {Array<string>} */
  const result = []
  let index = -1
  // Start of the pending run of characters that need no encoding.
  let start = 0
  // Number of extra code units consumed by the current character.
  let skip = 0

  while (++index < value.length) {
    const code = value.charCodeAt(index)
    /** @type {string} */
    let replace = ''

    // A correct percent encoded value.
    // NOTE(review): this uses `asciiAlphanumeric`, which presumably accepts
    // any ASCII letter or digit, not only hex digits; kept as is.
    if (
      code === 37 &&
      asciiAlphanumeric(value.charCodeAt(index + 1)) &&
      asciiAlphanumeric(value.charCodeAt(index + 2))
    ) {
      skip = 2
    }
    // ASCII.
    else if (code < 128) {
      if (!safeAscii.test(String.fromCharCode(code))) {
        replace = String.fromCharCode(code)
      }
    }
    // Astral.
    else if (code > 55_295 && code < 57_344) {
      const next = value.charCodeAt(index + 1)

      // A correct surrogate pair.
      if (code < 56_320 && next > 56_319 && next < 57_344) {
        replace = String.fromCharCode(code, next)
        skip = 1
      }
      // Lone surrogate: `encodeURIComponent` would throw on it, so use the
      // replacement character instead.
      else {
        replace = '\uFFFD'
      }
    }
    // Unicode.
    else {
      replace = String.fromCharCode(code)
    }

    if (replace) {
      // Flush the untouched run before this character, then its encoding.
      result.push(value.slice(start, index), encodeURIComponent(replace))
      start = index + skip + 1
    }

    if (skip) {
      index += skip
      skip = 0
    }
  }

  return result.join('') + value.slice(start)
}