"use strict"; const punycode = require("punycode/"); const regexes = require("./lib/regexes.js"); const mappingTable = require("./lib/mappingTable.json"); const { STATUS_MAPPING } = require("./lib/statusMapping.js"); function containsNonASCII(str) { return /[^\x00-\x7F]/u.test(str); } function findStatus(val, { useSTD3ASCIIRules }) { let start = 0; let end = mappingTable.length - 1; while (start <= end) { const mid = Math.floor((start + end) / 2); const target = mappingTable[mid]; const min = Array.isArray(target[0]) ? target[0][0] : target[0]; const max = Array.isArray(target[0]) ? target[0][1] : target[0]; if (min <= val && max >= val) { if (useSTD3ASCIIRules && (target[1] === STATUS_MAPPING.disallowed_STD3_valid || target[1] === STATUS_MAPPING.disallowed_STD3_mapped)) { return [STATUS_MAPPING.disallowed, ...target.slice(2)]; } else if (target[1] === STATUS_MAPPING.disallowed_STD3_valid) { return [STATUS_MAPPING.valid, ...target.slice(2)]; } else if (target[1] === STATUS_MAPPING.disallowed_STD3_mapped) { return [STATUS_MAPPING.mapped, ...target.slice(2)]; } return target.slice(1); } else if (min > val) { end = mid - 1; } else { start = mid + 1; } } return null; } function mapChars(domainName, { useSTD3ASCIIRules, transitionalProcessing }) { let processed = ""; for (const ch of domainName) { const [status, mapping] = findStatus(ch.codePointAt(0), { useSTD3ASCIIRules }); switch (status) { case STATUS_MAPPING.disallowed: processed += ch; break; case STATUS_MAPPING.ignored: break; case STATUS_MAPPING.mapped: if (transitionalProcessing && ch === "ẞ") { processed += "ss"; } else { processed += mapping; } break; case STATUS_MAPPING.deviation: if (transitionalProcessing) { processed += mapping; } else { processed += ch; } break; case STATUS_MAPPING.valid: processed += ch; break; } } return processed; } function validateLabel(label, { checkHyphens, checkBidi, checkJoiners, transitionalProcessing, useSTD3ASCIIRules, isBidi }) { // "must be satisfied for a non-empty label" if (label.length === 0) { return true; } // "1. The label must be in Unicode Normalization Form NFC." if (label.normalize("NFC") !== label) { return false; } const codePoints = Array.from(label); // "2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the // third and fourth positions." // // "3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character." if (checkHyphens) { if ((codePoints[2] === "-" && codePoints[3] === "-") || (label.startsWith("-") || label.endsWith("-"))) { return false; } } // "4. If not CheckHyphens, the label must not begin with “xn--”." // Disabled while we figure out https://github.com/whatwg/url/issues/803. // if (!checkHyphens) { // if (label.startsWith("xn--")) { // return false; // } // } // "5. The label must not contain a U+002E ( . ) FULL STOP." if (label.includes(".")) { return false; } // "6. The label must not begin with a combining mark, that is: General_Category=Mark." if (regexes.combiningMarks.test(codePoints[0])) { return false; } // "7. Each code point in the label must only have certain Status values according to Section 5" for (const ch of codePoints) { const [status] = findStatus(ch.codePointAt(0), { useSTD3ASCIIRules }); if (transitionalProcessing) { // "For Transitional Processing (deprecated), each value must be valid." if (status !== STATUS_MAPPING.valid) { return false; } } else if (status !== STATUS_MAPPING.valid && status !== STATUS_MAPPING.deviation) { // "For Nontransitional Processing, each value must be either valid or deviation." return false; } } // "8. If CheckJoiners, the label must satisify the ContextJ rules" // https://tools.ietf.org/html/rfc5892#appendix-A if (checkJoiners) { let last = 0; for (const [i, ch] of codePoints.entries()) { if (ch === "\u200C" || ch === "\u200D") { if (i > 0) { if (regexes.combiningClassVirama.test(codePoints[i - 1])) { continue; } if (ch === "\u200C") { // TODO: make this more efficient const next = codePoints.indexOf("\u200C", i + 1); const test = next < 0 ? codePoints.slice(last) : codePoints.slice(last, next); if (regexes.validZWNJ.test(test.join(""))) { last = i + 1; continue; } } } return false; } } } // "9. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy..." // https://tools.ietf.org/html/rfc5893#section-2 if (checkBidi && isBidi) { let rtl; // 1 if (regexes.bidiS1LTR.test(codePoints[0])) { rtl = false; } else if (regexes.bidiS1RTL.test(codePoints[0])) { rtl = true; } else { return false; } if (rtl) { // 2-4 if (!regexes.bidiS2.test(label) || !regexes.bidiS3.test(label) || (regexes.bidiS4EN.test(label) && regexes.bidiS4AN.test(label))) { return false; } } else if (!regexes.bidiS5.test(label) || !regexes.bidiS6.test(label)) { // 5-6 return false; } } return true; } function isBidiDomain(labels) { const domain = labels.map(label => { if (label.startsWith("xn--")) { try { return punycode.decode(label.substring(4)); } catch (err) { return ""; } } return label; }).join("."); return regexes.bidiDomain.test(domain); } function processing(domainName, options) { // 1. Map. let string = mapChars(domainName, options); // 2. Normalize. string = string.normalize("NFC"); // 3. Break. const labels = string.split("."); const isBidi = isBidiDomain(labels); // 4. Convert/Validate. let error = false; for (const [i, origLabel] of labels.entries()) { let label = origLabel; let transitionalProcessingForThisLabel = options.transitionalProcessing; if (label.startsWith("xn--")) { if (containsNonASCII(label)) { error = true; continue; } try { label = punycode.decode(label.substring(4)); } catch { if (!options.ignoreInvalidPunycode) { error = true; continue; } } labels[i] = label; transitionalProcessingForThisLabel = false; } // No need to validate if we already know there is an error. if (error) { continue; } const validation = validateLabel(label, { ...options, transitionalProcessing: transitionalProcessingForThisLabel, isBidi }); if (!validation) { error = true; } } return { string: labels.join("."), error }; } function toASCII(domainName, { checkHyphens = false, checkBidi = false, checkJoiners = false, useSTD3ASCIIRules = false, verifyDNSLength = false, transitionalProcessing = false, ignoreInvalidPunycode = false } = {}) { const result = processing(domainName, { checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, transitionalProcessing, ignoreInvalidPunycode }); let labels = result.string.split("."); labels = labels.map(l => { if (containsNonASCII(l)) { try { return `xn--${punycode.encode(l)}`; } catch (e) { result.error = true; } } return l; }); if (verifyDNSLength) { const total = labels.join(".").length; if (total > 253 || total === 0) { result.error = true; } for (let i = 0; i < labels.length; ++i) { if (labels[i].length > 63 || labels[i].length === 0) { result.error = true; break; } } } if (result.error) { return null; } return labels.join("."); } function toUnicode(domainName, { checkHyphens = false, checkBidi = false, checkJoiners = false, useSTD3ASCIIRules = false, transitionalProcessing = false, ignoreInvalidPunycode = false } = {}) { const result = processing(domainName, { checkHyphens, checkBidi, checkJoiners, useSTD3ASCIIRules, transitionalProcessing, ignoreInvalidPunycode }); return { domain: result.string, error: result.error }; } module.exports = { toASCII, toUnicode };