341 lines
8.5 KiB
JavaScript
341 lines
8.5 KiB
JavaScript
|
"use strict";
|
||
|
|
||
|
const punycode = require("punycode/");
|
||
|
const regexes = require("./lib/regexes.js");
|
||
|
const mappingTable = require("./lib/mappingTable.json");
|
||
|
const { STATUS_MAPPING } = require("./lib/statusMapping.js");
|
||
|
|
||
|
/**
 * Reports whether a string contains at least one code point outside the ASCII range
 * (U+0000 through U+007F).
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} true when a non-ASCII code point is present, false otherwise.
 */
function containsNonASCII(str) {
  const nonASCIIPattern = /[^\x00-\x7F]/u;
  return nonASCIIPattern.test(str);
}
|
||
|
|
||
|
/**
 * Binary-searches `mappingTable` for the entry that covers a code point and returns its status
 * information.
 *
 * Each table entry is read as `[codePointOrRange, status, ...rest]`, where the first element is
 * either a single number or a `[min, max]` pair. The search relies on the entries being ordered by
 * code point (assumption — TODO confirm against lib/mappingTable.json).
 *
 * @param {number} val - Code point to look up.
 * @param {object} options
 * @param {boolean} options.useSTD3ASCIIRules - When truthy, both `disallowed_STD3_*` statuses are
 *   reported as `disallowed`; otherwise they are reported as `valid` and `mapped` respectively.
 * @returns {Array|null} `[status, ...rest]` for the covering entry, or null when no entry covers
 *   `val`.
 */
function findStatus(val, { useSTD3ASCIIRules }) {
  let lo = 0;
  let hi = mappingTable.length - 1;

  while (lo <= hi) {
    const pivot = Math.floor((lo + hi) / 2);
    const entry = mappingTable[pivot];

    // The first element is either one code point or an inclusive [min, max] range.
    const span = entry[0];
    const isRange = Array.isArray(span);
    const min = isRange ? span[0] : span;
    const max = isRange ? span[1] : span;

    const covers = min <= val && max >= val;
    if (!covers) {
      // Narrow the search window towards the side that can still contain `val`.
      if (min > val) {
        hi = pivot - 1;
      } else {
        lo = pivot + 1;
      }
      continue;
    }

    const status = entry[1];
    const isSTD3Valid = status === STATUS_MAPPING.disallowed_STD3_valid;
    const isSTD3Mapped = status === STATUS_MAPPING.disallowed_STD3_mapped;

    // Every other status is passed through exactly as stored in the table.
    if (!isSTD3Valid && !isSTD3Mapped) {
      return entry.slice(1);
    }

    // STD3-dependent statuses are resolved here so callers never see them.
    const rest = entry.slice(2);
    if (useSTD3ASCIIRules) {
      return [STATUS_MAPPING.disallowed, ...rest];
    }
    return [isSTD3Valid ? STATUS_MAPPING.valid : STATUS_MAPPING.mapped, ...rest];
  }

  return null;
}
|
||
|
|
||
|
/**
 * Applies the per-code-point mapping step to a domain name.
 *
 * Every code point of `domainName` is looked up with `findStatus` and then either kept, dropped,
 * or replaced by its mapping, depending on the status returned.
 *
 * @param {string} domainName - Input domain name.
 * @param {object} options
 * @param {boolean} options.useSTD3ASCIIRules - Forwarded to `findStatus`.
 * @param {boolean} options.transitionalProcessing - When truthy, deviation code points are replaced
 *   by their mapping and "ẞ" is mapped to "ss".
 * @returns {string} The mapped string.
 */
function mapChars(domainName, { useSTD3ASCIIRules, transitionalProcessing }) {
  let output = "";

  for (const ch of domainName) {
    const [status, mapping] = findStatus(ch.codePointAt(0), { useSTD3ASCIIRules });

    if (status === STATUS_MAPPING.disallowed) {
      // Disallowed code points are kept as-is at this stage.
      output += ch;
    } else if (status === STATUS_MAPPING.ignored) {
      // Ignored code points are dropped from the output.
    } else if (status === STATUS_MAPPING.mapped) {
      output += transitionalProcessing && ch === "ẞ" ? "ss" : mapping;
    } else if (status === STATUS_MAPPING.deviation) {
      output += transitionalProcessing ? mapping : ch;
    } else if (status === STATUS_MAPPING.valid) {
      output += ch;
    }
  }

  return output;
}
|
||
|
|
||
|
/**
 * Checks one label against the validity criteria quoted in the comments below.
 *
 * @param {string} label - The label to validate (already mapped and normalized by the caller).
 * @param {object} options
 * @param {boolean} options.checkHyphens - Enables the hyphen position checks (criteria 2 and 3).
 * @param {boolean} options.checkBidi - Enables the Bidi check (criterion 9) when `isBidi` is set.
 * @param {boolean} options.checkJoiners - Enables the ZWNJ/ZWJ context check (criterion 8).
 * @param {boolean} options.transitionalProcessing - When truthy, only `valid` code points pass
 *   criterion 7; otherwise `deviation` code points pass as well.
 * @param {boolean} options.useSTD3ASCIIRules - Forwarded to `findStatus`.
 * @param {boolean} options.isBidi - Whether the whole domain name was classified as Bidi.
 * @returns {boolean} true when the label passes every enabled check, false otherwise.
 */
function validateLabel(label, {
  checkHyphens,
  checkBidi,
  checkJoiners,
  transitionalProcessing,
  useSTD3ASCIIRules,
  isBidi
}) {
  // The criteria "must be satisfied for a non-empty label", so an empty label passes trivially.
  if (label.length === 0) {
    return true;
  }

  // "1. The label must be in Unicode Normalization Form NFC."
  if (label !== label.normalize("NFC")) {
    return false;
  }

  const codePoints = [...label];

  // "2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character in both the
  // third and fourth positions."
  // "3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS
  // character."
  if (checkHyphens) {
    const hyphensInThirdAndFourth = codePoints[2] === "-" && codePoints[3] === "-";
    const hyphenAtEitherEnd = label.startsWith("-") || label.endsWith("-");
    if (hyphensInThirdAndFourth || hyphenAtEitherEnd) {
      return false;
    }
  }

  // "4. If not CheckHyphens, the label must not begin with “xn--”."
  // Intentionally not enforced while https://github.com/whatwg/url/issues/803 is unresolved.

  // "5. The label must not contain a U+002E ( . ) FULL STOP."
  if (label.includes(".")) {
    return false;
  }

  // "6. The label must not begin with a combining mark, that is: General_Category=Mark."
  if (regexes.combiningMarks.test(codePoints[0])) {
    return false;
  }

  // "7. Each code point in the label must only have certain Status values according to Section 5":
  // transitional processing accepts only `valid`; nontransitional also accepts `deviation`.
  for (const ch of codePoints) {
    const [status] = findStatus(ch.codePointAt(0), { useSTD3ASCIIRules });
    const acceptable = status === STATUS_MAPPING.valid ||
      (!transitionalProcessing && status === STATUS_MAPPING.deviation);
    if (!acceptable) {
      return false;
    }
  }

  // "8. If CheckJoiners, the label must satisfy the ContextJ rules"
  // https://tools.ietf.org/html/rfc5892#appendix-A
  if (checkJoiners) {
    const ZWNJ = "\u200C";
    const ZWJ = "\u200D";

    // Start index of the slice that is matched against `validZWNJ`; it moves past each ZWNJ
    // that has been accepted through that regex.
    let windowStart = 0;

    for (let i = 0; i < codePoints.length; i++) {
      const ch = codePoints[i];
      if (ch !== ZWNJ && ch !== ZWJ) {
        continue;
      }

      // A joiner in the first position has no preceding code point to justify it.
      if (i === 0) {
        return false;
      }

      // Either joiner is accepted directly after a code point matched by `combiningClassVirama`.
      if (regexes.combiningClassVirama.test(codePoints[i - 1])) {
        continue;
      }

      // Only ZWNJ has a second way to pass; ZWJ fails at this point.
      if (ch !== ZWNJ) {
        return false;
      }

      // TODO: make this more efficient
      const nextZWNJ = codePoints.indexOf(ZWNJ, i + 1);
      const window = nextZWNJ < 0 ? codePoints.slice(windowStart) : codePoints.slice(windowStart, nextZWNJ);
      if (!regexes.validZWNJ.test(window.join(""))) {
        return false;
      }
      windowStart = i + 1;
    }
  }

  // "9. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy..."
  // https://tools.ietf.org/html/rfc5893#section-2
  if (checkBidi && isBidi) {
    const first = codePoints[0];

    // Rule 1: the first code point decides whether the label is treated as LTR or RTL.
    let rtl;
    if (regexes.bidiS1LTR.test(first)) {
      rtl = false;
    } else if (regexes.bidiS1RTL.test(first)) {
      rtl = true;
    } else {
      return false;
    }

    if (rtl) {
      // Rules 2-4 apply to RTL labels.
      if (!regexes.bidiS2.test(label)) {
        return false;
      }
      if (!regexes.bidiS3.test(label)) {
        return false;
      }
      if (regexes.bidiS4EN.test(label) && regexes.bidiS4AN.test(label)) {
        return false;
      }
    } else {
      // Rules 5-6 apply to LTR labels.
      if (!regexes.bidiS5.test(label)) {
        return false;
      }
      if (!regexes.bidiS6.test(label)) {
        return false;
      }
    }
  }

  return true;
}
|
||
|
|
||
|
/**
 * Decides whether a domain name, given as its labels, matches `regexes.bidiDomain`.
 *
 * Labels starting with "xn--" are Punycode-decoded before the test; a label that fails to decode
 * contributes an empty string instead.
 *
 * @param {string[]} labels - The labels of the domain name.
 * @returns {boolean} Result of testing the re-joined, decoded domain against `regexes.bidiDomain`.
 */
function isBidiDomain(labels) {
  const decodedLabels = [];

  for (const label of labels) {
    let text = label;
    if (label.startsWith("xn--")) {
      try {
        text = punycode.decode(label.substring(4));
      } catch {
        // Undecodable Punycode is treated as an empty label for this check only.
        text = "";
      }
    }
    decodedLabels.push(text);
  }

  return regexes.bidiDomain.test(decodedLabels.join("."));
}
|
||
|
|
||
|
/**
 * Runs the shared processing pipeline used by `toASCII` and `toUnicode`: map, normalize, break
 * into labels, then convert/validate each label.
 *
 * @param {string} domainName - Input domain name.
 * @param {object} options - Processing flags. Passed to `mapChars` and spread into the options for
 *   `validateLabel`; `transitionalProcessing` and `ignoreInvalidPunycode` are also read here.
 * @returns {{string: string, error: boolean}} The processed domain (labels joined with ".") and
 *   whether any label failed conversion or validation.
 */
function processing(domainName, options) {
  // 1. Map, then 2. Normalize.
  const normalized = mapChars(domainName, options).normalize("NFC");

  // 3. Break.
  const labels = normalized.split(".");
  const isBidi = isBidiDomain(labels);

  // 4. Convert/Validate.
  let error = false;
  for (let i = 0; i < labels.length; i++) {
    let label = labels[i];
    const hasACEPrefix = label.startsWith("xn--");

    if (hasACEPrefix) {
      if (containsNonASCII(label)) {
        error = true;
        continue;
      }

      try {
        label = punycode.decode(label.substring(4));
      } catch {
        if (!options.ignoreInvalidPunycode) {
          error = true;
          continue;
        }
        // With ignoreInvalidPunycode set, the undecoded label is kept and still validated.
      }
      labels[i] = label;
    }

    // No need to validate if we already know there is an error.
    if (error) {
      continue;
    }

    // Labels that carried the "xn--" prefix are always validated nontransitionally.
    const transitionalProcessing = hasACEPrefix ? false : options.transitionalProcessing;
    error = !validateLabel(label, { ...options, transitionalProcessing, isBidi });
  }

  return {
    string: labels.join("."),
    error
  };
}
|
||
|
|
||
|
/**
 * Converts a domain name to its ASCII form.
 *
 * The name is run through `processing`, then every label that still contains non-ASCII code
 * points is Punycode-encoded and prefixed with "xn--".
 *
 * @param {string} domainName - Input domain name.
 * @param {object} [options]
 * @param {boolean} [options.checkHyphens=false] - Forwarded to `processing`.
 * @param {boolean} [options.checkBidi=false] - Forwarded to `processing`.
 * @param {boolean} [options.checkJoiners=false] - Forwarded to `processing`.
 * @param {boolean} [options.useSTD3ASCIIRules=false] - Forwarded to `processing`.
 * @param {boolean} [options.verifyDNSLength=false] - When truthy, the result must be 1 to 253
 *   characters long and every label 1 to 63 characters long.
 * @param {boolean} [options.transitionalProcessing=false] - Forwarded to `processing`.
 * @param {boolean} [options.ignoreInvalidPunycode=false] - Forwarded to `processing`.
 * @returns {string|null} The ASCII domain name, or null when any step reported an error.
 */
function toASCII(domainName, {
  checkHyphens = false,
  checkBidi = false,
  checkJoiners = false,
  useSTD3ASCIIRules = false,
  verifyDNSLength = false,
  transitionalProcessing = false,
  ignoreInvalidPunycode = false
} = {}) {
  const result = processing(domainName, {
    checkHyphens,
    checkBidi,
    checkJoiners,
    useSTD3ASCIIRules,
    transitionalProcessing,
    ignoreInvalidPunycode
  });

  const labels = [];
  for (const l of result.string.split(".")) {
    let ascii = l;
    if (containsNonASCII(l)) {
      try {
        ascii = `xn--${punycode.encode(l)}`;
      } catch {
        // The label is kept unencoded; the error flag makes the final result null anyway.
        result.error = true;
      }
    }
    labels.push(ascii);
  }

  if (verifyDNSLength) {
    const total = labels.join(".").length;
    if (total > 253 || total === 0) {
      result.error = true;
    }

    if (labels.some(l => l.length > 63 || l.length === 0)) {
      result.error = true;
    }
  }

  return result.error ? null : labels.join(".");
}
|
||
|
|
||
|
/**
 * Converts a domain name to its Unicode form by running it through `processing`.
 *
 * Unlike `toASCII`, the processed domain is always returned, together with the error flag.
 *
 * @param {string} domainName - Input domain name.
 * @param {object} [options]
 * @param {boolean} [options.checkHyphens=false] - Forwarded to `processing`.
 * @param {boolean} [options.checkBidi=false] - Forwarded to `processing`.
 * @param {boolean} [options.checkJoiners=false] - Forwarded to `processing`.
 * @param {boolean} [options.useSTD3ASCIIRules=false] - Forwarded to `processing`.
 * @param {boolean} [options.transitionalProcessing=false] - Forwarded to `processing`.
 * @param {boolean} [options.ignoreInvalidPunycode=false] - Forwarded to `processing`.
 * @returns {{domain: string, error: boolean}} The processed domain and whether an error occurred.
 */
function toUnicode(domainName, {
  checkHyphens = false,
  checkBidi = false,
  checkJoiners = false,
  useSTD3ASCIIRules = false,
  transitionalProcessing = false,
  ignoreInvalidPunycode = false
} = {}) {
  const { string: domain, error } = processing(domainName, {
    checkHyphens,
    checkBidi,
    checkJoiners,
    useSTD3ASCIIRules,
    transitionalProcessing,
    ignoreInvalidPunycode
  });

  return { domain, error };
}
|
||
|
|
||
|
// Public API: the two conversion entry points defined in this file.
module.exports = { toASCII, toUnicode };
|