69 lines
2.1 KiB
JavaScript
69 lines
2.1 KiB
JavaScript
import { parse } from 'node:url';
|
||
import { createHash } from 'node:crypto';
|
||
|
||
import isEmpty from 'lodash.isempty';
|
||
import intel from 'intel';
|
||
|
||
// Module-level logger obtained from intel under the name 'sitespeedio.file'.
const log = intel.getLogger('sitespeedio.file');
|
||
|
||
/**
 * Replace every character that is awkward in a key/folder name with a dash.
 *
 * The characters replaced are: space, `%`, `&`, `(`, `)`, `+`, `,`, `.`, `/`,
 * `:`, `?`, `|`, `~` and the EN DASH (U+2013).
 *
 * Note: `%` is part of the character class, so the trailing `%7C`
 * alternative never gets a chance to match; an input of `%7C` comes out
 * as `-7C`. The pattern is kept unchanged so generated keys stay stable.
 *
 * @param {string} key - The raw key.
 * @returns {string} The key with each unsafe character replaced by `-`.
 */
function toSafeKey(key) {
  // U+2013 : EN DASH – as used on https://en.wikipedia.org/wiki/2019–20_coronavirus_pandemic
  const unsafeCharacters = /[ %&()+,./:?|~–]|%7C/g;
  return key.replaceAll(unsafeCharacters, '-');
}
|
||
|
||
/**
 * Percent-decode a URL without letting a malformed escape sequence abort
 * the caller.
 *
 * `decodeURIComponent` throws a `URIError` for input such as a bare `%`.
 * In that case the raw, undecoded URL is returned instead so a folder can
 * still be derived from it.
 *
 * @param {string} url - The URL to decode.
 * @returns {string} The decoded URL, or `url` unchanged if it cannot be decoded.
 */
function decodeUrlLeniently(url) {
  try {
    return decodeURIComponent(url);
  } catch (error) {
    if (error instanceof URIError) {
      return url;
    }
    throw error;
  }
}

/**
 * Return the first eight hex characters of the MD5 digest of `value`.
 *
 * @param {string} value - The text to hash.
 * @returns {string} An eight character lowercase hex string.
 */
function shortMd5(value) {
  return createHash('md5').update(value).digest('hex').slice(0, 8);
}

/**
 * Build the relative folder path used for a tested URL.
 *
 * The result always starts with `pages/<hostname with dots as underscores>/`
 * and always ends with a trailing `/`. What follows the hostname is, in
 * order of precedence:
 *
 * 1. `options.urlMetaData[url]` when present,
 * 2. `alias` when given,
 * 3. otherwise segments derived from the URL: its path segments, an
 *    optional `hash-<md5>` segment (only when `options.useHash` is set) and
 *    a `query-<md5>` segment when the URL has a query string. These are
 *    either pushed as nested folders or, when
 *    `options.storeURLsAsFlatPageOnDisk` is set, joined into one flat folder
 *    name that is cut off at 254 characters.
 *
 * Finally every segment has each character outside `\w`, `.`, `-` and
 * U+0621–U+064A replaced by `-`.
 *
 * @param {string} url - The URL. It is decoded leniently before parsing;
 *   the raw value is used for the `urlMetaData` lookup and the log message.
 * @param {object} options - Reads `useHash`, `urlMetaData` and
 *   `storeURLsAsFlatPageOnDisk`.
 * @param {string} [alias] - Optional alias used instead of URL-derived segments.
 * @returns {string} The folder path, ending in `/`.
 * @throws {TypeError} If the parsed URL has no hostname.
 */
export function pathToFolder(url, options, alias) {
  // The legacy parser is kept on purpose: it leaves the decoded path
  // untouched, which the character whitelist at the end relies on.
  const parsedUrl = parse(decodeUrlLeniently(url));

  const pathSegments = ['pages', parsedUrl.hostname.split('.').join('_')];

  const metaData = options.urlMetaData?.[url];

  if (metaData) {
    pathSegments.push(metaData);
  } else if (alias) {
    pathSegments.push(alias);
  } else {
    const urlSegments = [];

    if (!isEmpty(parsedUrl.pathname)) {
      urlSegments.push(...parsedUrl.pathname.split('/').filter(Boolean));
    }

    if (options.useHash && !isEmpty(parsedUrl.hash)) {
      urlSegments.push(`hash-${shortMd5(parsedUrl.hash)}`);
    }

    if (!isEmpty(parsedUrl.search)) {
      urlSegments.push(`query-${shortMd5(parsedUrl.search)}`);
    }

    // This is used from sitespeed.io to match URLs on Graphite
    if (options.storeURLsAsFlatPageOnDisk) {
      const folder = toSafeKey(`${urlSegments.join('_')}_`);
      if (folder.length > 255) {
        log.info(
          `The URL ${url} hit the 255 character limit used when stored on disk, you may want to give your URL an alias to make sure it will not collide with other URLs.`
        );
        pathSegments.push(folder.slice(0, 254));
      } else {
        pathSegments.push(folder);
      }
    } else {
      pathSegments.push(...urlSegments);
    }
  }

  // Empty segments are left as they are; everything else is reduced to the
  // whitelisted character set.
  const safeSegments = pathSegments.map(segment =>
    segment ? segment.replace(/[^\w.\u0621-\u064A-]/gi, '-') : segment
  );

  return `${safeSegments.join('/')}/`;
}
|