Skip to content

Commit 872f5e0

Browse files
authored
Improve websiteNormalizer with public suffix list (#1413)
1 parent d521f2f commit 872f5e0

File tree

3 files changed

+21
-10
lines changed

3 files changed

+21
-10
lines changed

services/libs/common/package-lock.json

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

services/libs/common/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
"dependencies": {
3030
"@crowd/logging": "file:../logging",
3131
"@crowd/types": "file:../types",
32+
"psl": "^1.9.0",
3233
"uuid": "^9.0.0"
3334
}
3435
}
Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,13 @@
1-
export const websiteNormalizer = (website: string): string => {
2-
// Prepends https:// to make valid URL
3-
const completeUrl = website.includes('://') ? website : 'https://' + website
1+
import { parse, isValid } from 'psl'
42

5-
const url = new URL(completeUrl)
6-
const hostname = url.hostname
3+
export const websiteNormalizer = (website: string): string => {
4+
// remove http:// or https://
5+
const cleanURL = website.replace(/(^\w+:|^)\/\//, '')
6+
const parsed = parse(cleanURL)
77

8-
const parts = hostname.split('.')
9-
// Ignore subdomains, return only domain and TLD
10-
if (parts.length > 2) {
11-
return parts.slice(-2).join('.')
8+
if (!isValid(cleanURL)) {
9+
return null
1210
}
1311

14-
return hostname
12+
return parsed.domain
1513
}

0 commit comments

Comments
 (0)