From cfa4097df90407ad763365755ba1ae8e1bdb9be0 Mon Sep 17 00:00:00 2001 From: Siddharth Dushantha Date: Wed, 26 Jun 2024 21:57:11 +0200 Subject: [PATCH 01/91] removed support for tor --- sherlock/sherlock.py | 58 +++----------------------------------------- 1 file changed, 4 insertions(+), 54 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index db8e9c2c..170ea32a 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -30,7 +30,6 @@ from .__init__ import ( # noqa: E402 ) from requests_futures.sessions import FuturesSession # noqa: E402 -from torrequest import TorRequest # noqa: E402 from sherlock.result import QueryStatus # noqa: E402 from sherlock.result import QueryResult # noqa: E402 from sherlock.notify import QueryNotify # noqa: E402 @@ -166,8 +165,6 @@ def sherlock( username, site_data, query_notify: QueryNotify, - tor: bool = False, - unique_tor: bool = False, proxy=None, timeout=60, ): @@ -182,8 +179,6 @@ def sherlock( query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. - tor -- Boolean indicating whether to use a tor circuit for the requests. - unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. @@ -204,20 +199,10 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) - # Create session based on request methodology - if tor or unique_tor: - # Requests using Tor obfuscation - try: - underlying_request = TorRequest() - except OSError: - print("Tor not found in system path. 
Unable to continue.\n") - sys.exit(query_notify.finish()) - underlying_session = underlying_request.session - else: - # Normal requests - underlying_session = requests.session() - underlying_request = requests.Request() + # Normal requests + underlying_session = requests.session() + underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. @@ -341,15 +326,10 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future - # Reset identify for tor (if needed) - if unique_tor: - underlying_request.reset_identity() - # Add this site's results into final dictionary with all the other results. results_total[social_network] = results_site # Open the file containing account links - # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -547,23 +527,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ) - parser.add_argument( - "--tor", - "-t", - action="store_true", - dest="tor", - default=False, - help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", - ) - parser.add_argument( - "--unique-tor", - "-u", - action="store_true", - dest="unique_tor", - default=False, - help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", - ) + ), parser.add_argument( "--csv", action="store_true", @@ -687,22 +651,10 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check - # TODO regex check on args.proxy - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") - # Make prompts if args.proxy is not None: print("Using the 
proxy: " + args.proxy) - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) - if args.no_color: # Disable color output. init(strip=True, convert=False) @@ -781,8 +733,6 @@ def main(): username, site_data, query_notify, - tor=args.tor, - unique_tor=args.unique_tor, proxy=args.proxy, timeout=args.timeout, ) From 44ad8f506a3719e25bfd894a90e2952199d405f6 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 28 Jun 2024 23:38:44 -0400 Subject: [PATCH 02/91] Lint --- sherlock/sherlock.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 170ea32a..30346bd5 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -202,7 +202,6 @@ def sherlock( # Normal requests underlying_session = requests.session() - underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. @@ -527,7 +526,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ), + ) parser.add_argument( "--csv", action="store_true", From 2016892e648c5bc5674d2b3b910cd9f36988bcff Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 28 Jun 2024 23:39:38 -0400 Subject: [PATCH 03/91] Remove torrequest dep Not sure why it's not in my patch file, but I was removing via sed in my spec instead. 
--- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fafa9f85..5674f016 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,6 @@ PySocks = "^1.7.0" requests = "^2.22.0" requests-futures = "^1.0.0" stem = "^1.8.0" -torrequest = "^0.1.0" # pandas can likely be bumped up to ^2.0.0 after fc39 EOL pandas = ">=1.0.0,<3.0.0" openpyxl = "^3.0.10" From 5e3828882e7dde9880c8a065a199da3b490b804c Mon Sep 17 00:00:00 2001 From: Jason Tenpenny <50330957+jasontenpenny@users.noreply.github.com> Date: Sun, 2 Mar 2025 22:40:32 -0600 Subject: [PATCH 04/91] Sort Bluesky Alphabetically moved the Bluesky config to its proper alphabetical location so it can be found easier --- sherlock_project/resources/data.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..772af09a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -257,6 +257,13 @@ "urlMain": "https://www.blogger.com/", "username_claimed": "blue" }, + "Bluesky": { + "errorType": "status_code", + "url": "https://bsky.app/profile/{}.bsky.social", + "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", + "urlMain": "https://bsky.app/", + "username_claimed": "mcuban" + }, "BoardGameGeek": { "errorType": "message", "regexCheck": "^[a-zA-Z0-9_]*$", @@ -2730,12 +2737,5 @@ "url": "https://www.znanylekarz.pl/{}", "urlMain": "https://znanylekarz.pl", "username_claimed": "janusz-nowak" - }, - "Bluesky": { - "errorType": "status_code", - "url": "https://bsky.app/profile/{}.bsky.social", - "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", - "urlMain": "https://bsky.app/", - "username_claimed": "mcuban" } } From 6c1623a3ad1e1978a14c78891052a1928fb978b2 Mon Sep 17 00:00:00 2001 From: S1lvus Date: Mon, 7 Apr 2025 20:28:28 -0400 Subject: 
[PATCH 05/91] Added CashApp into the site list. This adds username search for the CashApp financial platform. --- sherlock_project/resources/data.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..4c5747d0 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -357,6 +357,13 @@ "urlMain": "https://career.habr.com/", "username_claimed": "blue" }, + "CashApp": { + "errorType": "status_code", + "url": "https://cash.app/${}", + "urlMain": "https://cash.app", + "username_claimed": "hotdiggitydog", + "username_unclaimed": "HelloItsADell" + }, "Championat": { "errorType": "status_code", "url": "https://www.championat.com/user/{}", @@ -2739,3 +2746,4 @@ "username_claimed": "mcuban" } } + From e0f184f2634e774d064be0290a459f6666daf030 Mon Sep 17 00:00:00 2001 From: S1lvus Date: Mon, 7 Apr 2025 20:31:17 -0400 Subject: [PATCH 06/91] Removed extra spaces --- sherlock_project/resources/data.json | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c5747d0..bf36bf11 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2746,4 +2746,3 @@ "username_claimed": "mcuban" } } - From de81f386221abb73e56869f40a7ee74c316a2360 Mon Sep 17 00:00:00 2001 From: kareemeldahshoury Date: Tue, 29 Apr 2025 15:25:31 -0500 Subject: [PATCH 07/91] Fix Issue #2442: Added support for Aparat --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..071dd88e 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -121,6 +121,12 @@ "urlMain": "https://discussions.apple.com", "username_claimed": "jason" }, + "Aparat": { + "errorType": "status_code", + "url": 
"https://www.aparat.com/{}/", + "urlMain": "https://www.aparat.com/", + "username_claimed": "jadi" + }, "Archive of Our Own": { "errorType": "status_code", "regexCheck": "^[^.]*?$", From f599ae5ff1d95e45b630547845acb3748cb8dcd2 Mon Sep 17 00:00:00 2001 From: Rhythm Bansal Date: Wed, 30 Apr 2025 02:06:10 +0530 Subject: [PATCH 08/91] fixed minecraft --- sherlock_project/resources/data.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..d12dab6f 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1341,8 +1341,8 @@ "username_claimed": "blue" }, "Minecraft": { - "errorCode": 204, - "errorType": "status_code", + "errorMsg": "Couldn't find any profile with name", + "errorType": "message", "url": "https://api.mojang.com/users/profiles/minecraft/{}", "urlMain": "https://minecraft.net/", "username_claimed": "blue" From 9e54e68da51c5579dab86208f028d7db444cbc35 Mon Sep 17 00:00:00 2001 From: Maxwell Oldshein Date: Thu, 26 Jun 2025 15:10:44 -0400 Subject: [PATCH 09/91] Add Playstrategy support --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..5b8f489a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1574,6 +1574,12 @@ "urlMain": "https://play.google.com/store", "username_claimed": "Facebook" }, + "Playstrategy": { + "errorType": "status_code", + "url": "https://playstrategy.org/@/{}", + "urlMain": "https://playstrategy.org", + "username_claimed": "oruro" + }, "PocketStars": { "errorMsg": "Join Your Favorite Adult Stars", "errorType": "message", From c82c00650ae94c384bec80bd1f18efbef636b248 Mon Sep 17 00:00:00 2001 From: Maxwell Oldshein Date: Thu, 26 Jun 2025 15:26:57 -0400 Subject: [PATCH 10/91] Add Blitz Tactics support --- 
sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..b0b66970 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -250,6 +250,12 @@ "urlMain": "https://www.blipfoto.com/", "username_claimed": "blue" }, + "Blitz Tactics": { + "errorType": "status_code", + "url": "https://blitztactics.com/{}", + "urlMain": "https://blitztactics.com/", + "username_claimed": "Lance5500" + }, "Blogger": { "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", From 9e40e0a0f44c51df7994ad406e50edee919986aa Mon Sep 17 00:00:00 2001 From: MR-VL Date: Sun, 6 Jul 2025 16:27:59 -0500 Subject: [PATCH 11/91] Add support for Pychess --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..28542026 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1621,6 +1621,13 @@ "urlMain": "https://www.producthunt.com/", "username_claimed": "jenny" }, + "Pychess": { + "errorType": "message", + "errorMsg": "404", + "url": "https://www.pychess.org/@/{}", + "urlMain": "https://www.pychess.org", + "username_claimed": "gbtami" + }, "PromoDJ": { "errorType": "status_code", "url": "http://promodj.com/{}", From 1e12c3f7a6915275680db3ee618cb131d8c28241 Mon Sep 17 00:00:00 2001 From: MR-VL Date: Sun, 6 Jul 2025 16:35:31 -0500 Subject: [PATCH 12/91] Remove TorrentGalaxy --- docs/removed-sites.md | 13 +++++++++++++ sherlock_project/resources/data.json | 8 -------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/removed-sites.md b/docs/removed-sites.md index ecf8631e..b44e520e 100644 --- a/docs/removed-sites.md +++ b/docs/removed-sites.md @@ -1982,3 +1982,16 @@ __2025-02-16 :__ Unsure if any way to view profiles exists now 
"username_claimed": "t3dotgg" } ``` + +## TorrentGalaxy +__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back +```json + "TorrentGalaxy": { + "errorMsg": "TGx:Can't show details", + "errorType": "message", + "regexCheck": "^[A-Za-z0-9]{3,15}$", + "url": "https://torrentgalaxy.to/profile/{}", + "urlMain": "https://torrentgalaxy.to/", + "username_claimed": "GalaxyRG" + }, +``` diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 28542026..ac4bc97c 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2028,14 +2028,6 @@ "urlMain": "https://www.tnaflix.com/", "username_claimed": "hacker" }, - "TorrentGalaxy": { - "errorMsg": "TGx:Can't show details", - "errorType": "message", - "regexCheck": "^[A-Za-z0-9]{3,15}$", - "url": "https://torrentgalaxy.to/profile/{}", - "urlMain": "https://torrentgalaxy.to/", - "username_claimed": "GalaxyRG" - }, "TradingView": { "errorType": "status_code", "request_method": "GET", From 6e7b3cecb8f922468dbf4b289539369d346c06a0 Mon Sep 17 00:00:00 2001 From: MR-VL Date: Sun, 6 Jul 2025 18:02:24 -0500 Subject: [PATCH 13/91] Syntax Fixes in README.md remove unmatched closing tag, and fix indent add quotes to

LN:1 to make it valid HTML fix type from programmaticaly to programmatically --- docs/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/README.md b/docs/README.md index afabfcf9..b7b8f5ee 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,4 +1,4 @@ -

+



@@ -15,8 +15,7 @@

- - +

@@ -115,7 +114,7 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock }] ``` -Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock). +Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock). ## Credits From 8635d68864afc6f099a46c5ff7f6fe5a99415ea1 Mon Sep 17 00:00:00 2001 From: Moshi <64158093+Davis-3450@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:48:16 -0500 Subject: [PATCH 14/91] add: platzi.com --- sherlock_project/resources/data.json | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..a6873540 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2737,5 +2737,11 @@ "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", "urlMain": "https://bsky.app/", "username_claimed": "mcuban" - } + }, + "Platzi": { + "errorType": "status_code", + "url": "https://platzi.com/p/{}", + "urlMain": "https://platzi.com/", + "username_claimed": "juan" +} } From 3c9eda75e9f2a5a9eb5b4e9fba118440106a209e Mon Sep 17 00:00:00 2001 From: akamayu-ouo <71762573+akamayu-ouo@users.noreply.github.com> Date: Sat, 9 Aug 2025 14:08:56 +0000 Subject: [PATCH 15/91] Add Plurk --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..32bbfed5 100644 --- 
a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1574,6 +1574,13 @@ "urlMain": "https://play.google.com/store", "username_claimed": "Facebook" }, + "Plurk": { + "errorMsg": "User Not Found!", + "errorType": "message", + "url": "https://www.plurk.com/{}", + "urlMain": "https://www.plurk.com/", + "username_claimed": "plurkoffice" + }, "PocketStars": { "errorMsg": "Join Your Favorite Adult Stars", "errorType": "message", From dafcaec192e74324f4b25d4b4a09c06bacd6d03c Mon Sep 17 00:00:00 2001 From: "[Tulsi Shetty]" <[tulsishetty07@gmail.com]> Date: Sat, 16 Aug 2025 18:42:19 +0530 Subject: [PATCH 16/91] feat: Tumblr added --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 42327a18..e5163ff6 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2711,6 +2711,12 @@ "urlMain": "https://www.toster.ru/", "username_claimed": "adam" }, + "tumblr": { + "errorType": "status_code", + "url": "https://{}.tumblr.com/", + "urlMain": "https://www.tumblr.com/", + "username_claimed": "goku" +}, "uid": { "errorType": "status_code", "url": "http://uid.me/{}", From aa1945b01759f48e17d963bac6bd16f9915e37c5 Mon Sep 17 00:00:00 2001 From: Craig London <3833140+craiglondon@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:46:59 -0400 Subject: [PATCH 17/91] readmefixes: HTML fixes --- docs/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/README.md b/docs/README.md index afabfcf9..1df79904 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@


- + sherlock
Hunt down social media accounts by username across 400+ social networks
@@ -15,8 +15,7 @@

- - +demo

@@ -122,7 +121,7 @@ Read more about the [Sherlock Actor](../.actor/README.md), including how to use Thank you to everyone who has contributed to Sherlock! ❤️ - + contributors ## Star history From d60562130c0e04579d1cfea7dd7da13073598945 Mon Sep 17 00:00:00 2001 From: Craig London <3833140+craiglondon@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:47:08 -0400 Subject: [PATCH 18/91] readmefixes: Fix typo --- docs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index 1df79904..5c9bcee3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -114,7 +114,7 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock }] ``` -Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock). +Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock). 
## Credits From 9646055560df1ab1e13e97fc47a8453723add61d Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 13 Sep 2025 18:30:08 -0400 Subject: [PATCH 19/91] fix(manifest): schema non-compliance --- sherlock_project/resources/data.json | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 99db68a1..c4e6405d 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -368,7 +368,6 @@ "url": "https://cash.app/${}", "urlMain": "https://cash.app", "username_claimed": "hotdiggitydog", - "username_unclaimed": "HelloItsADell" }, "Championat": { "errorType": "status_code", From d3f4c65459d13640bf5504b59301b9eb09aa4ede Mon Sep 17 00:00:00 2001 From: MR-VL <107425742+MR-VL@users.noreply.github.com> Date: Sat, 13 Sep 2025 18:15:23 -0500 Subject: [PATCH 20/91] remove trailing comma for cashapp breaking TOX --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 31b9a2e4..6f886938 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -367,7 +367,7 @@ "errorType": "status_code", "url": "https://cash.app/${}", "urlMain": "https://cash.app", - "username_claimed": "hotdiggitydog", + "username_claimed": "hotdiggitydog" }, "Championat": { "errorType": "status_code", From ca094d8264b43b184bbad774d12ff08954c6e5ba Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 00:39:35 -0400 Subject: [PATCH 21/91] test: prepare false positive detection base --- pyproject.toml | 3 ++ pytest.ini | 3 +- sherlock_project/sherlock.py | 6 ++-- tests/conftest.py | 16 ++++++++-- tests/test_manifest.py | 2 +- tests/test_validate_targets.py | 53 ++++++++++++++++++++++++++++++++++ tox.ini | 1 + 7 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 tests/test_validate_targets.py diff --git 
a/pyproject.toml b/pyproject.toml index 069cb9d3..76a6fab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,9 @@ tor = ["torrequest"] [tool.poetry.group.dev.dependencies] jsonschema = "^4.0.0" +rstr = "^3.2.2" +pytest = "^8.4.2" +pytest-xdist = "^3.8.0" [tool.poetry.scripts] sherlock = 'sherlock_project.sherlock:main' diff --git a/pytest.ini b/pytest.ini index bc1df7de..e05d3088 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] -addopts = --strict-markers +addopts = --strict-markers -m "not validate_targets" markers = online: mark tests are requiring internet access. + validate_targets: mark tests for sweeping manifest validation (sends many requests). diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 4e80d31c..e3786c90 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -169,14 +169,14 @@ def multiple_usernames(username): def sherlock( username: str, - site_data: dict, + site_data: dict[str, dict[str, str]], query_notify: QueryNotify, tor: bool = False, unique_tor: bool = False, dump_response: bool = False, proxy: Optional[str] = None, timeout: int = 60, -): +) -> dict[str, dict[str, str | QueryResult]]: """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -507,7 +507,7 @@ def sherlock( print("+++++++++++++++++++++") # Notify caller about results of query. 
- result = QueryResult( + result: QueryResult = QueryResult( username=username, site_name=social_network, site_url_user=url, diff --git a/tests/conftest.py b/tests/conftest.py index 51c90814..75aa25e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,11 @@ import urllib import pytest from sherlock_project.sites import SitesInformation +def fetch_local_manifest() -> dict[str, dict[str, str]]: + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_iterable = {site.name: site.information for site in sites_obj} + return sites_iterable + @pytest.fixture() def sites_obj(): sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) @@ -11,9 +16,7 @@ def sites_obj(): @pytest.fixture(scope="session") def sites_info(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) - sites_iterable = {site.name: site.information for site in sites_obj} - yield sites_iterable + yield fetch_local_manifest() @pytest.fixture(scope="session") def remote_schema(): @@ -21,3 +24,10 @@ def remote_schema(): with urllib.request.urlopen(schema_url) as remoteschema: schemadat = json.load(remoteschema) yield schemadat + +def pytest_generate_tests(metafunc): + if "chunked_sites" in metafunc.fixturenames: + sites_info = fetch_local_manifest() + params = [{name: data} for name, data in sites_info.items()] + ids = list(sites_info.keys()) + metafunc.parametrize("chunked_sites", params, ids=ids) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 5c47fbb8..b73e9240 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema(): """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" json_relative: str = 
'../sherlock_project/resources/data.json' schema_relative: str = '../sherlock_project/resources/data.schema.json' - + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py new file mode 100644 index 00000000..a71f3b1e --- /dev/null +++ b/tests/test_validate_targets.py @@ -0,0 +1,53 @@ +import pytest +import rstr + +from sherlock_project.sherlock import sherlock +from sherlock_project.notify import QueryNotify +from sherlock_project.result import QueryResult, QueryStatus + + +FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit + + +def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus: + """Check if a site is likely to produce false positives.""" + attempts: int = 1 + status: QueryStatus = QueryStatus.UNKNOWN + + for _ in range(attempts): + query_notify = QueryNotify() + username: str = rstr.xeger(pattern) + + result: QueryResult | str = sherlock( + username=username, + site_data=sites_info, + query_notify=query_notify, + )[site]['status'] + + if not hasattr(result, 'status'): + raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError(f"Result status for site {site} is not of type QueryStatus. 
Actual type: {type(result.status)}") # type: ignore + status = result.status # type: ignore + + if status in (QueryStatus.AVAILABLE, QueryStatus.WAF): + return status + + return status + + +@pytest.mark.validate_targets +@pytest.mark.online +class Test_All_Targets: + + def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]): + """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" + pattern: str + for site in chunked_sites: + try: + pattern = chunked_sites[site]['regexCheck'] + except KeyError: + pattern = r'^[a-zA-Z0-9._-]{7,20}$' + result: QueryStatus = false_positive_check(chunked_sites, site, pattern) + assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + diff --git a/tox.ini b/tox.ini index 1e9a47de..da91d7aa 100644 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,7 @@ deps = coverage jsonschema pytest + rstr allowlist_externals = coverage commands = coverage run --source=sherlock_project --module pytest -v From dfe8b1599dd2035ff7e1dfe103c17455241424f8 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 00:57:55 -0400 Subject: [PATCH 22/91] test: prepare false negative detection base --- tests/test_validate_targets.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index a71f3b1e..5f392543 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -11,11 +11,10 @@ FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus: """Check if a site is likely to produce false positives.""" - attempts: int = 1 status: QueryStatus = QueryStatus.UNKNOWN - for _ in range(attempts): - query_notify = QueryNotify() + for _ in range(FALSE_POSITIVE_ATTEMPTS): + 
query_notify: QueryNotify = QueryNotify() username: str = rstr.xeger(pattern) result: QueryResult | str = sherlock( @@ -36,12 +35,31 @@ def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, patte return status +def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus: + """Check if a site is likely to produce false negatives.""" + status: QueryStatus = QueryStatus.UNKNOWN + query_notify: QueryNotify = QueryNotify() + + result: QueryResult | str = sherlock( + username=sites_info[site]['username_claimed'], + site_data=sites_info, + query_notify=query_notify, + )[site]['status'] + + if not hasattr(result, 'status'): + raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore + status = result.status # type: ignore + + return status + @pytest.mark.validate_targets @pytest.mark.online class Test_All_Targets: def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]): - """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" + """Iterate through all sites in the manifest to discover possible false-positive inducting targets.""" pattern: str for site in chunked_sites: try: @@ -51,3 +69,9 @@ class Test_All_Targets: result: QueryStatus = false_positive_check(chunked_sites, site, pattern) assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + def test_manifest_false_neg(self, chunked_sites: dict[str, dict[str, str]]): + """Iterate through all sites in the manifest to discover possible false-negative inducting targets.""" + for site in chunked_sites: + result: QueryStatus = false_negative_check(chunked_sites, site) + assert result is QueryStatus.CLAIMED, f"{site} 
produced false negative, result was {result}" + From 702bfee9889164d8f892fdbdf1d7f43ed3cedc4a Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 01:10:52 -0400 Subject: [PATCH 23/91] chore: deprecate 3.8, 3.9 --- .github/workflows/regression.yml | 2 +- tox.ini | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 2e5ea941..e366f29d 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -49,10 +49,10 @@ jobs: macos-latest, ] python-version: [ - '3.9', '3.10', '3.11', '3.12', + '3.13', ] steps: - uses: actions/checkout@v4 diff --git a/tox.ini b/tox.ini index da91d7aa..8c43ac30 100644 --- a/tox.ini +++ b/tox.ini @@ -7,8 +7,6 @@ envlist = py312 py311 py310 - py39 - py38 [testenv] description = Attempt to build and install the package @@ -38,7 +36,7 @@ commands = [gh-actions] python = + 3.13: py313 3.12: py312 3.11: py311 3.10: py310 - 3.9: py39 From 34cb23bc6ed4df0e15bba72f2efd7fff1f3c1914 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 01:36:21 -0400 Subject: [PATCH 24/91] test: itemize f+/f- --- pytest.ini | 2 ++ tests/test_validate_targets.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pytest.ini b/pytest.ini index e05d3088..ce1af84e 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,3 +3,5 @@ addopts = --strict-markers -m "not validate_targets" markers = online: mark tests are requiring internet access. validate_targets: mark tests for sweeping manifest validation (sends many requests). + validate_targets_fp: validate_targets, false positive tests only. + validate_targets_fn: validate_targets, false negative tests only. 
diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 5f392543..42b5a012 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -58,6 +58,7 @@ def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> Qu @pytest.mark.online class Test_All_Targets: + @pytest.mark.validate_targets_fp def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-positive inducting targets.""" pattern: str @@ -69,6 +70,7 @@ class Test_All_Targets: result: QueryStatus = false_positive_check(chunked_sites, site, pattern) assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + @pytest.mark.validate_targets_fn def test_manifest_false_neg(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-negative inducting targets.""" for site in chunked_sites: From ad76b3685fc261cbd10d4afa02ead69195d46456 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 01:39:37 -0400 Subject: [PATCH 25/91] chore: simplify test names --- tests/test_validate_targets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 42b5a012..8f2874ac 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -59,7 +59,7 @@ def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> Qu class Test_All_Targets: @pytest.mark.validate_targets_fp - def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]): + def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-positive inducting targets.""" pattern: str for site in chunked_sites: @@ -71,7 +71,7 @@ class Test_All_Targets: assert result is 
QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" @pytest.mark.validate_targets_fn - def test_manifest_false_neg(self, chunked_sites: dict[str, dict[str, str]]): + def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]): """Iterate through all sites in the manifest to discover possible false-negative inducting targets.""" for site in chunked_sites: result: QueryStatus = false_negative_check(chunked_sites, site) From 87bd15f927fb523372eacae63805d06e3cffd0c4 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 09:42:47 +0100 Subject: [PATCH 26/91] style: remove useless empty line Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 3c9157c1..373c6906 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1290,7 +1290,6 @@ }, "LinkedIn": { "errorType": "status_code", - "regexCheck": "^[a-zA-Z0-9]{3,100}$", "request_method": "GET", "url": "https://linkedin.com/in/{}", From 6b9305250d456ff6100457b09f4efe098c64288f Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 09:48:50 +0100 Subject: [PATCH 27/91] feat: add `framapiaf.org` in supported web sites Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 373c6906..c1001a5d 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -800,6 +800,13 @@ "urlMain": "https://fosstodon.org/", "username_claimed": "blue" }, + "Framapiaf": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_]{1,30}$", + "url": "https://framapiaf.org/@{}", + "urlMain": "https://framapiaf.org", + "username_claimed": "pylapp" + }, 
"Freelance.habr": { "errorMsg": "
", "errorType": "message", From 481c39ace32605d0bbe22a8252a555e20aaae390 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 09:55:31 +0100 Subject: [PATCH 28/91] feat: add `speakerdeck.com` in supported web sites Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index c1001a5d..8204c501 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1917,6 +1917,12 @@ "urlMain": "https://soylentnews.org", "username_claimed": "adam" }, + "SpeakerDeck": { + "errorType": "status_code", + "url": "https://speakerdeck.com/{}", + "urlMain": "https://speakerdeck.com/", + "username_claimed": "pylapp" + }, "Speedrun.com": { "errorType": "status_code", "url": "https://speedrun.com/users/{}", From 48ef668e1ebd6749b3453cf7bc97555e475ab35b Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 09:59:15 +0100 Subject: [PATCH 29/91] feat: add `write.as` in supported web sites (#2422) Closes #2422 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 8204c501..76dfcf7a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2766,6 +2766,12 @@ "urlMain": "https://uid.me/", "username_claimed": "blue" }, + "write.as": { + "errorType": "status_code", + "url": "https://write.as/{}", + "urlMain": "https://write.as", + "username_claimed": "pylapp" + }, "xHamster": { "errorType": "status_code", "isNSFW": true, From b9c352fb7c0d3be8c43d25f1f75076e56d51da1b Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:01:59 +0100 Subject: [PATCH 30/91] style: clean file by removing useless whitespace Signed-off-by: Pierre-Yves Lapersonne 
--- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 76dfcf7a..10776e2b 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -614,7 +614,7 @@ "urlMain": "https://forums.digitalspy.com/", "username_claimed": "blue", "regexCheck": "^\\w{3,20}$" - }, + }, "Discogs": { "errorType": "status_code", "url": "https://www.discogs.com/user/{}", From 9b502d92454cfac569c14363c1d7a0e4e7072a6f Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:19:09 +0100 Subject: [PATCH 31/91] feat: new targets (9), minor cleanup Closes #2421 (added support for site) Closes #2422 (added support for site) Closes #2423 (added support for site) Closes #2424 (added support for site) Closes #2425 (added support for site) Closes #2426 (added support for site) Closes #2427 (added support for site) Closes #2429 (added support for site) Closes #2430 (added support for site) Signed-off-by: Pierre-Yves Lapersonne Signed-off-by: Paul Pfeister --- sherlock_project/resources/data.json | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 10776e2b..f7ed6846 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -806,7 +806,7 @@ "url": "https://framapiaf.org/@{}", "urlMain": "https://framapiaf.org", "username_claimed": "pylapp" - }, + }, "Freelance.habr": { "errorMsg": "
", "errorType": "message", @@ -1659,6 +1659,13 @@ "urlMain": "https://www.producthunt.com/", "username_claimed": "jenny" }, + "programming.dev": { + "errorMsg": "Error!", + "errorType": "message", + "url": "https://programming.dev/u/{}", + "urlMain": "https://programming.dev", + "username_claimed": "pylapp" + }, "Pychess": { "errorType": "message", "errorMsg": "404", @@ -2771,7 +2778,7 @@ "url": "https://write.as/{}", "urlMain": "https://write.as", "username_claimed": "pylapp" - }, + }, "xHamster": { "errorType": "status_code", "isNSFW": true, From 541b023b7f213cbb7b96b69b496c3a119e4c7779 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:22:41 +0100 Subject: [PATCH 32/91] feat: add `mamot.fr` in supported web sites (#2424) Closes #2424 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index f7ed6846..6d4b1ec3 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1351,6 +1351,13 @@ "urlMain": "https://forums.mmorpg.com/", "username_claimed": "goku" }, + "Mamot": { + "errorType": "status_code", + "regexCheck": "^[a-zA-Z0-9_]{1,30}$", + "url": "https://mamot.fr/@{}", + "urlMain": "https://mamot.fr/", + "username_claimed": "anciensEnssat" + }, "Medium": { "errorMsg": " Date: Wed, 5 Mar 2025 10:24:29 +0100 Subject: [PATCH 33/91] feat: add `pixelfed.social` in supported web sites (#2425) Closes #2425 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 6d4b1ec3..f79928bc 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1600,6 +1600,12 @@ "urlMain": "https://www.pinkbike.com/", "username_claimed": "blue" }, + "pixelfed.social": { + 
"errorType": "status_code", + "url": "https://pixelfed.social/{}/", + "urlMain": "https://pixelfed.social", + "username_claimed": "pylapp" + }, "PlayStore": { "errorType": "status_code", "url": "https://play.google.com/store/apps/developer?id={}", From 164d01d163af5a8731311ef6f912069dc8258e36 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:27:30 +0100 Subject: [PATCH 34/91] feat: add `linuxfr.org` in supported web sites (#2427) Closes #2427 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index f79928bc..942d7dcc 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1311,6 +1311,12 @@ "urlMain": "https://linktr.ee/", "username_claimed": "anne" }, + "LinuxFR.org": { + "errorType": "status_code", + "url": "https://linuxfr.org/users/{}", + "urlMain": "https://linuxfr.org/", + "username_claimed": "pylapp" + }, "Listed": { "errorType": "response_url", "errorUrl": "https://listed.to/@{}", From a98a113a4bdfa199156ebb5695a442c89a4ea4ac Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:30:43 +0100 Subject: [PATCH 35/91] feat: add `opencollective.com` in supported web sites (#2430) Closes #2430 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 942d7dcc..af1fc8e1 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1525,6 +1525,13 @@ "urlMain": "https://nyaa.si/", "username_claimed": "blue" }, + "Open Collective": { + "errorMsg": "Oops! 
Page not found", + "errorType": "message", + "url": "https://opencollective.com/{}", + "urlMain": "https://opencollective.com/", + "username_claimed": "pylapp" + }, "OpenStreetMap": { "errorType": "status_code", "regexCheck": "^[^.]*?$", From 58b20db9f14ba6793d4a40005e85b2711d3a4d3a Mon Sep 17 00:00:00 2001 From: Pierre-Yves Lapersonne Date: Wed, 5 Mar 2025 10:39:15 +0100 Subject: [PATCH 36/91] feat: add `outgress.com` in supported web sites (#2426) Closes #2426 Signed-off-by: Pierre-Yves Lapersonne --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index af1fc8e1..89ab7b86 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1552,6 +1552,13 @@ "urlMain": "https://ourdjtalk.com/", "username_claimed": "steve" }, + "Outgress": { + "errorMsg": "Outgress - Error", + "errorType": "message", + "url": "https://outgress.com/agents/{}", + "urlMain": "https://outgress.com/", + "username_claimed": "pylapp" + }, "PCGamer": { "errorMsg": "The specified member cannot be found. Please enter a member's entire name.", "errorType": "message", From f0f37d841c601a2c03305181a42b29ef73311eb8 Mon Sep 17 00:00:00 2001 From: Moshi <64158093+Davis-3450@users.noreply.github.com> Date: Sun, 14 Sep 2025 16:02:08 -0500 Subject: [PATCH 37/91] bugfix: update platzi - "username_claimed" is now set to "freddier" (the most popular user, just in case) - error code and request method are now explicit. 
- added trailing slash for consistency --- sherlock_project/resources/data.json | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index a6873540..adec6493 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2740,8 +2740,10 @@ }, "Platzi": { "errorType": "status_code", - "url": "https://platzi.com/p/{}", + "errorCode": 404, + "url": "https://platzi.com/p/{}/", "urlMain": "https://platzi.com/", - "username_claimed": "juan" -} + "username_claimed": "freddier", + "request_method": "GET" + } } From e02507e5a1e0445cef607d22a5240cf736951af3 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 20:31:26 -0400 Subject: [PATCH 38/91] test: set upper bound on f+ fuzz --- tests/test_validate_targets.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 8f2874ac..4eb7ea16 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -1,4 +1,5 @@ import pytest +import re import rstr from sherlock_project.sherlock import sherlock @@ -7,8 +8,23 @@ from sherlock_project.result import QueryResult, QueryStatus FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit +FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable) +FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry +def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str: + """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.""" + def replace_upper_bound(match: re.Match) -> str: # type: ignore + lower_bound: int = int(match.group(1)) if match.group(1) else 0 
# type: ignore + upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 + return f'{{{lower_bound},{upper_bound}}}' + + pattern = re.sub(r'(? QueryStatus: """Check if a site is likely to produce false positives.""" status: QueryStatus = QueryStatus.UNKNOWN @@ -66,7 +82,11 @@ class Test_All_Targets: try: pattern = chunked_sites[site]['regexCheck'] except KeyError: - pattern = r'^[a-zA-Z0-9._-]{7,20}$' + pattern = FALSE_POSITIVE_DEFAULT_PATTERN + + if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0: + pattern = set_pattern_upper_bound(pattern) + result: QueryStatus = false_positive_check(chunked_sites, site, pattern) assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" From b15242881eaa30457357d231ef481e454674f5d8 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 21:03:17 -0400 Subject: [PATCH 39/91] ci: automatically update exclusions list --- .github/workflows/exclusions.yml | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .github/workflows/exclusions.yml diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml new file mode 100644 index 00000000..876f2e7d --- /dev/null +++ b/.github/workflows/exclusions.yml @@ -0,0 +1,64 @@ +name: Exclusions Updater + +on: + schedule: + #- cron: '0 5 * * 0' # Runs at 05:00 every Sunday + - cron: '0 5 * * *' # Runs at 05:00 every day + workflow_dispatch: + +jobs: + update-exclusions: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + + - name: Install Poetry + uses: abatilo/actions-poetry@v4 + with: + poetry-version: 'latest' + + - name: Install dependencies + run: | + poetry install --no-interaction --with dev + + - name: Run false positive tests + run: | + $(poetry env activate) + pytest -q --tb no -m 
validate_targets_fp -n 20 | tee fp_test_results.txt + deactivate + + - name: Parse false positive detections by desired categories + id: parse_detections + run: | + grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \ + | sort -u > false_positive_exclusions.txt + grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \ + | sort -u > waf_hits.txt + + - name: Quantify and display results + run: | + FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs) + WAF_COUNT=$(wc -l < waf_hits.txt | xargs) + echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits." + echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt + echo ">>> WAF hits:" && cat waf_hits.txt + + - name: Commit and push exclusions list + if: steps.parse_detections.outputs.changed == 'true' || steps.parse_detections.outputs.changed == 'true' + run: | + git config user.name "Paul Pfeister (automation)" + git config user.email "code@pfeister.dev" + + git fetch origin exclusions || true # Allows creation of branch if deleted + git checkout -B exclusions origin/exclusions || git checkout --orphan exclusions + + git add false_positive_exclusions.txt + + git commit -m "auto: Update exclusions list" || echo "No changes to commit" + git push origin exclusions From 5c57b20936acf22fb5fd40e63edaa6b0b08064d2 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 21:17:09 -0400 Subject: [PATCH 40/91] ci: fix exclusions updater --- .github/workflows/exclusions.yml | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index 876f2e7d..337cc65b 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -34,13 +34,35 @@ jobs: deactivate - name: Parse false positive detections by desired categories - id: parse_detections run: | grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was 
Claimed)' fp_test_results.txt \ | sort -u > false_positive_exclusions.txt grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \ | sort -u > waf_hits.txt + - name: Detect if exclusions list changed + id: detect_changes + run: | + git fetch origin exclusions || true + + if git show origin/exclusions:exclusions.txt >/dev/null 2>&1; then + # If the exclusions branch and file exist, compare + if git diff --quiet origin/exclusions -- exclusions.txt; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + else + # If the exclusions branch or file do not exist, treat as changed + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + if git diff --quiet false_positive_exclusions.txt; then + echo "exclusions_changed=false" >> $GITHUB_OUTPUT + else + echo "exclusions_changed=true" >> $GITHUB_OUTPUT + fi + - name: Quantify and display results run: | FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs) @@ -50,7 +72,7 @@ jobs: echo ">>> WAF hits:" && cat waf_hits.txt - name: Commit and push exclusions list - if: steps.parse_detections.outputs.changed == 'true' || steps.parse_detections.outputs.changed == 'true' + if: steps.detect_changes.outputs.exclusions_changed == 'true' run: | git config user.name "Paul Pfeister (automation)" git config user.email "code@pfeister.dev" From 69d3308c71f4e9531d983d48046db21d6ef12a66 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 21:24:10 -0400 Subject: [PATCH 41/91] ci: fix exclusions updater --- .github/workflows/exclusions.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index 337cc65b..b3d8c21e 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -48,19 +48,13 @@ jobs: if git show origin/exclusions:exclusions.txt >/dev/null 2>&1; then # If the exclusions branch and file exist, compare if git diff --quiet 
origin/exclusions -- exclusions.txt; then - echo "changed=false" >> "$GITHUB_OUTPUT" + echo "exclusions_changed=false" >> "$GITHUB_OUTPUT" else - echo "changed=true" >> "$GITHUB_OUTPUT" + echo "exclusions_changed=true" >> "$GITHUB_OUTPUT" fi else # If the exclusions branch or file do not exist, treat as changed - echo "changed=true" >> "$GITHUB_OUTPUT" - fi - - if git diff --quiet false_positive_exclusions.txt; then - echo "exclusions_changed=false" >> $GITHUB_OUTPUT - else - echo "exclusions_changed=true" >> $GITHUB_OUTPUT + echo "exclusions_changed=true" >> "$GITHUB_OUTPUT" fi - name: Quantify and display results From 6d15f1319e58889ab1d458763abda1be37a35c30 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 21:29:20 -0400 Subject: [PATCH 42/91] ci: fix exclusions updater (again) --- .github/workflows/exclusions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index b3d8c21e..dd834167 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -72,7 +72,7 @@ jobs: git config user.email "code@pfeister.dev" git fetch origin exclusions || true # Allows creation of branch if deleted - git checkout -B exclusions origin/exclusions || git checkout --orphan exclusions + git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .) 
git add false_positive_exclusions.txt From 888231045082ee9b627b6a7a51668255b56c2b5c Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 21:56:54 -0400 Subject: [PATCH 43/91] feat: honor automatic exclusions list --- .github/workflows/exclusions.yml | 2 +- sherlock_project/sherlock.py | 17 +++++++++++-- sherlock_project/sites.py | 41 ++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index dd834167..a4bc0449 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -76,5 +76,5 @@ jobs: git add false_positive_exclusions.txt - git commit -m "auto: Update exclusions list" || echo "No changes to commit" + git commit -m "auto: update exclusions list" || echo "No changes to commit" git push origin exclusions diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index e3786c90..250175a5 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -727,6 +727,14 @@ def main(): help="Disable creation of a txt file", ) + parser.add_argument( + "--ignore-exclusions", + action="store_true", + dest="ignore_exclusions", + default=False, + help="Ignore upstream exclusions (may return more false positives)", + ) + args = parser.parse_args() # If the user presses CTRL-C, exit gracefully without throwing errors @@ -784,7 +792,8 @@ def main(): try: if args.local: sites = SitesInformation( - os.path.join(os.path.dirname(__file__), "resources/data.json") + os.path.join(os.path.dirname(__file__), "resources/data.json"), + honor_exclusions=False, ) else: json_file_location = args.json_file @@ -804,7 +813,11 @@ def main(): head_commit_sha = pull_request_json["head"]["sha"] json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json" - sites = SitesInformation(json_file_location) + sites = SitesInformation( + 
data_file_path=json_file_location, + honor_exclusions=not args.ignore_exclusions, + do_not_exclude=args.site_list, + ) except Exception as error: print(f"ERROR: {error}") sys.exit(1) diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index 847d1576..2ba811d7 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -7,6 +7,10 @@ import json import requests import secrets + +MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json" +EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt" + class SiteInformation: def __init__(self, name, url_home, url_username_format, username_claimed, information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): @@ -67,12 +71,17 @@ class SiteInformation: Return Value: Nicely formatted string to get information about this object. """ - + return f"{self.name} ({self.url_home})" class SitesInformation: - def __init__(self, data_file_path=None): + def __init__( + self, + data_file_path: str|None = None, + honor_exclusions: bool = True, + do_not_exclude: list[str] = [], + ): """Create Sites Information Object. Contains information about all supported websites. @@ -110,7 +119,7 @@ class SitesInformation: # The default data file is the live data.json which is in the GitHub repo. The reason why we are using # this instead of the local one is so that the user has the most up-to-date data. This prevents # users from creating issue about false positives which has already been fixed or having outdated data - data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json" + data_file_path = MANIFEST_URL # Ensure that specified data file has correct extension. 
if not data_file_path.lower().endswith(".json"): @@ -152,9 +161,31 @@ class SitesInformation: raise FileNotFoundError(f"Problem while attempting to access " f"data file '{data_file_path}'." ) - + site_data.pop('$schema', None) + if honor_exclusions: + try: + response = requests.get(url=EXCLUSIONS_URL) + if response.status_code == 200: + exclusions = response.text.splitlines() + exclusions = [exclusion.strip() for exclusion in exclusions] + + for site in do_not_exclude: + if site in exclusions: + exclusions.remove(site) + + for exclusion in exclusions: + try: + site_data.pop(exclusion, None) + except KeyError: + pass + + except Exception: + # If there was any problem loading the exclusions, just continue without them + print("Warning: Could not load exclusions, continuing without them.") + honor_exclusions = False + self.sites = {} # Add all site information from the json file to internal site list. @@ -194,7 +225,7 @@ class SitesInformation: for site in self.sites: if self.sites[site].is_nsfw and site.casefold() not in do_not_remove: continue - sites[site] = self.sites[site] + sites[site] = self.sites[site] self.sites = sites def site_name_list(self): From 524415b5d535409a1988233ec5c0b4f6bdda18df Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 15 Sep 2025 22:03:23 -0400 Subject: [PATCH 44/91] chore: bump to 0.16.0 --- Dockerfile | 2 +- sherlock_project/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2e13f679..361530ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py # 2. Update the VCS_REF tag to match the tagged version's FULL commit hash # 3. Build image with BOTH latest and version tags - # i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .` + # i.e. 
`docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .` FROM python:3.12-slim-bullseye as build WORKDIR /sherlock diff --git a/sherlock_project/__init__.py b/sherlock_project/__init__.py index 52307cd7..382d7cd0 100644 --- a/sherlock_project/__init__.py +++ b/sherlock_project/__init__.py @@ -10,6 +10,6 @@ import_error_test_var = None __shortname__ = "Sherlock" __longname__ = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.15.0" +__version__ = "0.16.0" forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" From 35940e7584a91cec015e4281f24bdeec7fe8cddc Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 13:44:02 -0400 Subject: [PATCH 45/91] fix: ignore exclusions list on parameterization for false positive tests --- tests/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 75aa25e0..ca00ae8b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,8 +4,8 @@ import urllib import pytest from sherlock_project.sites import SitesInformation -def fetch_local_manifest() -> dict[str, dict[str, str]]: - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) +def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]: + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions) sites_iterable = {site.name: site.information for site in sites_obj} return sites_iterable @@ -27,7 +27,7 @@ def remote_schema(): def pytest_generate_tests(metafunc): if "chunked_sites" in metafunc.fixturenames: - sites_info = fetch_local_manifest() + sites_info = fetch_local_manifest(honor_exclusions=False) params = [{name: data} for name, data in sites_info.items()] ids = list(sites_info.keys()) 
metafunc.parametrize("chunked_sites", params, ids=ids) From 78a2d309d10731c53b185c806991ff10e161db51 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 13:55:42 -0400 Subject: [PATCH 46/91] ci(exclusions): fix loss of untracked list --- .github/workflows/exclusions.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index a4bc0449..a5faf63a 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -45,9 +45,9 @@ jobs: run: | git fetch origin exclusions || true - if git show origin/exclusions:exclusions.txt >/dev/null 2>&1; then + if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then # If the exclusions branch and file exist, compare - if git diff --quiet origin/exclusions -- exclusions.txt; then + if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then echo "exclusions_changed=false" >> "$GITHUB_OUTPUT" else echo "exclusions_changed=true" >> "$GITHUB_OUTPUT" @@ -71,10 +71,12 @@ jobs: git config user.name "Paul Pfeister (automation)" git config user.email "code@pfeister.dev" + git stash push -m "stash false positive exclusion list" --include-untracked -- false_positive_exclusions.txt + git fetch origin exclusions || true # Allows creation of branch if deleted git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .) 
- git add false_positive_exclusions.txt + git stash pop || true git commit -m "auto: update exclusions list" || echo "No changes to commit" git push origin exclusions From faddcbd15f0812afbea940223d2f7b6a487d0945 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 14:03:51 -0400 Subject: [PATCH 47/91] ci(exclusions): fix loss of untracked list --- .github/workflows/exclusions.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index a5faf63a..516aa67b 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -71,7 +71,8 @@ jobs: git config user.name "Paul Pfeister (automation)" git config user.email "code@pfeister.dev" - git stash push -m "stash false positive exclusion list" --include-untracked -- false_positive_exclusions.txt + git add -f false_positive_exclusions.txt # -f required to override .gitignore + git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt git fetch origin exclusions || true # Allows creation of branch if deleted git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .) 
From 39c3729524b100b7ef02375633ab58e4e10d8b3d Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 14:09:15 -0400 Subject: [PATCH 48/91] ci(exclusions): fix loss of untracked list --- .github/workflows/exclusions.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index 516aa67b..95b6180f 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -71,13 +71,18 @@ jobs: git config user.name "Paul Pfeister (automation)" git config user.email "code@pfeister.dev" - git add -f false_positive_exclusions.txt # -f required to override .gitignore - git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt + mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp + + git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore + git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp git fetch origin exclusions || true # Allows creation of branch if deleted git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .) 
git stash pop || true + mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt + + git add false_positive_exclusions.txt git commit -m "auto: update exclusions list" || echo "No changes to commit" git push origin exclusions From 7ff3924f0b5d82b74926ee63acb5a995007aaa38 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 17:17:49 -0400 Subject: [PATCH 49/91] ci(exclusions): ensure unstaging and removal of tmp --- .github/workflows/exclusions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/exclusions.yml b/.github/workflows/exclusions.yml index 95b6180f..6617ad67 100644 --- a/.github/workflows/exclusions.yml +++ b/.github/workflows/exclusions.yml @@ -83,6 +83,7 @@ jobs: mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt + git rm -f false_positive_exclusions.txt.tmp || true git add false_positive_exclusions.txt git commit -m "auto: update exclusions list" || echo "No changes to commit" git push origin exclusions From b44ac231c1b18263a26e865303c7864eff33c515 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 17 Sep 2025 17:47:45 -0400 Subject: [PATCH 50/91] chore: move SSOT to pyproject.toml Co-authored-by: ByteXenon <125568681+ByteXenon@users.noreply.github.com> --- pyproject.toml | 4 ++-- sherlock_project/__init__.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 76a6fab3..b1ca18d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,8 +8,7 @@ source = "init" [tool.poetry] name = "sherlock-project" -# single source of truth for version is __init__.py -version = "0" +version = "0.16.0" description = "Hunt down social media accounts by username across social networks" license = "MIT" authors = [ @@ -50,6 +49,7 @@ stem = "^1.8.0" torrequest = "^0.1.0" pandas = "^2.2.1" openpyxl = "^3.0.10" +tomli = "^2.2.1" [tool.poetry.extras] tor = ["torrequest"] diff --git a/sherlock_project/__init__.py b/sherlock_project/__init__.py 
index 382d7cd0..ad6c9e30 100644 --- a/sherlock_project/__init__.py +++ b/sherlock_project/__init__.py @@ -5,11 +5,26 @@ networks. """ +from importlib.metadata import version as pkg_version, PackageNotFoundError +import pathlib +import tomli + + +def get_version() -> str: + """Fetch the version number of the installed package.""" + try: + return pkg_version("sherlock_project") + except PackageNotFoundError: + pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml" + with pyproject_path.open("rb") as f: + pyproject_data = tomli.load(f) + return pyproject_data["tool"]["poetry"]["version"] + # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package import_error_test_var = None __shortname__ = "Sherlock" __longname__ = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.16.0" +__version__ = get_version() forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" From 9560355a7c91056b40dae838ef583f28bc6056af Mon Sep 17 00:00:00 2001 From: My Name Date: Thu, 18 Sep 2025 10:23:09 +0400 Subject: [PATCH 51/91] Added Laracast support --- sherlock_project/resources/data.json | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index e88f8dbc..a7a75178 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1254,6 +1254,13 @@ "urlMain": "https://linux.org.ru/", "username_claimed": "red" }, + "Laracast": { + "errorType":"status_code", + "url": "https://laracasts.com/@{}", + "urlMain": "https://laracasts.com/", + "regexCheck": "^[a-zA-Z0-9_-]{3,}$", + "username_claimed": "user1" + }, "Launchpad": { "errorType": "status_code", "url": "https://launchpad.net/~{}", @@ -1558,7 +1565,7 @@ "url": "https://outgress.com/agents/{}", "urlMain": "https://outgress.com/", "username_claimed": 
"pylapp" - }, + }, "PCGamer": { "errorMsg": "The specified member cannot be found. Please enter a member's entire name.", "errorType": "message", From 435540606e950cad028182d209238b622f87b4a3 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 13:49:29 -0400 Subject: [PATCH 52/91] chore: add typedef --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index ca00ae8b..a767c07c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ from sherlock_project.sites import SitesInformation def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]: sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions) - sites_iterable = {site.name: site.information for site in sites_obj} + sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj} return sites_iterable @pytest.fixture() From ae362b0f0267f81ca9833669501e0e7387fdaa86 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 14:44:19 -0400 Subject: [PATCH 53/91] ci: automatically validate modified targets on pr --- .../workflows/validate_modified_targets.yml | 64 +++++++++++++++++++ sherlock_project/resources/data.json | 2 +- tests/conftest.py | 18 ++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/validate_modified_targets.yml diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml new file mode 100644 index 00000000..bdb95636 --- /dev/null +++ b/.github/workflows/validate_modified_targets.yml @@ -0,0 +1,64 @@ +name: Modified Target Validation + +on: + pull_request: + branches: + - master + paths: + - "sherlock_project/resources/data.json" + +jobs: + validate-modified-targets: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: 
actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + + - name: Install Poetry + uses: abatilo/actions-poetry@v4 + with: + poetry-version: 'latest' + + - name: Install dependencies + run: | + poetry install --no-interaction --with dev + + - name: Discover modified targets + id: discover-modified + run: | + # Fetch the upstream branch + git fetch origin ${{ github.base_ref }} --depth=1 + + # Discover changes + git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base + CHANGED=$( + jq --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json ' + [ + ($head[0] | keys_unsorted[]) as $key + | select(($base[0][$key] != $head[0][$key]) or ($base[0][$key] | not)) + | $key + ] | unique | join(",")' + ) + + # Preserve changelist + echo ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')" + echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT" + + - name: Validate modified targets + if: steps.discover-modified.outputs.changed_targets != '' + run: | + $(poetry env activate) + pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" + deactivate + + - name: Announce skip if no modified targets + if: steps.discover-modified.outputs.changed_targets == '' + run: | + echo "No modified targets found" diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index e88f8dbc..391c71ee 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1558,7 +1558,7 @@ "url": "https://outgress.com/agents/{}", "urlMain": "https://outgress.com/", "username_claimed": "pylapp" - }, + }, "PCGamer": { "errorMsg": "The specified member cannot be found. 
Please enter a member's entire name.", "errorType": "message", diff --git a/tests/conftest.py b/tests/conftest.py index a767c07c..69fce756 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,9 +25,27 @@ def remote_schema(): schemadat = json.load(remoteschema) yield schemadat +def pytest_addoption(parser): + parser.addoption( + "--chunked-sites", + action="store", + default=None, + help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.", + ) + def pytest_generate_tests(metafunc): if "chunked_sites" in metafunc.fixturenames: sites_info = fetch_local_manifest(honor_exclusions=False) + + # Ingest and apply site selections + site_filter: str | None = metafunc.config.getoption("--chunked-sites") + if site_filter: + selected_sites: list[str] = [site.strip() for site in site_filter.split(",")] + sites_info = { + site: data for site, data in sites_info.items() + if site in selected_sites + } + params = [{name: data} for name, data in sites_info.items()] ids = list(sites_info.keys()) metafunc.parametrize("chunked_sites", params, ids=ids) From 9f5b7e1846e5610616791c2174f2003a3555d41b Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 15:02:43 -0400 Subject: [PATCH 54/91] fix(validation ci): parsing and presentation --- .github/workflows/validate_modified_targets.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index bdb95636..266c5108 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -39,7 +39,7 @@ jobs: # Discover changes git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base CHANGED=$( - jq --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json ' + jq -r --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json ' [ ($head[0] 
| keys_unsorted[]) as $key | select(($base[0][$key] != $head[0][$key]) or ($base[0][$key] | not)) @@ -48,7 +48,7 @@ jobs: ) # Preserve changelist - echo ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')" + echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')" echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT" - name: Validate modified targets From 9882478fb5cf84e78d8e416dcf56f9c44964fa6f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 14:56:44 -0400 Subject: [PATCH 55/91] feat: add instapaper --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index a7a75178..386bb36c 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1142,6 +1142,12 @@ "urlProbe": "https://imginn.com/{}", "username_claimed": "instagram" }, + "Instapaper": { + "errorType": "status_code", + "url": "https://www.instapaper.com/p/{}", + "urlMain": "https://www.instapaper.com/", + "username_claimed": "john" + }, "Instructables": { "errorType": "status_code", "url": "https://www.instructables.com/member/{}", From 97ba4e861601482b3811a222fc3d916a9db557a8 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 15:39:01 -0400 Subject: [PATCH 56/91] fix(ci): validation issue --- .../workflows/validate_modified_targets.yml | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 266c5108..4a263122 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -14,6 +14,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 with: + ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 - name: Set up Python @@ -38,13 +39,21 @@ jobs: # Discover changes git show origin/${{ github.base_ref 
}}:sherlock_project/resources/data.json > data.json.base + cp sherlock_project/resources/data.json data.json.head + CHANGED=$( - jq -r --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json ' - [ - ($head[0] | keys_unsorted[]) as $key - | select(($base[0][$key] != $head[0][$key]) or ($base[0][$key] | not)) - | $key - ] | unique | join(",")' + python - <<'EOF' + import json + with open("data.json.base") as f: base = json.load(f) + with open("data.json.head") as f: head = json.load(f) + + changed = [] + for k, v in head.items(): + if k not in base or base[k] != v: + changed.append(k) + + print(",".join(sorted(changed))) + EOF ) # Preserve changelist From fa0564166134257c611a75dd5a64fc9e92fb88b7 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 17:43:00 -0400 Subject: [PATCH 57/91] ci: improve validation --- .../workflows/validate_modified_targets.yml | 30 +++++++++++++++++-- pyproject.toml | 4 +++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 4a263122..44a6fdbd 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -62,12 +62,38 @@ jobs: - name: Validate modified targets if: steps.discover-modified.outputs.changed_targets != '' + continue-on-error: true run: | $(poetry env activate) - pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" + pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" --junitxml=validation_results.xml deactivate - - name: Announce skip if no modified targets + - name: Prepare validation summary + if: steps.discover-modified.outputs.changed_targets != '' + id: prepare-summary + run: | + $(poetry env activate) + summary=$( + python devel/summarize_site_validation.py 
validation_results.xml > summary.md || echo "Failed to generate summary of test results" + ) + deactivate + echo "summary<> $GITHUB_OUTPUT + echo "$summary" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Announce validation results + if: steps.discover-modified.outputs.changed_targets != '' + uses: actions/github-script@v8 + with: + script: | + github.rest.issues.createComment({ + issue_number: context.payload.pull_request.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `${{ steps.prepare-summary.outputs.summary }}` + }); + + - name: This step shows as ran when no modifications are found if: steps.discover-modified.outputs.changed_targets == '' run: | echo "No modified targets found" diff --git a/pyproject.toml b/pyproject.toml index b1ca18d7..1d66dac6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,5 +60,9 @@ rstr = "^3.2.2" pytest = "^8.4.2" pytest-xdist = "^3.8.0" + +[tool.poetry.group.ci.dependencies] +defusedxml = "^0.7.1" + [tool.poetry.scripts] sherlock = 'sherlock_project.sherlock:main' From cd6fa5bb305429a370ad6977bb062a3f5005c365 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:04:42 -0400 Subject: [PATCH 58/91] ci: fix the thing --- .github/workflows/validate_modified_targets.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 44a6fdbd..99844216 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -73,24 +73,26 @@ jobs: id: prepare-summary run: | $(poetry env activate) + python devel/summarize_site_validation.py validation_results.xml summary=$( - python devel/summarize_site_validation.py validation_results.xml > summary.md || echo "Failed to generate summary of test results" + python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test 
results" ) deactivate - echo "summary<> $GITHUB_OUTPUT - echo "$summary" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT + echo "$summary" > validation_summary.md + cat validation_summary.md - name: Announce validation results if: steps.discover-modified.outputs.changed_targets != '' uses: actions/github-script@v8 with: script: | + const fs = require('fs'); + const body = fs.readFileSync('validation_summary.md', 'utf8'); github.rest.issues.createComment({ issue_number: context.payload.pull_request.number, owner: context.repo.owner, repo: context.repo.repo, - body: `${{ steps.prepare-summary.outputs.summary }}` + body: body, }); - name: This step shows as ran when no modifications are found From 7ca90ba72824baf21fbd3775cc68b010550f444f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:06:25 -0400 Subject: [PATCH 59/91] ci: test result summarization --- devel/summarize_site_validation.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 devel/summarize_site_validation.py diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py new file mode 100644 index 00000000..91a23e36 --- /dev/null +++ b/devel/summarize_site_validation.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# This module summarizes the results of site validation tests queued by +# workflow validate_modified_targets for presentation in Issue comments. 
+ +from defusedxml import ElementTree as ET +import sys +from pathlib import Path + +def summarize_junit_xml(xml_path: Path) -> str: + tree = ET.parse(xml_path) + root = tree.getroot() + suite = root.find('testsuite') + + pass_message: str = ":heavy_check_mark:   Pass" + fail_message: str = ":x:   Fail" + + if suite is None: + raise ValueError("Invalid JUnit XML: No testsuite found") + + summary_lines: list[str] = [] + summary_lines.append("#### Automatic validation of changes\n") + summary_lines.append("| | F- Check | F+ Check |") + summary_lines.append("|---|---|---|") + + failures = int(suite.get('failures', 0)) + errors_detected: bool = False + + results: dict[str, dict[str, str]] = {} + + for testcase in suite.findall('testcase'): + test_name = testcase.get('name').split('[')[0] + site_name = testcase.get('name').split('[')[1].rstrip(']') + failure = testcase.find('failure') + error = testcase.find('error') + + if site_name not in results: + results[site_name] = {} + + if test_name == "test_false_neg": + results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message + elif test_name == "test_false_pos": + results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message + + if error is not None: + errors_detected = True + + for result in results: + summary_lines.append(f"| {result} | {results[result].get('F- Check', 'Error!')} | {results[result].get('F+ Check', 'Error!')} |") + + if failures > 0: + summary_lines.append("\n___\n" + + "\nFailures were detected on at least one updated target. Commits containing accuracy failures" + + " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).") + + if errors_detected: + summary_lines.append("\n___\n" + + "\n**Errors were detected during validation. 
Please review the workflow logs.**") + + return "\n".join(summary_lines) + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: summarize_site_validation.py ") + sys.exit(1) + + xml_path: Path = Path(sys.argv[1]) + if not xml_path.is_file(): + print(f"Error: File '{xml_path}' does not exist.") + sys.exit(1) + + summary: str = summarize_junit_xml(xml_path) + print(summary) From 0fa2e1afc78508474b428d9b918f3001c74bada9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:09:44 -0400 Subject: [PATCH 60/91] chore: cleanup everything --- .github/workflows/validate_modified_targets.yml | 2 -- devel/summarize_site_validation.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 99844216..a98483cc 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -73,13 +73,11 @@ jobs: id: prepare-summary run: | $(poetry env activate) - python devel/summarize_site_validation.py validation_results.xml summary=$( python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results" ) deactivate echo "$summary" > validation_summary.md - cat validation_summary.md - name: Announce validation results if: steps.discover-modified.outputs.changed_targets != '' diff --git a/devel/summarize_site_validation.py b/devel/summarize_site_validation.py index 91a23e36..89d39750 100644 --- a/devel/summarize_site_validation.py +++ b/devel/summarize_site_validation.py @@ -19,7 +19,7 @@ def summarize_junit_xml(xml_path: Path) -> str: summary_lines: list[str] = [] summary_lines.append("#### Automatic validation of changes\n") - summary_lines.append("| | F- Check | F+ Check |") + summary_lines.append("| Target | F+ Check | F- Check |") summary_lines.append("|---|---|---|") failures = int(suite.get('failures', 0)) @@ -45,7 +45,7 @@ def 
summarize_junit_xml(xml_path: Path) -> str: errors_detected = True for result in results: - summary_lines.append(f"| {result} | {results[result].get('F- Check', 'Error!')} | {results[result].get('F+ Check', 'Error!')} |") + summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |") if failures > 0: summary_lines.append("\n___\n" + From dc61cdc7a4de06fbaa21347f5bc733a395c85695 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 18:10:33 -0400 Subject: [PATCH 61/91] chore: set request method --- sherlock_project/resources/data.json | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 386bb36c..db013b75 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1144,6 +1144,7 @@ }, "Instapaper": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.instapaper.com/p/{}", "urlMain": "https://www.instapaper.com/", "username_claimed": "john" From d4d8e01e31e60ae7e8ca0dea7b9c4923025fd5c1 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 19:45:34 -0400 Subject: [PATCH 62/91] chore: remove dead site Fixes: #2433 --- sherlock_project/resources/data.json | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index db013b75..f5c3b83f 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -807,14 +807,6 @@ "urlMain": "https://framapiaf.org", "username_claimed": "pylapp" }, - "Freelance.habr": { - "errorMsg": "
", - "errorType": "message", - "regexCheck": "^((?!\\.).)*$", - "url": "https://freelance.habr.com/freelancers/{}", - "urlMain": "https://freelance.habr.com/", - "username_claimed": "adam" - }, "Freelancer": { "errorMsg": "\"users\":{}", "errorType": "message", From 7afdee4c58a789f628b19da7c7553f0ab85c9096 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 20:09:44 -0400 Subject: [PATCH 63/91] fix: incorrect method --- sherlock_project/resources/data.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 071dd88e..78d04606 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -123,8 +123,10 @@ }, "Aparat": { "errorType": "status_code", + "request_method": "GET", "url": "https://www.aparat.com/{}/", "urlMain": "https://www.aparat.com/", + "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}", "username_claimed": "jadi" }, "Archive of Our Own": { From 9b3dc3e58143a0d559ace96ece251aa3deea2f19 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 20:21:28 -0400 Subject: [PATCH 64/91] fix(ci): issue write permission --- .github/workflows/validate_modified_targets.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index a98483cc..a10c554a 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -10,6 +10,8 @@ on: jobs: validate-modified-targets: runs-on: ubuntu-latest + permissions: + issues: write steps: - name: Checkout repository uses: actions/checkout@v5 From 0a38cad926412eded5d3bd4544a347ce9f87309b Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 20:24:41 -0400 Subject: [PATCH 65/91] fix(ci): issue write permission --- .github/workflows/validate_modified_targets.yml | 1 + 1 file changed, 1 insertion(+) 
diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index a10c554a..fe6996d1 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -85,6 +85,7 @@ jobs: if: steps.discover-modified.outputs.changed_targets != '' uses: actions/github-script@v8 with: + github-token: ${{ secrets.GITHUB_TOKEN }} script: | const fs = require('fs'); const body = fs.readFileSync('validation_summary.md', 'utf8'); From df7da4288ce4f08129754d291f1a9b7596c263b9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 20 Sep 2025 20:44:38 -0400 Subject: [PATCH 66/91] fix(ci): scoping --- .../workflows/validate_modified_targets.yml | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index fe6996d1..de024090 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -1,7 +1,7 @@ name: Modified Target Validation on: - pull_request: + pull_request_target: branches: - master paths: @@ -11,13 +11,14 @@ jobs: validate-modified-targets: runs-on: ubuntu-latest permissions: - issues: write + contents: read + pull-requests: write steps: - name: Checkout repository uses: actions/checkout@v5 with: - ref: ${{ github.event.pull_request.head.sha }} - fetch-depth: 0 + ref: ${{ github.base_ref }} + fetch-depth: 1 - name: Set up Python uses: actions/setup-python@v6 @@ -33,16 +34,16 @@ jobs: run: | poetry install --no-interaction --with dev + - name: Drop in place updated manifest from base + run: | + cp sherlock_project/resources/data.json data.json.base + git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1 + git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json + cp sherlock_project/resources/data.json data.json.head + - name: Discover 
modified targets id: discover-modified run: | - # Fetch the upstream branch - git fetch origin ${{ github.base_ref }} --depth=1 - - # Discover changes - git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base - cp sherlock_project/resources/data.json data.json.head - CHANGED=$( python - <<'EOF' import json @@ -66,30 +67,27 @@ jobs: if: steps.discover-modified.outputs.changed_targets != '' continue-on-error: true run: | - $(poetry env activate) - pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" --junitxml=validation_results.xml - deactivate + poetry run pytest -q --tb no -rA -m validate_targets -n 20 \ + --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \ + --junitxml=validation_results.xml - name: Prepare validation summary if: steps.discover-modified.outputs.changed_targets != '' id: prepare-summary run: | - $(poetry env activate) summary=$( - python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results" + poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results" ) - deactivate echo "$summary" > validation_summary.md - name: Announce validation results if: steps.discover-modified.outputs.changed_targets != '' uses: actions/github-script@v8 with: - github-token: ${{ secrets.GITHUB_TOKEN }} script: | const fs = require('fs'); const body = fs.readFileSync('validation_summary.md', 'utf8'); - github.rest.issues.createComment({ + await github.rest.issues.createComment({ issue_number: context.payload.pull_request.number, owner: context.repo.owner, repo: context.repo.repo, From dc89f1cd27a358a6771877cc0b597b3db822c06c Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:41:23 +0900 Subject: [PATCH 67/91] feat: Add dcinside --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 
insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac52..ff8af075 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -600,6 +600,12 @@ "urlMain": "https://www.dailymotion.com/", "username_claimed": "blue" }, + "dcinside": { + "errorType": "status_code", + "url": "https://gallog.dcinside.com/{}", + "urlMain": "https://www.dcinside.com/", + "username_claimed": "anrbrb" + }, "Dealabs": { "errorMsg": "La page que vous essayez", "errorType": "message", From e5cd5e5bfe7df4ebd93e220a69496a4fdfe7b39f Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:43:21 +0900 Subject: [PATCH 68/91] feat: Add namuwiki --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index ff8af075..2b5dbf6b 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1465,6 +1465,12 @@ "urlMain": "https://www.native-instruments.com/forum/", "username_claimed": "jambert" }, + "namuwiki": { + "errorType": "status_code", + "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", + "urlMain": "https://namu.wiki/", + "username_claimed": "namu" + }, "NationStates Nation": { "errorMsg": "Was this your nation? 
It may have ceased to exist due to inactivity, but can rise again!", "errorType": "message", From 86140af50e6a2aae642ff38b1cab365a980fa283 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:44:02 +0900 Subject: [PATCH 69/91] feat: Add SOOP --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 2b5dbf6b..eaf3e670 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1964,6 +1964,13 @@ "urlMain": "https://www.snapchat.com", "username_claimed": "teamsnapchat" }, + "SOOP": { + "errorType": "status_code", + "url": "https://www.sooplive.co.kr/station/{}", + "urlMain": "https://www.sooplive.co.kr/", + "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station", + "username_claimed": "udkn" + }, "SoundCloud": { "errorType": "status_code", "url": "https://soundcloud.com/{}", From cd7c52e4fae2dc81bc3fd75d098498e430d8bec9 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:44:55 +0900 Subject: [PATCH 70/91] Feat: Add tistory --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index eaf3e670..c4efcbe3 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2138,6 +2138,12 @@ "urlMain": "https://themeforest.net/", "username_claimed": "user" }, + "tistory": { + "errorType": "status_code", + "url": "https://{}.tistory.com/", + "urlMain": "https://www.tistory.com/", + "username_claimed": "notice" + }, "TnAFlix": { "errorType": "status_code", "isNSFW": true, From 7b3632bdadd4eba3473a1c0a728df522631d4654 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Fri, 3 Oct 2025 04:00:41 +0900 Subject: [PATCH 71/91] Add comment to site 'namuwiki' Co-authored-by: Paul Pfeister --- sherlock_project/resources/data.json 
| 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index c4efcbe3..f019000f 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1466,6 +1466,7 @@ "username_claimed": "jambert" }, "namuwiki": { + "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.", "errorType": "status_code", "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", "urlMain": "https://namu.wiki/", From 355bfbd328c31144983904a65e6ad3aa8c003d9c Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 00:42:07 +0530 Subject: [PATCH 72/91] fix(sites): Remediate false positive for DeviantArt --- sherlock_project/resources/data.json | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac52..9738699b 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -608,13 +608,15 @@ "urlMain": "https://www.dealabs.com/", "username_claimed": "blue" }, - "DeviantART": { - "errorType": "status_code", - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.deviantart.com", - "urlMain": "https://deviantart.com", - "username_claimed": "blue" - }, + "DeviantArt": { + "errorType": "message", + "errorMsg": "Llama Not Found", + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://www.deviantart.com/{}", + "urlMain": "https://www.deviantart.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis" +}, "DigitalSpy": { "errorMsg": "The page you were looking for could not be found.", "errorType": "message", From b811b2bd47f0b45ac1cdffa9518470fff91a253e Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 2 Oct 2025 18:21:20 -0400 Subject: [PATCH 73/91] chore: update code owners --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0f2eadf2..b9af7fda 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,5 +1,5 @@ ### REPOSITORY -/.github/CODEOWNERS @sdushantha +/.github/CODEOWNERS @sdushantha @ppfeister /.github/FUNDING.yml @sdushantha /LICENSE @sdushantha From 779d4c33f4a88421a443695931d7041e55a51c7e Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 03:55:03 +0530 Subject: [PATCH 74/91] fix: Remove username_unclaimed as requested --- sherlock_project/resources/data.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 9738699b..dc422754 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -608,14 +608,13 @@ "urlMain": "https://www.dealabs.com/", "username_claimed": "blue" }, - "DeviantArt": { + "DeviantArt": { "errorType": "message", "errorMsg": "Llama Not Found", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "url": "https://www.deviantart.com/{}", "urlMain": "https://www.deviantart.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis" + "username_claimed": "blue" }, "DigitalSpy": { "errorMsg": "The page you were looking for could not be found.", From c89a52caf7f55d36265866ffc2c9d390957a7734 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 04:25:46 +0530 Subject: [PATCH 75/91] fix(sites): Remediate false positive for AllMyLinks --- sherlock_project/resources/data.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac52..091e2e9a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -79,13 +79,13 @@ "username_claimed": "pink" }, "AllMyLinks": { - "errorMsg": "Not Found", - "errorType": "message", - "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$", - 
"url": "https://allmylinks.com/{}", - "urlMain": "https://allmylinks.com/", - "username_claimed": "blue" - }, + "errorMsg": "Page not found", + "errorType": "message", + "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$", + "url": "https://allmylinks.com/{}", + "urlMain": "https://allmylinks.com/", + "username_claimed": "blue" +}, "AniWorld": { "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar", "errorType": "message", From d314d75db1636b14511997fe2d19a9b8bc6ef9b6 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 04:43:05 +0530 Subject: [PATCH 76/91] fix(sites): Remediate false positive for Mydramalist --- sherlock_project/resources/data.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac52..dd1c2f39 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1440,12 +1440,12 @@ "username_claimed": "blue" }, "Mydramalist": { - "errorMsg": "Sign in - MyDramaList", - "errorType": "message", - "url": "https://www.mydramalist.com/profile/{}", - "urlMain": "https://mydramalist.com", - "username_claimed": "elhadidy12398" - }, + "errorMsg": "The requested page was not found", + "errorType": "message", + "url": "https://www.mydramalist.com/profile/{}", + "urlMain": "https://mydramalist.com", + "username_claimed": "elhadidy12398" +}, "Myspace": { "errorType": "status_code", "url": "https://myspace.com/{}", From b245c462c92bf1655b3c871217f9683c1544554e Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 05:56:52 +0530 Subject: [PATCH 77/91] fix(sites): Remediate false positive for Apple Discussions --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac52..cd081b00 100644 --- a/sherlock_project/resources/data.json +++ 
b/sherlock_project/resources/data.json @@ -115,7 +115,7 @@ "username_claimed": "lio24d" }, "Apple Discussions": { - "errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.", + "errorMsg": "Looking for something in Apple Support Communities?", "errorType": "message", "url": "https://discussions.apple.com/profile/{}", "urlMain": "https://discussions.apple.com", From 0e7219b191d36b1ba06c16066c450377863ea571 Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Fri, 3 Oct 2025 13:41:43 +0530 Subject: [PATCH 78/91] Security Fix: Add timeout parameters to HTTP requests This fix addresses a critical security vulnerability where HTTP requests could hang indefinitely, potentially causing denial of service. Changes: - Added 10-second timeout to version check API call - Added 10-second timeout to GitHub pull request API call - Added 30-second timeout to data file downloads (larger timeout for data) - Added 10-second timeout to exclusions list download Impact: - Prevents infinite hangs that could freeze the application - Improves user experience with predictable response times - Fixes security issue flagged by Bandit static analysis (B113) - Makes the application more robust in poor network conditions The timeouts are conservative enough to work with slow connections while preventing indefinite blocking that could be exploited. --- sherlock_project/sherlock.py | 4 ++-- sherlock_project/sites.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a5..ba630c73 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -742,7 +742,7 @@ def main(): # Check for newer version of Sherlock. 
If it exists, let the user know about it try: - latest_release_raw = requests.get(forge_api_latest_release).text + latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text latest_release_json = json_loads(latest_release_raw) latest_remote_tag = latest_release_json["tag_name"] @@ -802,7 +802,7 @@ def main(): if args.json_file.isnumeric(): pull_number = args.json_file pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" - pull_request_raw = requests.get(pull_url).text + pull_request_raw = requests.get(pull_url, timeout=10).text pull_request_json = json_loads(pull_request_raw) # Check if it's a valid pull request diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index 2ba811d7..b7aaf4c5 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -129,7 +129,7 @@ class SitesInformation: if data_file_path.lower().startswith("http"): # Reference is to a URL. try: - response = requests.get(url=data_file_path) + response = requests.get(url=data_file_path, timeout=30) except Exception as error: raise FileNotFoundError( f"Problem while attempting to access data file URL '{data_file_path}': {error}" @@ -166,7 +166,7 @@ class SitesInformation: if honor_exclusions: try: - response = requests.get(url=EXCLUSIONS_URL) + response = requests.get(url=EXCLUSIONS_URL, timeout=10) if response.status_code == 200: exclusions = response.text.splitlines() exclusions = [exclusion.strip() for exclusion in exclusions] From 91f3b16993f2f1dc70d3750d84249ebff8d24038 Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 02:55:57 +0530 Subject: [PATCH 79/91] fix(sites): Update BoardGameGeek URL structure and detection method BoardGameGeek changed from /user/{} to /profile/{} URL structure. Also updated from message to status_code detection as the site no longer returns clear error messages for non-existent users. 
--- sherlock_project/resources/data.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec929..3f7f5ac3 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -279,10 +279,9 @@ "username_claimed": "mcuban" }, "BoardGameGeek": { - "errorType": "message", + "errorType": "status_code", "regexCheck": "^[a-zA-Z0-9_]*$", - "errorMsg": "User not found", - "url": "https://boardgamegeek.com/user/{}", + "url": "https://boardgamegeek.com/profile/{}", "urlMain": "https://boardgamegeek.com", "username_claimed": "blue" }, From 3e653c46b07c858811619517b28a17742cb4847a Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 03:12:47 +0530 Subject: [PATCH 80/91] fix(sites): Remove BoardGameGeek - unreliable detection BoardGameGeek returns identical pages for both existing and non-existing users, making reliable username detection impossible with HTTP-based methods. The site likely uses JavaScript to load user-specific content dynamically. 
--- sherlock_project/resources/data.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 3f7f5ac3..891b6245 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,13 +278,6 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, - "BoardGameGeek": { - "errorType": "status_code", - "regexCheck": "^[a-zA-Z0-9_]*$", - "url": "https://boardgamegeek.com/profile/{}", - "urlMain": "https://boardgamegeek.com", - "username_claimed": "blue" - }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From c5e209d78e203f931a9e3bc6e51d6b49fdd33d3c Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 11:23:55 +0530 Subject: [PATCH 81/91] fix(sites): Implement BoardGameGeek API detection as suggested Using the API endpoint suggested by akh7177: https://api.geekdo.com/api/users?username={} However, there's an edge case where valid users contain empty arrays in their JSON response (adminBadges[], userMicrobadges[], supportYears[]) which causes Sherlock's substring matching to incorrectly flag them as 'not found' when looking for the '[]' error pattern. The API correctly returns: - Valid user: JSON object with user data (but contains [] substrings) - Invalid user: Exactly '[]' (2 characters total) This needs further refinement to distinguish between the exact '[]' response vs JSON containing '[]' substrings. 
--- sherlock_project/resources/data.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 891b6245..09168d17 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,6 +278,15 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, + "BoardGameGeek": { + "errorMsg": "[]", + "errorType": "message", + "regexCheck": "^[a-zA-Z0-9_]*$", + "url": "https://boardgamegeek.com/profile/{}", + "urlMain": "https://boardgamegeek.com", + "urlProbe": "https://api.geekdo.com/api/users?username={}", + "username_claimed": "blue" + }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From 94c013886a677df9b7e1192267d548b4520f2958 Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 11:33:27 +0530 Subject: [PATCH 82/91] fix(sites): Remove BoardGameGeek due to incompatible detection BoardGameGeek cannot be reliably detected with Sherlock's current capabilities: - Original HTML detection: Returns false positives - API endpoint approach: The API returns status 200 for both valid and invalid users - Invalid user: Returns exactly '[]' - Valid user: Returns JSON containing '[]' substrings (e.g., "adminBadges":[]) Since Sherlock's 'message' errorType uses substring matching, it incorrectly identifies valid users as "not found" when checking for '[]' in the response. The site's API response format is fundamentally incompatible with Sherlock's detection methods (message/status_code/response_url), so removal is the only viable solution to prevent false positives and false negatives. Addresses false positive issue originally reported in testing. 
--- sherlock_project/resources/data.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 09168d17..891b6245 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,15 +278,6 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, - "BoardGameGeek": { - "errorMsg": "[]", - "errorType": "message", - "regexCheck": "^[a-zA-Z0-9_]*$", - "url": "https://boardgamegeek.com/profile/{}", - "urlMain": "https://boardgamegeek.com", - "urlProbe": "https://api.geekdo.com/api/users?username={}", - "username_claimed": "blue" - }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From 57a0ccef38066b769061736bc165fb0d94a4a516 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 14:30:40 +0530 Subject: [PATCH 83/91] Remediate False Positive for Roblox --- sherlock_project/resources/data.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec929..2d965176 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1823,8 +1823,7 @@ "username_claimed": "blue" }, "Roblox": { - "errorMsg": "Page cannot be found or no longer exists", - "errorType": "message", + "errorType": "status_code", "url": "https://www.roblox.com/user.aspx?username={}", "urlMain": "https://www.roblox.com/", "username_claimed": "bluewolfekiller" From 977ad5c1a48e93cce720941d6777e150099ac183 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 14:48:37 +0530 Subject: [PATCH 84/91] Remediate False Positive for SlideShare --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 
b30ec929..7e984273 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1932,7 +1932,7 @@ }, "SlideShare": { "errorType": "message", - "errorMsg": "Username available", + "errorMsg": "Page no longer exists", "url": "https://slideshare.net/{}", "urlMain": "https://slideshare.net/", "username_claimed": "blue" From 5cd769c2f46e9615fdc3d6e43341e3f868256597 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 15:12:20 +0530 Subject: [PATCH 85/91] Remediate False Positives for CyberDefenders --- sherlock_project/resources/data.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec929..4787ffeb 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -572,8 +572,7 @@ "username_claimed": "brown" }, "CyberDefenders": { - "errorMsg": "Blue Team Training for SOC analysts and DFIR - CyberDefenders", - "errorType": "message", + "errorType": "status_code", "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$", "request_method": "GET", "url": "https://cyberdefenders.org/p/{}", From dc869852bc5674f158db79bb2b4a3ad42b879f0e Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 17:22:50 +0530 Subject: [PATCH 86/91] fix(sites): Fix Threads false positive detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Threads was showing false positives for non-existent users because the error message detection was incorrect. Updated errorMsg: - Old: "Threads" (generic, matches valid pages too) - New: "Threads • Log in" (specific to non-existent users) When a user doesn't exist, Threads redirects to a login page with the title "Threads • Log in". Valid user profiles have titles like "Username (@username) • Threads, Say more". 
Tested with: - Invalid user (impossibleuser12345): Correctly not found - Valid user (zuck): Correctly found This fixes the false positive issue where non-existent Threads profiles were being reported as found. --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec929..1f6b3d9e 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2820,7 +2820,7 @@ "username_claimed": "green" }, "threads": { - "errorMsg": "Threads", + "errorMsg": "Threads • Log in", "errorType": "message", "headers": { "Sec-Fetch-Mode": "navigate" From b99719ce6014312445614d856df95dbae37b5991 Mon Sep 17 00:00:00 2001 From: obiwan04kanobi Date: Sun, 5 Oct 2025 00:22:12 +0530 Subject: [PATCH 87/91] Add Docker build test to CI workflow - Adds docker-build-test job to regression.yml - Runs on push/merge to master and release branches - Extracts VERSION_TAG from pyproject.toml for build - Tests that Docker image builds and runs successfully - Resolves dockerfile syntax warnings - Resolves #2196 --- .github/workflows/regression.yml | 27 +++++++++++++++++++++++++-- Dockerfile | 2 +- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index e366f29d..5029b870 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -11,6 +11,7 @@ on: - '**/*.py' - '**/*.ini' - '**/*.toml' + - 'Dockerfile' push: branches: - master @@ -21,11 +22,13 @@ on: - '**/*.py' - '**/*.ini' - '**/*.toml' + - 'Dockerfile' jobs: tox-lint: - # Linting is ran through tox to ensure that the same linter is used by local runners runs-on: ubuntu-latest + # Linting is ran through tox to ensure that the same linter + # is used by local runners steps: - uses: actions/checkout@v4 - name: Set up linting environment @@ -41,7 +44,8 @@ jobs: tox-matrix: runs-on:
${{ matrix.os }} strategy: - fail-fast: false # We want to know what specicic versions it fails on + # We want to know what specicic versions it fails on + fail-fast: false matrix: os: [ ubuntu-latest, @@ -67,3 +71,22 @@ jobs: pip install tox-gh-actions - name: Run tox run: tox + docker-build-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Get version from pyproject.toml + id: get-version + run: | + VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2) + echo "version=$VERSION" >> $GITHUB_OUTPUT + - name: Build Docker image + run: | + docker build \ + --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \ + -t sherlock-test:latest . + - name: Test Docker image runs + run: docker run --rm sherlock-test:latest --version diff --git a/Dockerfile b/Dockerfile index 361530ab..ccdfbf23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ # 3. Build image with BOTH latest and version tags # i.e. 
`docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .` -FROM python:3.12-slim-bullseye as build +FROM python:3.12-slim-bullseye AS build WORKDIR /sherlock RUN pip3 install --no-cache-dir --upgrade pip From 0794e02b525a2bf5c9222c3da51a714f96b42d64 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 16:53:30 -0400 Subject: [PATCH 88/91] feat: support multiple errorTypes --- sherlock_project/resources/data.schema.json | 217 +++++++++++++------- sherlock_project/sherlock.py | 109 +++++----- 2 files changed, 201 insertions(+), 125 deletions(-) diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index 216ffb62..c717cb25 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -1,80 +1,149 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Sherlock Target Manifest", - "description": "Social media targets to probe for the existence of known usernames", - "type": "object", - "properties": { - "$schema": { "type": "string" } - }, - "patternProperties": { - "^(?!\\$).*?$": { - "type": "object", - "description": "Target name and associated information (key should be human readable name)", - "required": [ "url", "urlMain", "errorType", "username_claimed" ], - "properties": { - "url": { "type": "string" }, - "urlMain": { "type": "string" }, - "urlProbe": { "type": "string" }, - "username_claimed": { "type": "string" }, - "regexCheck": { "type": "string" }, - "isNSFW": { "type": "boolean" }, - "headers": { "type": "object" }, - "request_payload": { "type": "object" }, - "__comment__": { - "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
- }, - "tags": { - "oneOf": [ - { "$ref": "#/$defs/tag" }, - { "type": "array", "items": { "$ref": "#/$defs/tag" } } - ] - }, - "request_method": { - "type": "string", - "enum": [ "GET", "POST", "HEAD", "PUT" ] - }, + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Target Manifest", + "description": "Social media targets to probe for the existence of known usernames", + "type": "object", + "properties": { + "$schema": { "type": "string" } + }, + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "description": "Target name and associated information (key should be human readable name)", + "required": ["url", "urlMain", "errorType", "username_claimed"], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "__comment__": { + "type": "string", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
+ }, + "tags": { + "oneOf": [ + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } + ] + }, + "request_method": { + "type": "string", + "enum": ["GET", "POST", "HEAD", "PUT"] + }, + "errorType": { + "oneOf": [ + { + "type": "string", + "enum": ["message", "response_url", "status_code"] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": ["message", "response_url", "status_code"] + } + } + ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" } + }, + "dependencies": { + "errorMsg": { + "oneOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { "errorType": { - "type": "string", - "enum": [ "message", "response_url", "status_code" ] - }, - "errorMsg": { - "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } - ] - }, - "errorCode": { - "oneOf": [ - { "type": "integer" }, - { "type": "array", "items": { "type": "integer" } } - ] - }, - "errorUrl": { "type": "string" }, - "response_url": { "type": "string" } - }, - "dependencies": { - "errorMsg": { - "properties" : { "errorType": { "const": "message" } } - }, - "errorUrl": { - "properties": { "errorType": { "const": "response_url" } } - }, - "errorCode": { - "properties": { "errorType": { "const": "status_code" } } + "type": "array", + "contains": { "const": "message" } } - }, - "if": { "properties": { "errorType": { "const": "message" } } }, - "then": { "required": [ "errorMsg" ] }, - "else": { - "if": { "properties": { "errorType": { "const": "response_url" } } }, - "then": { "required": [ "errorUrl" ] } - }, - "additionalProperties": false + } + } + ] + }, + "errorUrl": { + "oneOf": [ + { "properties": { "errorType": { "const": 
"response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "errorCode": { + "oneOf": [ + { "properties": { "errorType": { "const": "status_code" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "status_code" } + } + } + } + ] } - }, - "additionalProperties": false, - "$defs": { - "tag": { "type": "string", "enum": [ "adult", "gaming" ] } + }, + "allOf": [ + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "then": { "required": ["errorMsg"] } + }, + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "then": { "required": ["errorUrl"] } + } + ], + "additionalProperties": false } + }, + "additionalProperties": false, + "$defs": { + "tag": { "type": "string", "enum": ["adult", "gaming"] } + } } diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a5..a776d8c3 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -381,6 +381,8 @@ def sherlock( # Get the expected error type error_type = net_info["errorType"] + if isinstance(error_type, str): + error_type: list[str] = [error_type] # Retrieve future and ensure it has finished future = net_info["request_future"] @@ -425,58 +427,63 @@ def sherlock( elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): query_status = QueryStatus.WAF - elif error_type == "message": - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect 
that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: - error_flag = False - break - if error_flag: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - elif error_type == "status_code": - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED - - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] - - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = QueryStatus.AVAILABLE - elif error_type == "response_url": - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. no 404, or - # forward to some odd redirect). - if 200 <= r.status_code < 300: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE else: - # It should be impossible to ever get here... - raise ValueError( - f"Unknown Error Type '{error_type}' for " f"site '{social_network}'" - ) + if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): + # It should be impossible to ever get here... 
+ raise ValueError( + f"Unknown Error Type '{error_type}' for " + f"site '{social_network}'" + ) + + if "message" in error_type: + # error_flag True denotes no error found in the HTML + # error_flag False denotes error found in the HTML + error_flag = True + errors = net_info.get("errorMsg") + # errors will hold the error message + # it can be string or list + # by isinstance method we can detect that + # and handle the case for strings as normal procedure + # and if its list we can iterate the errors + if isinstance(errors, str): + # Checks if the error message is in the HTML + # if error is present we will set flag to False + if errors in r.text: + error_flag = False + else: + # If it's list, it will iterate all the error message + for error in errors: + if error in r.text: + error_flag = False + break + if error_flag: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE + + if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + error_codes = net_info.get("errorCode") + query_status = QueryStatus.CLAIMED + + # Type consistency, allowing for both singlets and lists in manifest + if isinstance(error_codes, int): + error_codes = [error_codes] + + if error_codes is not None and r.status_code in error_codes: + query_status = QueryStatus.AVAILABLE + elif r.status_code >= 300 or r.status_code < 200: + query_status = QueryStatus.AVAILABLE + + if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + # For this detection method, we have turned off the redirect. + # So, there is no need to check the response URL: it will always + # match the request. Instead, we will ensure that the response + # code indicates that the request was successful (i.e. no 404, or + # forward to some odd redirect). 
+ if 200 <= r.status_code < 300: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE if dump_response: print("+++++++++++++++++++++") From 52cd5fdfc136340b2c88ffe8c1dc953ff8b51cc5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 20:22:34 -0400 Subject: [PATCH 89/91] feat: gracefully skip sites with invalid errorType --- sherlock_project/sherlock.py | 97 +++++++++++++++++------------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index dcfbda04..d349c12b 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -429,61 +429,58 @@ def sherlock( else: if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): - # It should be impossible to ever get here... - raise ValueError( - f"Unknown Error Type '{error_type}' for " - f"site '{social_network}'" - ) - - if "message" in error_type: - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: + error_context = f"Unknown error type '{error_type}' for {social_network}" + query_status = QueryStatus.UNKNOWN + else: + if "message" in error_type: + # error_flag True denotes no error found in the HTML + # error_flag False denotes error found in the HTML + error_flag = True + errors = net_info.get("errorMsg") + # errors will hold the error message + # it 
can be string or list + # by isinstance method we can detect that + # and handle the case for strings as normal procedure + # and if its list we can iterate the errors + if isinstance(errors, str): + # Checks if the error message is in the HTML + # if error is present we will set flag to False + if errors in r.text: error_flag = False - break - if error_flag: + else: + # If it's list, it will iterate all the error message + for error in errors: + if error in r.text: + error_flag = False + break + if error_flag: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE + + if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + error_codes = net_info.get("errorCode") query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED + # Type consistency, allowing for both singlets and lists in manifest + if isinstance(error_codes, int): + error_codes = [error_codes] - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] + if error_codes is not None and r.status_code in error_codes: + query_status = QueryStatus.AVAILABLE + elif r.status_code >= 300 or r.status_code < 200: + query_status = QueryStatus.AVAILABLE - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = QueryStatus.AVAILABLE - - if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. 
no 404, or - # forward to some odd redirect). - if 200 <= r.status_code < 300: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE + if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + # For this detection method, we have turned off the redirect. + # So, there is no need to check the response URL: it will always + # match the request. Instead, we will ensure that the response + # code indicates that the request was successful (i.e. no 404, or + # forward to some odd redirect). + if 200 <= r.status_code < 300: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE if dump_response: print("+++++++++++++++++++++") From 4246a7b16fb399967d766aac9d677c7d48b60aa5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 20:32:16 -0400 Subject: [PATCH 90/91] chore: make default --no-txt Workflows where a txt file is still required should use --txt --- sherlock_project/sherlock.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index d349c12b..07b19af7 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -723,12 +723,22 @@ def main(): help="Include checking of NSFW sites from default list.", ) + # TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed + # in future release parser.add_argument( "--no-txt", action="store_true", dest="no_txt", default=False, - help="Disable creation of a txt file", + help="Disable creation of a txt file - WILL BE DEPRECATED", + ) + + parser.add_argument( + "--txt", + action="store_true", + dest="output_txt", + default=False, + help="Enable creation of a txt file", ) parser.add_argument( @@ -892,7 +902,7 @@ def main(): else: result_file = f"{username}.txt" - if not args.no_txt: + if args.output_txt: with open(result_file, "w", encoding="utf-8") as file: exists_counter = 0 for website_name in 
results: From 9e3448d9923fecec7504ef67cc5d0f0892494dcb Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sun, 5 Oct 2025 11:59:41 +0530 Subject: [PATCH 91/91] fix(sites): Implemented BoardGameGeek using username validation API - Added BoardGameGeek back using the new API endpoint suggested by @ppfeister - Uses https://api.geekdo.com/api/accounts/validate/username?username={} for detection - errorMsg checks for '"isValid":true'; a match marks the username as available (unclaimed), and its absence marks it as claimed - This approach avoids the previous issues with: * HTML parsing returning false positives * User API returning JSON with '[]' substrings that caused detection problems - Successfully tested with both valid (blue) and invalid usernames Thanks @ppfeister for the API suggestion and @akh7177 for the initial guidance --- sherlock_project/resources/data.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 891b6245..6c09c39c 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -291,6 +291,14 @@ "urlMain": "https://www.bookcrossing.com/", "username_claimed": "blue" }, + "BoardGameGeek": { + "errorMsg": "\"isValid\":true", + "errorType": "message", + "url": "https://boardgamegeek.com/user/{}", + "urlMain": "https://boardgamegeek.com/", + "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}", + "username_claimed": "blue" + }, "BraveCommunity": { "errorType": "status_code", "url": "https://community.brave.com/u/{}/",