Compare commits

...

242 Commits

Author SHA1 Message Date
Paul Pfeister 8f1308b90d
Merge pull request #2758 from Aaditya-Chunekar/patch-2
Add Credly data to JSON resource
2025-12-29 19:54:44 -08:00
Paul Pfeister e856b05c2c
Merge pull request #2636 from simplyNour/Bug/fix-gradle-false-pos-test-failure
Bug: Fix local variable scoping issue affecting false-pos test output
2025-12-29 18:56:30 -08:00
Aaditya fe9e750dab
Add Credly data to JSON resource 2025-11-14 09:27:07 +05:30
Paul Pfeister 842ae1f754
Merge pull request #2733 from Aaditya-Chunekar/patch-1
Add Nothing Community data to data.json
2025-10-29 16:34:10 -07:00
Paul Pfeister 339634f7bc
Merge pull request #2737 from Nolanp123/fix-minecraft-regex
Fix Minecraft False Positives for Long Usernames
2025-10-28 20:47:32 -07:00
Nolan Parker c1632693bb Add regexCheck to Minecraft to prevent false positives for long usernames 2025-10-28 20:39:53 -05:00
Aaditya e19cb32009
Add Nothing Community data to data.json 2025-10-27 11:20:30 +05:30
Paul Pfeister b69c8ef940
Merge pull request #2710 from Aaditya-Chunekar/add-sites
hacktoberfest: Added sites support
2025-10-26 00:16:29 -07:00
Aaditya-Chunekar 2724711060 feat: add tmdb 2025-10-26 09:49:31 +05:30
Paul Pfeister 0a68ab7f4c
Merge pull request #2709 from Aaditya-Chunekar/add-topmate
hacktoberfest: Add topmate.io support
2025-10-24 20:15:02 -07:00
Paul Pfeister 8675178be1
Merge pull request #2705 from Aaditya-Chunekar/add-site-seoforum
hacktoberfest: Add SEO Forum Support
2025-10-24 20:12:50 -07:00
Aaditya-Chunekar 9bafb8a280 feat: add n8n, HackerSploit, Arduino Forum 2025-10-24 09:37:40 +05:30
Aaditya-Chunekar 8e5549862a feat: add topmate.io 2025-10-24 09:14:42 +05:30
Aaditya-Chunekar 8797fcd517 feat: add SEOForum 2025-10-24 08:46:23 +05:30
Paul Pfeister 0995d4d669
chore: reformat 2025-10-23 19:39:05 -04:00
Paul Pfeister 6c0c273a0b
Merge pull request #2695 from simplyNour/Bug/urls-are-not-clickable-in-excel-file
Make urls clickable when saved to excel
2025-10-23 16:25:17 -07:00
Paul Pfeister 3eeba790fd
Merge pull request #2722 from VivekGaddam/Twitch_Added
Added Twitch Platform Support to Sherlock
2025-10-23 15:28:01 -07:00
Paul Pfeister 61a29ec373
Merge pull request #2723 from imhiteshgarg/adding_lemmy
adding lemmy
2025-10-23 15:26:57 -07:00
Paul Pfeister 9fbbbf7c73
Merge pull request #2724 from obiwan04kanobi/feat/add-codolio
feat: add Codolio to supported sites
2025-10-23 15:26:16 -07:00
obiwan04kanobi 331b68d909 feat: add Codolio to supported sites
Add Codolio (coding portfolio tracker) as a new site target for username detection.

Detection method: Message-based using title tag differences
- Existing profiles: '<title>Username | Codolio</title>'
- Non-existing profiles: '<title>Page Not Found | Codolio</title>'

Tested with multiple usernames to confirm accurate detection.
2025-10-23 22:42:06 +05:30
Hitesh Garg 8c3e093561 adding lemmy
adding lemmy
2025-10-23 21:38:18 +05:30
vivekgaddam e35e5e3af1 corrected Twitch 2025-10-23 19:41:00 +05:30
vivekgaddam 906287b305 added twitch 2025-10-23 19:18:31 +05:30
Matheus Felipe 0dbb6abcc5
Fix Minor Capitalization Issue in README.md (#2716) 2025-10-23 09:08:29 -03:00
Matheus Felipe 03e097cc82
Reorder Terraria Forums to correct alphabetical position (#2700) 2025-10-23 08:53:50 -03:00
Matheus Felipe 91c1964918
Add GameFaqs support (#2721)
Co-authored-by: Maquinero123456 <jimenanavarrodavid@uma.es>
2025-10-23 08:04:41 -03:00
Matheus Felipe 373f3d389a
Added support for Trovo (#2720) 2025-10-23 06:17:28 -03:00
SirAzako 828c47109d
Added support for Trovo 2025-10-23 06:10:20 -03:00
Matheus Felipe 94245b25df
Add OpenGameArt support (#2719)
Co-authored-by: Horațiu Mlendea <Horatiu.Mlendea@ProtonMail.com>
2025-10-23 05:03:35 -03:00
Matheus Felipe 734542f0af
Add mstdn.social (#2718) 2025-10-23 04:19:10 -03:00
Matheus Felipe 1f8166ba9f
Remove unclaimed username entry for mstdn.social 2025-10-23 03:41:21 -03:00
MagicLike 6f1ddaa615
Added mstdn.social
Added another Mastodon instance: mstdn.social
2025-10-23 03:32:54 -03:00
Nolan Parker 7ee2891517 Fix Minor Capitalization Issue in README.md 2025-10-22 22:16:13 -05:00
Paul Pfeister b893e4aa20
Merge pull request #2711 from imhiteshgarg/add_observablehq
Adding ObservableHQ site
2025-10-21 23:04:24 -07:00
Hitesh Garg eff869906a Adding ObservableHQ site
Adding ObservableHQ site
2025-10-22 10:58:31 +05:30
Paul Pfeister 2a0107e189
Merge pull request #2702 from ABSCP4/patch-1
Update README.md
2025-10-20 15:33:36 -07:00
ABSCP4 5d8c4de212
Update README.md
fixed typo
2025-10-20 11:01:32 -07:00
Nolan Parker 1f9d7e8373 Reorder Terraria Forums to correct alphabetical position 2025-10-19 15:53:09 -05:00
Paul Pfeister 184470f871
Merge pull request #2699 from Nolanp123/fix-codesandbox-name
Fix site name formatting for CodeSandbox
2025-10-19 13:14:14 -07:00
Nolan Parker 342dbc85cc Fix site name formatting for CodeSandbox 2025-10-19 14:44:47 -05:00
Paul Pfeister 457e16e84f
Merge pull request #2670 from simplyNour/Bug/fix-false-positive-for-topcoder
fix: false positive for Topcoder
2025-10-18 23:47:34 -07:00
Paul Pfeister 43b3736b75
Merge pull request #2697 from raman1236/add-odysee-support
Add Odysee support
2025-10-18 23:06:15 -07:00
Paul Pfeister 64a49ffe17
Merge pull request #2698 from KaiAllAlone/KaiAllAlone-warframe-market
Add Warframe Market support
2025-10-18 22:48:00 -07:00
rvasikarla 0afd2006c6 Add Odysee support
- Add Odysee platform to sherlock database
- Uses canonical link detection for non-existent users
- URL pattern: https://odysee.com/@{username}
- Detects error via canonical redirect to main site
2025-10-18 16:47:27 -05:00
rvasikarla 3c270173a7 Add Odysee support
- Add Odysee platform to sherlock database
- Uses canonical link detection for non-existent users
- URL pattern: https://odysee.com/@{username}
- Detects error via canonical redirect to main site
2025-10-18 16:44:10 -05:00
rvasikarla 8d73f9ef4c Add Odysee support
- Add Odysee platform to sherlock database
- Uses canonical link detection for non-existent users
- URL pattern: https://odysee.com/@{username}
- Detects error via canonical redirect to main site
2025-10-18 16:37:31 -05:00
Debanuj Roy 472c086805
Update data.json: fixed syntax error 2025-10-19 03:06:25 +05:30
Debanuj Roy 400c277f24
more robust 2025-10-19 03:00:43 +05:30
Debanuj Roy e759564550
Update data.json: update matching logic 2025-10-19 02:55:33 +05:30
Debanuj Roy deebe7137c
Added Warframe Market 2025-10-19 02:45:07 +05:30
nour cb14ccbaaf Make urls clickable when saved to excel 2025-10-18 15:21:36 +03:00
Paul Pfeister eb892795e9
Merge pull request #2683 from 403Code/patch-1
Add: Cfx.re Forum
2025-10-15 10:52:32 -07:00
Rizey (Nantaaaaaaaaaa) 09de90066b
Update data.json 2025-10-15 13:39:44 +07:00
Rizey (Nantaaaaaaaaaa) cd1f27c12b
Update data.json 2025-10-15 13:29:42 +07:00
Rizey (Nantaaaaaaaaaa) b837de8358
Add Cfx.re Forum 2025-10-15 13:22:09 +07:00
Paul Pfeister 7a70f35883
Merge pull request #2680 from bjornmorten/add/norwegian-forums
Add Norwegian forum sites (diskusjon.no & forum.kvinneguiden.no)
2025-10-14 11:25:31 -07:00
bjornmorten 4b17dae385
fix: regex max length for kvinneguiden 2025-10-14 19:48:02 +02:00
Paul Pfeister efefe3f54a
Merge pull request #2682 from bjornmorten/add/cryptohack
Add: CryptoHack
2025-10-14 10:41:41 -07:00
Paul Pfeister 4b70a1fc25
Merge pull request #2681 from bjornmorten/add/hackmd
Add: HackMD
2025-10-14 10:41:31 -07:00
bjornmorten a7893f399e add: CryptoHack 2025-10-14 19:28:53 +02:00
bjornmorten 1cb6c12851 add: HackMD 2025-10-14 19:21:36 +02:00
bjornmorten c4f7485ecf fix: alphabetical ordering 2025-10-14 19:10:57 +02:00
bjornmorten 228f50413e add: diskusjon.no and forum.kvinneguiden.no 2025-10-14 19:08:35 +02:00
Paul Pfeister d1867b1b51
Merge pull request #2679 from aryanj10/fix-fasle-positive-for-lesswrong
Fix LessWrong detection Issue #2634
2025-10-14 09:58:56 -07:00
Aryan Jain 6d2497582e Fix LessWrong detection Issue #2634 2025-10-14 11:04:15 -04:00
Paul Pfeister 885c43b8af
Merge pull request #2677 from spmedia/patch-9
Add: BreachSta.rs Forum
2025-10-13 16:12:36 -07:00
Edmond Major III 8ad47b0b23
Update data.json 2025-10-13 17:23:10 -05:00
Edmond Major III e93af99424
Update data.json
remix based off title instead of text in body
2025-10-13 17:20:50 -05:00
Edmond Major III 5862ab4f92
Update data.json
Add in BreachSta.rs forum - a popular cybercrime forum

https://breachsta.rs/profile/Sleepybubble - returns valid profile

https://breachsta.rs/profile/asdfasdfasdf - returns "Not found
This page doesn't exist"
2025-10-13 17:15:26 -05:00
Paul Pfeister 4110cac45c
Merge pull request #2661 from KaiAllAlone/terraria-forums
Site Added:Terraria forums
2025-10-13 15:07:31 -07:00
Paul Pfeister d66b18e8ae
Merge pull request #2676 from spmedia/patch-8
Add: Patched.sh
2025-10-13 14:53:19 -07:00
Edmond Major III b532fc6a38
Add: Patched.sh
Add Patched, a popular cybercrime forum.

https://patched.sh/User/blue = valid user

https://patched.sh/User/khjasjkdhfa38a = not a valid user and displays "The member you specified is either invalid or doesn't exist."
2025-10-13 13:20:03 -05:00
Paul Pfeister 99cf073835
Merge pull request #2674 from spmedia/patch-6
Add: Cracked.sh
2025-10-13 10:41:46 -07:00
Edmond Major III ec7e1b8b81
Update data.json
Trailing / was the issue so removed it
2025-10-13 12:30:50 -05:00
Edmond Major III a4aab38901
Update data.json
Remove www
2025-10-13 12:24:02 -05:00
Edmond Major III 5202900618
Update data.json
Updated error msg on no user
2025-10-13 12:16:09 -05:00
Edmond Major III 26444a98ad
Update data.json
Add Cracked.sh - a popular skid hacker website

Examples of profiles:

Claimed: https://cracked.sh/Blue - gives status code of 200

Unclaimed: https://cracked.sh/noonewouldeverusethis7 - gives status code of 404
2025-10-13 12:12:43 -05:00
Paul Pfeister bced3242f3
Merge pull request #2668 from simplyNour/Bug/fix-false-positive-for-hackerearth
fix:  false positive for hackerearth
2025-10-13 10:03:00 -07:00
Paul Pfeister 08aabdad76
Merge pull request #2673 from simplyNour/Deprecate/pepper-site-is-no-longer-operating
Deprecate: Pepper.it closed its doors in August 2025
2025-10-13 10:00:45 -07:00
Paul Pfeister 170ee0b928
Merge branch 'master' into Deprecate/pepper-site-is-no-longer-operating 2025-10-13 09:58:47 -07:00
Paul Pfeister 2c9a54438a
Merge pull request #2672 from simplyNour/Feature/add-pepper-global-sites
Feat: Add pepper stores worldwide websites
2025-10-13 09:57:36 -07:00
nour 84f4886809 Feat: Add pepper stores worldwide websites 2025-10-13 17:46:38 +03:00
nour e26fd6b643 Fix: false positive for topcoder due to invalid regex 2025-10-13 16:27:02 +03:00
Paul Pfeister ce5de20f80
Merge pull request #2659 from faizan842/re-enable-opencollective-powershell-realmeye
Re-enable OpenCollective and Realmeye
2025-10-12 19:01:46 -07:00
Paul Pfeister 3ff2d135b5
Merge branch 'master' into re-enable-opencollective-powershell-realmeye 2025-10-12 18:58:04 -07:00
Paul Pfeister 1e65b4a209
Merge pull request #2657 from KaiAllAlone/patch-1
Add Pokemon Forums
2025-10-12 18:55:13 -07:00
Debanuj Roy db3545b7b0
Added more robust message 2025-10-12 16:31:27 +05:30
Debanuj Roy 1898a0c4a9
Add Terraria Forums 2025-10-12 16:27:30 +05:30
Faizan Habib 0d32357b10 Re-enable OpenCollective and Realmeye
- Updated OpenCollective to use status_code detection (previously used message detection)
- Added Realmeye with message detection

Both sites were previously removed due to false positives but have been verified to work correctly now:
- OpenCollective: Returns 200 for existing profiles, 404 for non-existent
- Realmeye: Shows 'Sorry, but we either:' error message for non-existent players

Tested with known usernames:
- OpenCollective: sindresorhus
- Realmeye: rotmg

Note: PowerShell Gallery was initially included but removed after discovering their /profiles/ endpoint no longer works.
2025-10-12 13:57:22 +05:30
Debanuj Roy 1be2abb056
Resolved wrong urlMain 2025-10-12 13:39:55 +05:30
Debanuj Roy fb392534ef
Add Pokemon Forums 2025-10-12 08:03:23 +05:30
Paul Pfeister bd49aac9d1
Merge pull request #2606 from Fandroid745/fix/babyru-false-positive
fix: Add error messages to BabyRu to prevent false positives
2025-10-11 18:10:54 -04:00
Matheus Felipe 94838863fd
Cleanup site-list.py (#2307) 2025-10-11 15:30:08 -03:00
Matheus Felipe 79973a58ea
Update file handling to include encoding and correct comments 2025-10-11 15:21:36 -03:00
Fandroid745 b9a72b55ca fix: use Unicode escape sequences for BabyRu error messages 2025-10-11 23:14:43 +05:30
Paul Pfeister ef55f7ddd3
chore: reformat json 2025-10-11 13:34:45 -04:00
Paul Pfeister 28b78e7ddd
Merge pull request #2633 from VivekGaddam/add-tiktok-support
Add TikTok (tiktok.com) to supported sites
2025-10-11 13:33:39 -04:00
Paul Pfeister d2072e2cac
chore: rem tiktok for improved rev 2025-10-11 13:32:51 -04:00
Paul Pfeister 3edb73cb23
Merge pull request #2650 from Nirzak/patch-1
Added classifiers for supported python version
2025-10-11 13:30:20 -04:00
Paul Pfeister 6d1280ee9d
Merge pull request #2651 from aryanj10/add-tiktok-pinterest
Added support for TikTok & Pinterest
2025-10-11 13:12:13 -04:00
Dhanush Sugganahalli 0c457e590a
Merge branch 'master' into fix/babyru-false-positive 2025-10-11 21:24:18 +05:30
Aryan Jain dc307fc0fd feat: add TikTok and Pinterest site detection support 2025-10-11 10:34:48 -04:00
Nirjas Jakilim d6256e9fc6
classifiers for supported python version 2025-10-11 20:27:27 +06:00
Aryan Jain 1645828527 Add TikTok site support 2025-10-11 09:25:00 -04:00
Matheus Felipe e774b08dc5
Add imood.com support (#2647) 2025-10-11 09:28:06 -03:00
Matheus Felipe 99067b2e59
Add imood.com support
resolve #2646
2025-10-11 09:23:52 -03:00
nour f039b50c4e Deprecate: Pepper closed its doors on August 14th 2025. 2025-10-11 08:29:32 +03:00
nour 7d5bd97142 fix: false positive for hackerearth 2025-10-11 07:17:01 +03:00
vivekgaddam 70b5055631 corrected india F+ prevent 2025-10-11 08:54:40 +05:30
Paul Pfeister 1be25e70df
Merge pull request #2621 from MaxwellOldshein/feat/validate-remote-manifest-with-local-schema-before-validate-target-test-suite
feat: GitHub Actions - Validate Remote Manifest Against Local Schema Before Running Validate Modified Targets Test Suite
2025-10-10 20:41:58 -04:00
Paul Pfeister 9000575f7c
Merge pull request #2631 from simplyNour/Add-Vjudge-Support-to-Sherlock
Add Vjudge to the sites source
2025-10-10 20:38:16 -04:00
Paul Pfeister 220ebf935c
Merge pull request #2640 from sctech-tr/patch-1
add status cafe (status.cafe)
2025-10-10 20:22:44 -04:00
sctech 959c4a2b26
change method for status.cafe 2025-10-10 20:38:08 +03:00
sctech 443d43df21
add status cafe 2025-10-10 20:09:45 +03:00
Paul Pfeister 80080cd57c
Merge pull request #2638 from simplyNour/Bug/fix-false-positive-for-kaskus 2025-10-10 12:51:15 -04:00
nour 80922a93fa fix: false positive for kaskus 2025-10-10 18:53:28 +03:00
nour 45494fc74b bug: fix local variable scoping issue in test validate targets 2025-10-10 06:29:55 +03:00
nour d92e2339a1 feat: add vjudge 2025-10-10 05:28:28 +03:00
vivekgaddam 659bf92d99 corrected the errorMsg 2025-10-09 19:50:43 +05:30
vivekgaddam 3e4d9bcd85 Add TikTok support to Sherlock 2025-10-09 17:57:15 +05:30
Matheus Felipe d3076cdfe0
Add Ifunny (#2632) 2025-10-09 09:16:41 -03:00
Derick Kunz 51436cefe8
Add Ifunny 2025-10-09 08:51:13 -03:00
Paul Pfeister 08a8177286
Merge pull request #2610 from eslteacher902010/add-musescore-clean 2025-10-09 06:19:35 -04:00
Paul Pfeister e6d5fd64e0
Merge pull request #2622 from akh7177/Add-support-for-Discord.bio
Add support for Discord.bio
2025-10-08 13:03:57 -04:00
Abhyuday K Hegde ac9f3a7fd5
Add support for Discord.bio 2025-10-08 11:21:53 +05:30
Paul Pfeister 289ab28b98
Merge pull request #2576 from obiwan04kanobi/add-aws-skills-profile-site
Add AWS Skills Profile site to Sherlock
2025-10-07 19:46:54 -04:00
Maxwell Oldshein 46ad6c9a5e Fix whitespace. 2025-10-07 14:53:47 -04:00
Maxwell Oldshein d20dcbe8db Retain original whitespace 2025-10-07 14:52:53 -04:00
Maxwell Oldshein 70c3c84196 Update validation logic placement in workflow 2025-10-07 14:50:54 -04:00
Dhanush Sugganahalli 53840c6a98
Merge branch 'master' into fix/babyru-false-positive 2025-10-07 14:41:12 +05:30
Fandroid745 068fff8711 fix:Remove regexCheck field and changed encoding to UTF-8 2025-10-07 14:33:32 +05:30
Maxwell Oldshein 5735d01804 Validate remote manifest against local schema 2025-10-06 23:52:14 -04:00
Paul Pfeister f60de0d8f8
Merge pull request #2616 from akh7177/Add-new-sites-to-data.json 2025-10-06 13:39:04 -04:00
Paul Pfeister cb3ab91492
Merge pull request #2485 from manjushsh/code-sandbox 2025-10-06 13:30:10 -04:00
paul_kniaz 4eea79ed6a MuseScore: use GET for status_code via request_method to avoid 403 on HEAD 2025-10-06 13:07:45 -04:00
Abhyuday K Hegde 03c051a525
Add new sites to Sherlock 2025-10-06 18:47:38 +05:30
Aniket eccdf80b95
Add Pronouns.page (#2419)
* Add support for Pronouns.page (#2418)

* Update the url
2025-10-06 09:52:56 -03:00
Manjush Shetty eb51bf9b1a misc: remove isnsfw from hive 2025-10-06 17:15:44 +05:30
Manjush Shetty 5d7b438fd6 add urlProbe 2025-10-06 17:11:50 +05:30
Manjush Shetty ef0b97fb57 chore: try with api instead 2025-10-06 16:54:07 +05:30
Manjush Shetty c6c3522159 chore: add custom regex for codesandbox usernames 2025-10-06 16:45:53 +05:30
Manjush Shetty 2908c8eaa8 chore: try with different message 2025-10-06 16:40:59 +05:30
Manjush S f05b8e0ed6
Merge branch 'sherlock-project:master' into code-sandbox 2025-10-06 16:21:40 +05:30
Fandroid745 01bca6b39f fix: corrected the regexCheck field value to an empty string 2025-10-06 08:57:11 +05:30
Paul Pfeister d2835e56a4
Merge pull request #2568 from shreyasNaik0101/fix/remediate-blitztactics
fix(sites): Remediate false positive for Blitz Tactics
2025-10-05 14:17:43 -04:00
shreyasNaik0101 0cf110e69e
Merge branch 'master' into fix/remediate-blitztactics 2025-10-05 22:56:59 +05:30
Paul Pfeister a88adb0488
Merge pull request #2559 from frogtheastronaut/master
Removed duplicate Bluesky entry in data.json
2025-10-05 13:23:53 -04:00
Fandroid745 4010a58dde fix: changed the username_claimed to example placeholder 2025-10-05 22:23:17 +05:30
Paul Pfeister b9e28b9b23
Merge pull request #2588 from shreyasNaik0101/fix/correct-ci-diff
fix(ci): Use merge-base for correct target validation
2025-10-05 12:49:58 -04:00
Paul Pfeister d0e005da23
Merge pull request #2609 from akh7177/Add-support-for-WakaTime
Add support for WakaTime
2025-10-05 12:30:24 -04:00
paul_kniaz 7a4f19e6b3 Fix MuseScore URL endpoint 2025-10-05 12:27:30 -04:00
paul_kniaz f958e7b96f update MuseScore username_claimed to arrangeme (valid profile) 2025-10-05 12:13:37 -04:00
paul_kniaz 4c99bf3b75 Add MuseScore site (clean version) 2025-10-05 10:44:55 -04:00
Fandroid745 e3066a1d7a fix:added the username_claimed field 2025-10-05 18:59:04 +05:30
Abhyuday K Hegde f0510a169a
Add support for WakaTime 2025-10-05 15:52:56 +05:30
manjushsh 738df6c362 chore: add error message to the codesandbox 2025-10-05 15:22:37 +05:30
Paul Pfeister 83a38db110
Merge pull request #2582 from dollaransh17/fix/boardgamegeek-false-positive
fix(sites): Update BoardGameGeek URL structure and detection method
2025-10-05 02:39:29 -04:00
dollaransh17 9e3448d992 fix(sites): So , Implemented BoardGameGeek using username validation API
- Added BoardGameGeek back using the new API endpoint suggested by @ppfeister
- Uses https://api.geekdo.com/api/accounts/validate/username?username={} for detection
- errorMsg checks for '"isValid":true' to detect valid usernames
- This approach avoids the previous issues with:
  * HTML parsing returning false positives
  * User API returning JSON with '[]' substrings that caused detection problems
- Successfully tested with both valid (blue) and invalid usernames

Thanks @ppfeister for the API suggestion and @akh7177 for the initial guidance
2025-10-05 11:59:41 +05:30
shreyasNaik0101 70e3c0ddd8 fix(ci): Address review feedback for correctness and efficiency 2025-10-05 11:00:14 +05:30
Fandroid745 017c08a45d fix: Add error messages to BabyRu to prevent false positives 2025-10-05 10:53:59 +05:30
Paul Pfeister f32f4ffaee
Merge pull request #2595 from obiwan04kanobi/feature/issue-2196-ci-docker-build-test
Add Docker build test to CI workflow (#2196)
2025-10-04 21:09:04 -04:00
Paul Pfeister 7379ba7b19
Merge branch 'remove-tor' 2025-10-04 20:52:40 -04:00
Paul Pfeister 3aeb6d6356
Merge pull request #2602 from sherlock-project/feat/no-txt
chore: make default --no-txt
2025-10-04 20:36:33 -04:00
Paul Pfeister 4246a7b16f
chore: make default --no-txt
Workflows where a txt file is still required should use --txt
2025-10-04 20:32:16 -04:00
Paul Pfeister e44fe49c8f
Merge pull request #2601 from sherlock-project/feat/graceful-skip
feat: gracefully skip sites with invalid errorType
2025-10-04 20:23:07 -04:00
Paul Pfeister 52cd5fdfc1
feat: gracefully skip sites with invalid errorType 2025-10-04 20:22:34 -04:00
Paul Pfeister 947f1ad2b6
Merge pull request #2574 from dollaransh17/fix/http-request-timeouts
Security Fix: Add timeout parameters to HTTP requests
2025-10-04 18:42:13 -04:00
shreyasNaik0101 4d00884d8c fix(ci): Implement secure diff logic per feedback 2025-10-05 03:00:21 +05:30
Paul Pfeister cfcc82aaca
Merge pull request #2597 from sherlock-project/feat/multiple-types
Support multiple errorType checks
2025-10-04 17:21:26 -04:00
Paul Pfeister 0794e02b52
feat: support multiple errorTypes 2025-10-04 16:53:30 -04:00
Paul Pfeister 975965abed
Merge pull request #2589 from dollaransh17/fix/threads-false-positive
fix(sites): Fix Threads false positive detection
2025-10-04 15:44:04 -04:00
Paul Pfeister a678bed154
Merge pull request #2587 from akh7177/remediate-cyberdefenders-fp
fix(sites):  Remediate False Positives for CyberDefenders
2025-10-04 15:43:48 -04:00
Paul Pfeister 4ec6f1eec0
Merge pull request #2585 from akh7177/remediate-slideshare-fp
fix(sites):  Remediate False Positive for SlideShare
2025-10-04 15:43:36 -04:00
Paul Pfeister d1527376e7
Merge pull request #2584 from akh7177/remediate-roblox-fp
fix(sites):  Remediate False Positive for Roblox
2025-10-04 15:43:29 -04:00
obiwan04kanobi b99719ce60 Add Docker build test to CI workflow
- Adds docker-build-test job to regression.yml
- Runs on push/merge to master and release branches
- Extracts VERSION_TAG from pyproject.toml for build
- Tests that Docker image builds and runs successfully
- Resolves dockerfile syntax warnings
- Resolves #2196
2025-10-05 00:22:12 +05:30
dollaransh17 dc869852bc fix(sites): Fix Threads false positive detection
Threads was showing false positives for non-existent users because
the error message detection was incorrect.

Updated errorMsg:
- Old: "<title>Threads</title>" (generic, matches valid pages too)
- New: "<title>Threads • Log in</title>" (specific to non-existent users)

When a user doesn't exist, Threads redirects to a login page with the
title "Threads • Log in". Valid user profiles have titles like
"Username (@username) • Threads, Say more".

Tested with:
- Invalid user (impossibleuser12345): Correctly not found
- Valid user (zuck): Correctly found

This fixes the false positive issue where non-existent Threads profiles
were being reported as found.
2025-10-04 17:22:50 +05:30
shreyasNaik0101 3079e7a218 fix(ci): Use merge-base for correct target validation 2025-10-04 15:25:30 +05:30
Abhyuday K Hegde 5cd769c2f4
Remediate False Positives for CyberDefenders 2025-10-04 15:12:20 +05:30
Abhyuday K Hegde 977ad5c1a4
Remediate False Positive for SlideShare 2025-10-04 14:48:37 +05:30
Abhyuday K Hegde 57a0ccef38
Remediate False Positive for Roblox 2025-10-04 14:30:40 +05:30
dollaransh17 94c013886a fix(sites): Remove BoardGameGeek due to incompatible detection
BoardGameGeek cannot be reliably detected with Sherlock's current capabilities:

- Original HTML detection: Returns false positives
- API endpoint approach: The API returns status 200 for both valid and invalid users
  - Invalid user: Returns exactly '[]'
  - Valid user: Returns JSON containing '[]' substrings (e.g., "adminBadges":[])

Since Sherlock's 'message' errorType uses substring matching, it incorrectly
identifies valid users as "not found" when checking for '[]' in the response.

The site's API response format is fundamentally incompatible with Sherlock's
detection methods (message/status_code/response_url), so removal is the only
viable solution to prevent false positives and false negatives.

Addresses false positive issue originally reported in testing.
2025-10-04 11:33:27 +05:30
dollaransh17 c5e209d78e fix(sites): Implement BoardGameGeek API detection as suggested
Using the API endpoint suggested by akh7177:
https://api.geekdo.com/api/users?username={}

However, there's an edge case where valid users contain empty arrays
in their JSON response (adminBadges[], userMicrobadges[], supportYears[])
which causes Sherlock's substring matching to incorrectly flag them
as 'not found' when looking for the '[]' error pattern.

The API correctly returns:
- Valid user: JSON object with user data (but contains [] substrings)
- Invalid user: Exactly '[]' (2 characters total)

This needs further refinement to distinguish between the exact '[]'
response vs JSON containing '[]' substrings.
2025-10-04 11:23:55 +05:30
dollaransh17 3e653c46b0 fix(sites): Remove BoardGameGeek - unreliable detection
BoardGameGeek returns identical pages for both existing and non-existing
users, making reliable username detection impossible with HTTP-based
methods. The site likely uses JavaScript to load user-specific content
dynamically.
2025-10-04 03:12:47 +05:30
dollaransh17 91f3b16993 fix(sites): Update BoardGameGeek URL structure and detection method
BoardGameGeek changed from /user/{} to /profile/{} URL structure.
Also updated from message to status_code detection as the site
no longer returns clear error messages for non-existent users.
2025-10-04 02:55:57 +05:30
obiwan04kanobi 0f3df0f4da **PR description:**
This PR adds AWS Skills Profile to Sherlock’s supported sites in data.json. The configuration uses a unique substring (`shareProfileAccepted":false`) for reliable detection of non-existent usernames, addressing the challenge of JavaScript-rendered error messages.
- Site details and detection logic follow Sherlock’s contributing guidelines and Code of Conduct.
- No changes to core logic; only a new site entry.
- Reviewed for schema compliance and duplicate key cleanup as noted.
2025-10-03 13:46:53 +05:30
dollaransh17 0e7219b191 Security Fix: Add timeout parameters to HTTP requests
This fix addresses a critical security vulnerability where HTTP requests
could hang indefinitely, potentially causing denial of service.

Changes:
- Added 10-second timeout to version check API call
- Added 10-second timeout to GitHub pull request API call
- Added 30-second timeout to data file downloads (larger timeout for data)
- Added 10-second timeout to exclusions list download

Impact:
- Prevents infinite hangs that could freeze the application
- Improves user experience with predictable response times
- Fixes security issue flagged by Bandit static analysis (B113)
- Makes the application more robust in poor network conditions

The timeouts are conservative enough to work with slow connections
while preventing indefinite blocking that could be exploited.
2025-10-03 13:41:43 +05:30
Paul Pfeister 1d2c4b134f
Merge pull request #2570 from shreyasNaik0101/fix/remediate-applediscussions
fix(sites): Remediate false positive for Apple Discussions
2025-10-02 20:30:57 -04:00
shreyasNaik0101 b245c462c9 fix(sites): Remediate false positive for Apple Discussions 2025-10-03 05:56:52 +05:30
shreyasNaik0101 876e58b159 fix(sites): Remediate false positive for Blitz Tactics 2025-10-03 05:45:43 +05:30
Paul Pfeister 66d9733da7
Merge pull request #2565 from shreyasNaik0101/fix/remediate-mydramalist
fix(sites): Remediate false positive for Mydramalist
2025-10-02 19:40:47 -04:00
Paul Pfeister c55deab3a2
Merge pull request #2561 from shreyasNaik0101/fix/remediate-deviantart
fix(sites): Remediate false positive for DeviantArt
2025-10-02 19:37:00 -04:00
Paul Pfeister edcb697793
Merge pull request #2564 from shreyasNaik0101/fix/remediate-allmylinks
fix(sites): Remediate false positive for AllMyLinks
2025-10-02 19:36:43 -04:00
shreyasNaik0101 d314d75db1 fix(sites): Remediate false positive for Mydramalist 2025-10-03 04:43:05 +05:30
shreyasNaik0101 c89a52caf7 fix(sites): Remediate false positive for AllMyLinks 2025-10-03 04:25:46 +05:30
Paul Pfeister 9c18cfe273
Merge pull request #2563 from sherlock-project/chore/update-co
chore: update code owners
2025-10-02 18:25:59 -04:00
shreyasNaik0101 779d4c33f4 fix: Remove username_unclaimed as requested 2025-10-03 03:55:03 +05:30
Paul Pfeister 072c24687b
Merge pull request #2558 from hanjm-github/master
feat: Add some popular website in Korea
2025-10-02 18:22:42 -04:00
Paul Pfeister b811b2bd47
chore: update code owners 2025-10-02 18:21:20 -04:00
shreyasNaik0101 355bfbd328 fix(sites): Remediate false positive for DeviantArt 2025-10-03 00:42:07 +05:30
JongMyeong HAN 7b3632bdad
Add comment to site 'namuwiki'
Co-authored-by: Paul Pfeister <code@pfeister.dev>
2025-10-03 04:00:41 +09:00
Ethan Zhang 4fe41f09ff Removed duplicate Bluesky entry in data.json 2025-10-02 12:42:47 +10:00
JongMyeong HAN cd7c52e4fa
Feat: Add tistory 2025-10-01 00:44:55 +09:00
JongMyeong HAN 86140af50e
feat: Add SOOP 2025-10-01 00:44:02 +09:00
JongMyeong HAN e5cd5e5bfe
feat: Add namuwiki 2025-10-01 00:43:21 +09:00
JongMyeong HAN dc89f1cd27
feat: Add dcinside 2025-10-01 00:41:23 +09:00
Paul Pfeister 388a1e06d4
Merge pull request #2459 from kareemeldahshoury/Issue#2442
Fix Issue #2442: Added support for Aparat
2025-09-20 20:47:37 -04:00
Paul Pfeister 61eeeb7876
Merge branch 'master' into Issue#2442 2025-09-20 20:45:09 -04:00
Paul Pfeister df7da4288c
fix(ci): scoping 2025-09-20 20:44:38 -04:00
Paul Pfeister 70896f1da4
Merge branch 'master' into Issue#2442 2025-09-20 20:26:14 -04:00
Paul Pfeister 0a38cad926
fix(ci): issue write permission 2025-09-20 20:24:41 -04:00
Paul Pfeister 1e38fb6f7b
Merge branch 'master' into Issue#2442 2025-09-20 20:21:48 -04:00
Paul Pfeister 9b3dc3e581
fix(ci): issue write permission 2025-09-20 20:21:28 -04:00
Paul Pfeister 37b30602fd
Merge branch 'master' into Issue#2442 2025-09-20 20:12:21 -04:00
Paul Pfeister 7afdee4c58
fix: incorrect method 2025-09-20 20:09:44 -04:00
Paul Pfeister d4d8e01e31
chore: remove dead site
Fixes: #2433
2025-09-20 19:45:34 -04:00
Paul Pfeister e5e0da00fe
Merge pull request #2549 from sherlock-project/add/instapaper
feat: add instapaper
2025-09-20 18:13:30 -04:00
Paul Pfeister dc61cdc7a4
chore: set request method 2025-09-20 18:10:33 -04:00
Paul Pfeister 0fa2e1afc7
chore: cleanup everything 2025-09-20 18:09:44 -04:00
Paul Pfeister 7ca90ba728
ci: test result summarization 2025-09-20 18:06:25 -04:00
Paul Pfeister cd6fa5bb30
ci: fix the thing 2025-09-20 18:04:42 -04:00
Paul Pfeister fa05641661
ci: improve validation 2025-09-20 17:43:00 -04:00
Paul Pfeister 97ba4e8616
fix(ci): validation issue 2025-09-20 15:39:01 -04:00
Paul Pfeister 9882478fb5
feat: add instapaper 2025-09-20 15:05:44 -04:00
Paul Pfeister 9f5b7e1846
fix(validation ci): parsing and presentation 2025-09-20 15:02:43 -04:00
Paul Pfeister 05afac7082
Merge pull request #2548 from sherlock-project/feature/automatic-testing
Automatically test modified targets upon PR
2025-09-20 14:47:38 -04:00
Paul Pfeister ae362b0f02
ci: automatically validate modified targets on pr 2025-09-20 14:44:19 -04:00
Paul Pfeister 435540606e
chore: add typedef 2025-09-20 13:49:29 -04:00
Paul Pfeister 96aa12c140
Merge pull request #2546 from rezocrypt/add-laracast-support
Added Laracast support
2025-09-20 13:38:21 -04:00
My Name 9560355a7c Added Laracast support 2025-09-18 10:23:09 +04:00
Paul Pfeister b44ac231c1
chore: move SSOT to pyproject.toml
Co-authored-by: ByteXenon <125568681+ByteXenon@users.noreply.github.com>
2025-09-17 17:47:45 -04:00
Paul Pfeister 7ff3924f0b
ci(exclusions): ensure unstaging and removal of tmp 2025-09-17 17:17:49 -04:00
Paul Pfeister 39c3729524
ci(exclusions): fix loss of untracked list 2025-09-17 14:09:15 -04:00
Paul Pfeister faddcbd15f
ci(exclusions): fix loss of untracked list 2025-09-17 14:03:51 -04:00
Paul Pfeister 78a2d309d1
ci(exclusions): fix loss of untracked list 2025-09-17 13:55:42 -04:00
Paul Pfeister 35940e7584
fix: ignore exclusions list on parameterization for false positive tests 2025-09-17 13:44:02 -04:00
manjushsh 4706323976 data: add hive blog 2025-06-27 20:05:01 +05:30
manjushsh 4721c7f553 data: Add code sandboxio 2025-06-27 19:42:23 +05:30
kareemeldahshoury de81f38622 Fix Issue #2442: Added support for Aparat 2025-04-29 15:25:31 -05:00
Pallavi Kathait 193de54b6d
Update site-list.py
These changes improve readability and maintain the functionality of the original code.
2024-09-29 21:31:19 +05:30
Paul Pfeister 2016892e64
Remove torrequest dep
Not sure why it's not in my patch file, but I was removing via sed in my spec instead.
2024-06-28 23:39:38 -04:00
Paul Pfeister 44ad8f506a
Lint 2024-06-28 23:38:44 -04:00
Siddharth Dushantha cfa4097df9 removed support for tor 2024-06-26 21:57:11 +02:00
17 changed files with 1011 additions and 309 deletions

View File

@ -65,7 +65,7 @@ The Actor provides three types of outputs:
| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `username` | string | Yes | Username the search was conducted for |
| `links` | arrray | Yes | Array with found links to the social media |
| `links` | array | Yes | Array with found links to the social media |
| `links[]`| string | No | URL to the account
### Example Dataset Item (JSON)

2
.github/CODEOWNERS vendored
View File

@ -1,5 +1,5 @@
### REPOSITORY
/.github/CODEOWNERS @sdushantha
/.github/CODEOWNERS @sdushantha @ppfeister
/.github/FUNDING.yml @sdushantha
/LICENSE @sdushantha

View File

@ -45,9 +45,9 @@ jobs:
run: |
git fetch origin exclusions || true
if git show origin/exclusions:exclusions.txt >/dev/null 2>&1; then
if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
# If the exclusions branch and file exist, compare
if git diff --quiet origin/exclusions -- exclusions.txt; then
if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
else
echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
@ -71,10 +71,19 @@ jobs:
git config user.name "Paul Pfeister (automation)"
git config user.email "code@pfeister.dev"
mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
git fetch origin exclusions || true # Allows creation of branch if deleted
git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
git add false_positive_exclusions.txt
git stash pop || true
mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
git rm -f false_positive_exclusions.txt.tmp || true
git add false_positive_exclusions.txt
git commit -m "auto: update exclusions list" || echo "No changes to commit"
git push origin exclusions

View File

@ -11,6 +11,7 @@ on:
- '**/*.py'
- '**/*.ini'
- '**/*.toml'
- 'Dockerfile'
push:
branches:
- master
@ -21,11 +22,13 @@ on:
- '**/*.py'
- '**/*.ini'
- '**/*.toml'
- 'Dockerfile'
jobs:
tox-lint:
# Linting is ran through tox to ensure that the same linter is used by local runners
runs-on: ubuntu-latest
# Linting is run through tox to ensure that the same linter
# is used by local runners
steps:
- uses: actions/checkout@v4
- name: Set up linting environment
@ -41,7 +44,8 @@ jobs:
tox-matrix:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false # We want to know what specicic versions it fails on
# We want to know what specific versions it fails on
fail-fast: false
matrix:
os: [
ubuntu-latest,
@ -67,3 +71,22 @@ jobs:
pip install tox-gh-actions
- name: Run tox
run: tox
docker-build-test:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Get version from pyproject.toml
id: get-version
run: |
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Build Docker image
run: |
docker build \
--build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
-t sherlock-test:latest .
- name: Test Docker image runs
run: docker run --rm sherlock-test:latest --version

View File

@ -0,0 +1,126 @@
name: Modified Target Validation
on:
pull_request_target:
branches:
- master
paths:
- "sherlock_project/resources/data.json"
jobs:
validate-modified-targets:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v5
with:
# Checkout the base branch but fetch all history to avoid a second fetch call
ref: ${{ github.base_ref }}
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.13"
- name: Install Poetry
uses: abatilo/actions-poetry@v4
with:
poetry-version: "latest"
- name: Install dependencies
run: |
poetry install --no-interaction --with dev
- name: Prepare JSON versions for comparison
run: |
# Fetch only the PR's branch head (single network call in this step)
git fetch origin pull/${{ github.event.pull_request.number }}/head:pr
# Find the merge-base commit between the target branch and the PR branch
MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
echo "Comparing PR head against merge-base commit: $MERGE_BASE"
# Safely extract the file from the PR's head and the merge-base commit
git show pr:sherlock_project/resources/data.json > data.json.head
git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base
# CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
# This ensures that pytest runs against the new, updated file.
cp data.json.head sherlock_project/resources/data.json
- name: Discover modified targets
id: discover-modified
run: |
CHANGED=$(
python - <<'EOF'
import json
import sys
try:
with open("data.json.base") as f: base = json.load(f)
with open("data.json.head") as f: head = json.load(f)
except FileNotFoundError as e:
print(f"Error: Could not find {e.filename}", file=sys.stderr)
sys.exit(1)
except json.JSONDecodeError as e:
print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
sys.exit(1)
changed = []
for k, v in head.items():
if k not in base or base[k] != v:
changed.append(k)
print(",".join(sorted(changed)))
EOF
)
# Preserve changelist
echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
- name: Validate remote manifest against local schema
if: steps.discover-modified.outputs.changed_targets != ''
run: |
poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema
# --- The steps below validate the modified targets and report the results on the PR ---
- name: Validate modified targets
if: steps.discover-modified.outputs.changed_targets != ''
continue-on-error: true
run: |
poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
--chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
--junitxml=validation_results.xml
- name: Prepare validation summary
if: steps.discover-modified.outputs.changed_targets != ''
id: prepare-summary
run: |
summary=$(
poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
)
echo "$summary" > validation_summary.md
- name: Announce validation results
if: steps.discover-modified.outputs.changed_targets != ''
uses: actions/github-script@v8
with:
script: |
const fs = require('fs');
const body = fs.readFileSync('validation_summary.md', 'utf8');
await github.rest.issues.createComment({
issue_number: context.payload.pull_request.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: body,
});
- name: This step shows as ran when no modifications are found
if: steps.discover-modified.outputs.changed_targets == ''
run: |
echo "No modified targets found"

View File

@ -4,7 +4,7 @@
# 3. Build image with BOTH latest and version tags
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
FROM python:3.12-slim-bullseye as build
FROM python:3.12-slim-bullseye AS build
WORKDIR /sherlock
RUN pip3 install --no-cache-dir --upgrade pip

View File

@ -1,39 +1,45 @@
#!/usr/bin/env python
# This module generates the listing of supported sites which can be found in
# sites.md. It also organizes all the sites in alphanumeric order
# sites.mdx. It also organizes all the sites in alphanumeric order
import json
import os
DATA_REL_URI: str = "sherlock_project/resources/data.json"
DEFAULT_ENCODING = "utf-8"
# Read the data.json file
with open(DATA_REL_URI, "r", encoding="utf-8") as data_file:
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
data: dict = json.load(data_file)
# Removes schema-specific keywords for proper processing
social_networks: dict = dict(data)
social_networks = data.copy()
social_networks.pop('$schema', None)
# Sort the social networks in alphanumeric order
social_networks: list = sorted(social_networks.items())
social_networks = sorted(social_networks.items())
# Make output dir where the site list will be written
os.mkdir("output")
# Write the list of supported sites to sites.md
with open("output/sites.mdx", "w") as site_file:
site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
# Write the list of supported sites to sites.mdx
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
site_file.write("---\n")
site_file.write("title: 'List of supported sites'\n")
site_file.write("sidebarTitle: 'Supported sites'\n")
site_file.write("icon: 'globe'\n")
site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
site_file.write("---\n\n")
for social_network, info in social_networks:
url_main = info["urlMain"]
is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")
# Overwrite the data.json file with sorted data
with open(DATA_REL_URI, "w") as data_file:
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
sorted_data = json.dumps(data, indent=2, sort_keys=True)
data_file.write(sorted_data)
data_file.write("\n")
data_file.write("\n") # Keep the newline after writing data
print("Finished updating supported site listing!")

View File

@ -0,0 +1,72 @@
#!/usr/bin/env python
# This module summarizes the results of site validation tests queued by
# workflow validate_modified_targets for presentation in Issue comments.
from defusedxml import ElementTree as ET
import sys
from pathlib import Path
def summarize_junit_xml(xml_path: Path) -> str:
    """Render a JUnit XML report of site validation tests as Markdown.

    The report is reduced to one table row per target, showing the outcome of
    its false-positive (``test_false_pos``) and false-negative
    (``test_false_neg``) checks. Notices are appended when the suite reports
    failures or when any test case carries an ``<error>`` element.

    Args:
        xml_path: Path to the JUnit XML file to summarize.

    Returns:
        The Markdown summary as a single newline-joined string.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"

    root = ET.parse(xml_path).getroot()
    # Accept a bare <testsuite> root as well as one nested under <testsuites>.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')
    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False
    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')

        # Record errors before any name parsing so that entries without a
        # parametrized id (e.g. collection errors) are still reported.
        if error is not None:
            errors_detected = True

        # Parametrized ids look like "test_name[Site Name]". Partition on the
        # first bracket only, so a site name containing '[' is kept intact.
        test_name, bracket, param_id = (testcase.get('name') or '').partition('[')
        if not bracket:
            # No site name to attribute this test case to; nothing to tabulate.
            continue
        site_name = param_id.removesuffix(']')

        site_results = results.setdefault(site_name, {})
        outcome = pass_message if failure is None and error is None else fail_message
        if test_name == "test_false_neg":
            site_results['F- Check'] = outcome
        elif test_name == "test_false_pos":
            site_results['F+ Check'] = outcome

    for site_name, checks in results.items():
        # A missing check means the corresponding test never reported a result.
        summary_lines.append(
            f"| {site_name} | {checks.get('F+ Check', 'Error!')} | {checks.get('F- Check', 'Error!')} |"
        )

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
if __name__ == "__main__":
    # Exactly one positional argument is expected: the JUnit XML report path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(cli_args[0])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    # The summary goes to stdout.
    summary: str = summarize_junit_xml(xml_path)
    print(summary)

View File

@ -124,7 +124,7 @@ Thank you to everyone who has contributed to Sherlock! ❤️
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
</a>
## Star history
## Star History
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />

View File

@ -8,8 +8,7 @@ source = "init"
[tool.poetry]
name = "sherlock-project"
# single source of truth for version is __init__.py
version = "0"
version = "0.16.0"
description = "Hunt down social media accounts by username across social networks"
license = "MIT"
authors = [
@ -30,6 +29,10 @@ classifiers = [
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Security"
]
homepage = "https://sherlockproject.xyz/"
@ -47,12 +50,9 @@ PySocks = "^1.7.0"
requests = "^2.22.0"
requests-futures = "^1.0.0"
stem = "^1.8.0"
torrequest = "^0.1.0"
pandas = "^2.2.1"
openpyxl = "^3.0.10"
[tool.poetry.extras]
tor = ["torrequest"]
tomli = "^2.2.1"
[tool.poetry.group.dev.dependencies]
jsonschema = "^4.0.0"
@ -60,5 +60,9 @@ rstr = "^3.2.2"
pytest = "^8.4.2"
pytest-xdist = "^3.8.0"
[tool.poetry.group.ci.dependencies]
defusedxml = "^0.7.1"
[tool.poetry.scripts]
sherlock = 'sherlock_project.sherlock:main'

View File

@ -5,11 +5,26 @@ networks.
"""
from importlib.metadata import version as pkg_version, PackageNotFoundError
import pathlib
import tomli
def get_version() -> str:
    """Return the Sherlock version string.

    The metadata of the installed ``sherlock_project`` package is consulted
    first. When the package is not installed (``PackageNotFoundError``), the
    version is read from the ``[tool.poetry]`` table of the ``pyproject.toml``
    located one directory above this package.
    """
    try:
        installed_version = pkg_version("sherlock_project")
    except PackageNotFoundError:
        # Running from a source checkout: fall back to the project manifest.
        project_root: pathlib.Path = pathlib.Path(__file__).resolve().parents[1]
        with (project_root / "pyproject.toml").open("rb") as manifest:
            poetry_table = tomli.load(manifest)["tool"]["poetry"]
        return poetry_table["version"]
    return installed_version
# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
import_error_test_var = None
__shortname__ = "Sherlock"
__longname__ = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.16.0"
__version__ = get_version()
forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +1,149 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": [ "GET", "POST", "HEAD", "PUT" ]
},
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": ["url", "urlMain", "errorType", "username_claimed"],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": ["GET", "POST", "HEAD", "PUT"]
},
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "string",
"enum": [ "message", "response_url", "status_code" ]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"properties" : { "errorType": { "const": "message" } }
},
"errorUrl": {
"properties": { "errorType": { "const": "response_url" } }
},
"errorCode": {
"properties": { "errorType": { "const": "status_code" } }
"type": "array",
"contains": { "const": "message" }
}
},
"if": { "properties": { "errorType": { "const": "message" } } },
"then": { "required": [ "errorMsg" ] },
"else": {
"if": { "properties": { "errorType": { "const": "response_url" } } },
"then": { "required": [ "errorUrl" ] }
},
"additionalProperties": false
}
}
]
},
"errorUrl": {
"oneOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
}
}
}
]
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
},
"allOf": [
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
}
}
}
]
},
"then": { "required": ["errorMsg"] }
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"then": { "required": ["errorUrl"] }
}
],
"additionalProperties": false
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
}
}

View File

@ -171,8 +171,6 @@ def sherlock(
username: str,
site_data: dict[str, dict[str, str]],
query_notify: QueryNotify,
tor: bool = False,
unique_tor: bool = False,
dump_response: bool = False,
proxy: Optional[str] = None,
timeout: int = 60,
@ -188,8 +186,6 @@ def sherlock(
query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about
query results.
tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request.
Default is 60 seconds.
@ -210,32 +206,9 @@ def sherlock(
# Notify caller that we are starting the query.
query_notify.start(username)
# Create session based on request methodology
if tor or unique_tor:
try:
from torrequest import TorRequest # noqa: E402
except ImportError:
print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
sys.exit(query_notify.finish())
print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
# Requests using Tor obfuscation
try:
underlying_request = TorRequest()
except OSError:
print("Tor not found in system path. Unable to continue.\n")
sys.exit(query_notify.finish())
underlying_session = underlying_request.session
else:
# Normal requests
underlying_session = requests.session()
underlying_request = requests.Request()
# Normal requests
underlying_session = requests.session()
# Limit number of workers to 20.
# This is probably vastly overkill.
@ -359,15 +332,10 @@ def sherlock(
# Store future in data for access later
net_info["request_future"] = future
# Reset identify for tor (if needed)
if unique_tor:
underlying_request.reset_identity()
# Add this site's results into final dictionary with all the other results.
results_total[social_network] = results_site
# Open the file containing account links
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in site_data.items():
# Retrieve results again
results_site = results_total.get(social_network)
@ -381,6 +349,8 @@ def sherlock(
# Get the expected error type
error_type = net_info["errorType"]
if isinstance(error_type, str):
error_type: list[str] = [error_type]
# Retrieve future and ensure it has finished
future = net_info["request_future"]
@ -425,58 +395,60 @@ def sherlock(
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF
elif error_type == "message":
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
elif error_type == "status_code":
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
elif error_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else:
# It should be impossible to ever get here...
raise ValueError(
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
)
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
error_context = f"Unknown error type '{error_type}' for {social_network}"
query_status = QueryStatus.UNKNOWN
else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if dump_response:
print("+++++++++++++++++++++")
@ -596,22 +568,6 @@ def main():
dest="output",
help="If using single username, the output of the result will be saved to this file.",
)
parser.add_argument(
"--tor",
"-t",
action="store_true",
dest="tor",
default=False,
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--unique-tor",
"-u",
action="store_true",
dest="unique_tor",
default=False,
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--csv",
action="store_true",
@ -719,12 +675,22 @@ def main():
help="Include checking of NSFW sites from default list.",
)
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
# in future release
parser.add_argument(
"--no-txt",
action="store_true",
dest="no_txt",
default=False,
help="Disable creation of a txt file",
help="Disable creation of a txt file - WILL BE DEPRECATED",
)
parser.add_argument(
"--txt",
action="store_true",
dest="output_txt",
default=False,
help="Enable creation of a txt file",
)
parser.add_argument(
@ -742,7 +708,7 @@ def main():
# Check for newer version of Sherlock. If it exists, let the user know about it
try:
latest_release_raw = requests.get(forge_api_latest_release).text
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
latest_release_json = json_loads(latest_release_raw)
latest_remote_tag = latest_release_json["tag_name"]
@ -755,22 +721,10 @@ def main():
except Exception as error:
print(f"A problem occurred while checking for an update: {error}")
# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")
# Make prompts
if args.proxy is not None:
print("Using the proxy: " + args.proxy)
if args.tor or args.unique_tor:
print("Using Tor to make requests")
print(
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
)
if args.no_color:
# Disable color output.
init(strip=True, convert=False)
@ -802,7 +756,7 @@ def main():
if args.json_file.isnumeric():
pull_number = args.json_file
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
pull_request_raw = requests.get(pull_url).text
pull_request_raw = requests.get(pull_url, timeout=10).text
pull_request_json = json_loads(pull_request_raw)
# Check if it's a valid pull request
@ -871,8 +825,6 @@ def main():
username,
site_data,
query_notify,
tor=args.tor,
unique_tor=args.unique_tor,
dump_response=args.dump_response,
proxy=args.proxy,
timeout=args.timeout,
@ -888,7 +840,7 @@ def main():
else:
result_file = f"{username}.txt"
if not args.no_txt:
if args.output_txt:
with open(result_file, "w", encoding="utf-8") as file:
exists_counter = 0
for website_name in results:
@ -973,8 +925,8 @@ def main():
{
"username": usernames,
"name": names,
"url_main": url_main,
"url_user": url_user,
"url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
"url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
"exists": exists,
"http_status": http_status,
"response_time_s": response_time_s,

View File

@ -129,7 +129,7 @@ class SitesInformation:
if data_file_path.lower().startswith("http"):
# Reference is to a URL.
try:
response = requests.get(url=data_file_path)
response = requests.get(url=data_file_path, timeout=30)
except Exception as error:
raise FileNotFoundError(
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
@ -166,7 +166,7 @@ class SitesInformation:
if honor_exclusions:
try:
response = requests.get(url=EXCLUSIONS_URL)
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
if response.status_code == 200:
exclusions = response.text.splitlines()
exclusions = [exclusion.strip() for exclusion in exclusions]

View File

@ -4,9 +4,9 @@ import urllib
import pytest
from sherlock_project.sites import SitesInformation
def fetch_local_manifest() -> dict[str, dict[str, str]]:
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions)
sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj}
return sites_iterable
@pytest.fixture()
@ -25,9 +25,27 @@ def remote_schema():
schemadat = json.load(remoteschema)
yield schemadat
def pytest_addoption(parser):
parser.addoption(
"--chunked-sites",
action="store",
default=None,
help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
)
def pytest_generate_tests(metafunc):
if "chunked_sites" in metafunc.fixturenames:
sites_info = fetch_local_manifest()
sites_info = fetch_local_manifest(honor_exclusions=False)
# Ingest and apply site selections
site_filter: str | None = metafunc.config.getoption("--chunked-sites")
if site_filter:
selected_sites: list[str] = [site.strip() for site in site_filter.split(",")]
sites_info = {
site: data for site, data in sites_info.items()
if site in selected_sites
}
params = [{name: data} for name, data in sites_info.items()]
ids = list(sites_info.keys())
metafunc.parametrize("chunked_sites", params, ids=ids)

View File

@ -16,6 +16,7 @@ def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUAN
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
def replace_upper_bound(match: re.Match) -> str: # type: ignore
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
nonlocal upper_bound
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
return f'{{{lower_bound},{upper_bound}}}'