chore: add error message to the codesandbox

commit 738df6c362
Author: manjushsh
Date: 2025-10-05 15:22:37 +05:30
19 changed files with 941 additions and 289 deletions

.github/CODEOWNERS (2 changes)

@@ -1,5 +1,5 @@
 ### REPOSITORY
-/.github/CODEOWNERS @sdushantha
+/.github/CODEOWNERS @sdushantha @ppfeister
 /.github/FUNDING.yml @sdushantha
 /LICENSE @sdushantha

.github/workflows/exclusions.yml (new file, 89 lines)

@@ -0,0 +1,89 @@
name: Exclusions Updater

on:
  schedule:
    #- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
    - cron: '0 5 * * *' # Runs at 05:00 every day
  workflow_dispatch:

jobs:
  update-exclusions:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Run false positive tests
        run: |
          $(poetry env activate)
          pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
          deactivate
      - name: Parse false positive detections by desired categories
        run: |
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
            | sort -u > false_positive_exclusions.txt
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
            | sort -u > waf_hits.txt
      - name: Detect if exclusions list changed
        id: detect_changes
        run: |
          git fetch origin exclusions || true
          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare
            if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
            fi
          else
            # If the exclusions branch or file do not exist, treat as changed
            echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Quantify and display results
        run: |
          FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
          WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
          echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
          echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
          echo ">>> WAF hits:" && cat waf_hits.txt
      - name: Commit and push exclusions list
        if: steps.detect_changes.outputs.exclusions_changed == 'true'
        run: |
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"
          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
          git add -f false_positive_exclusions.txt.tmp  # -f required to override .gitignore
          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
          git fetch origin exclusions || true  # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
          git stash pop || true
          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
          git rm -f false_positive_exclusions.txt.tmp || true
          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
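The two grep steps above key off pytest's failure report lines. A rough Python equivalent of the same extraction (the sample report lines are hypothetical, shaped like the `test_false_pos` output the patterns expect):

```python
import re

# Hypothetical report lines in the shape the grep patterns above expect
report = """\
test_false_pos[ExampleSiteA] ... result was Claimed
test_false_pos[ExampleSiteB] ... result was WAF
"""

# Same capture groups as the -oP lookaround patterns, deduplicated and sorted
false_positives = sorted(set(re.findall(r"test_false_pos\[([^\]]+)\].*result was Claimed", report)))
waf_hits = sorted(set(re.findall(r"test_false_pos\[([^\]]+)\].*result was WAF", report)))
print(false_positives)  # ['ExampleSiteA'] -> false_positive_exclusions.txt
print(waf_hits)         # ['ExampleSiteB'] -> waf_hits.txt
```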

CI workflow (tox lint and test matrix; modified)

@@ -11,6 +11,7 @@ on:
       - '**/*.py'
       - '**/*.ini'
       - '**/*.toml'
+      - 'Dockerfile'
   push:
     branches:
       - master
@@ -21,11 +22,13 @@ on:
       - '**/*.py'
       - '**/*.ini'
       - '**/*.toml'
+      - 'Dockerfile'

 jobs:
   tox-lint:
-    # Linting is ran through tox to ensure that the same linter is used by local runners
     runs-on: ubuntu-latest
+    # Linting is ran through tox to ensure that the same linter
+    # is used by local runners
     steps:
       - uses: actions/checkout@v4
       - name: Set up linting environment
@@ -41,7 +44,8 @@ jobs:
   tox-matrix:
     runs-on: ${{ matrix.os }}
     strategy:
-      fail-fast: false # We want to know what specicic versions it fails on
+      # We want to know what specicic versions it fails on
+      fail-fast: false
       matrix:
         os: [
           ubuntu-latest,
@@ -49,10 +53,10 @@ jobs:
           macos-latest,
         ]
         python-version: [
-          '3.9',
           '3.10',
           '3.11',
           '3.12',
+          '3.13',
         ]
     steps:
       - uses: actions/checkout@v4
@@ -67,3 +71,22 @@ jobs:
           pip install tox-gh-actions
       - name: Run tox
         run: tox
+  docker-build-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Get version from pyproject.toml
+        id: get-version
+        run: |
+          VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+      - name: Build Docker image
+        run: |
+          docker build \
+            --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
+            -t sherlock-test:latest .
+      - name: Test Docker image runs
+        run: docker run --rm sherlock-test:latest --version
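The version grep above matches the `version = "0.16.0"` line that the pyproject.toml diff below introduces. The same lookup in Python, as a sketch using the standard library rather than what the workflow actually runs:

```python
import tomllib  # stdlib since Python 3.11

# Equivalent of: grep -m1 'version = ' pyproject.toml | cut -d'"' -f2
with open("pyproject.toml", "rb") as f:
    version = tomllib.load(f)["tool"]["poetry"]["version"]
print(version)  # "0.16.0" at this commit
```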

.github/workflows/validate_modified_targets.yml (new file, 100 lines)

@@ -0,0 +1,100 @@
name: Modified Target Validation

on:
  pull_request_target:
    branches:
      - master
    paths:
      - "sherlock_project/resources/data.json"

jobs:
  validate-modified-targets:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          ref: ${{ github.base_ref }}
          fetch-depth: 1
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Drop in place updated manifest from base
        run: |
          cp sherlock_project/resources/data.json data.json.base
          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
          git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
          cp sherlock_project/resources/data.json data.json.head
      - name: Discover modified targets
        id: discover-modified
        run: |
          CHANGED=$(
          python - <<'EOF'
          import json
          with open("data.json.base") as f: base = json.load(f)
          with open("data.json.head") as f: head = json.load(f)
          changed = []
          for k, v in head.items():
              if k not in base or base[k] != v:
                  changed.append(k)
          print(",".join(sorted(changed)))
          EOF
          )
          # Preserve changelist
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
      - name: Validate modified targets
        if: steps.discover-modified.outputs.changed_targets != ''
        continue-on-error: true
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
            --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
            --junitxml=validation_results.xml
      - name: Prepare validation summary
        if: steps.discover-modified.outputs.changed_targets != ''
        id: prepare-summary
        run: |
          summary=$(
            poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
          )
          echo "$summary" > validation_summary.md
      - name: Announce validation results
        if: steps.discover-modified.outputs.changed_targets != ''
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('validation_summary.md', 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.payload.pull_request.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });
      - name: This step shows as ran when no modifications are found
        if: steps.discover-modified.outputs.changed_targets == ''
        run: |
          echo "No modified targets found"

Dockerfile (modified)

@@ -2,9 +2,9 @@
 # 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
 # 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
 # 3. Build image with BOTH latest and version tags
-#    i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .`
+#    i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`

-FROM python:3.12-slim-bullseye as build
+FROM python:3.12-slim-bullseye AS build
 WORKDIR /sherlock

 RUN pip3 install --no-cache-dir --upgrade pip

devel/summarize_site_validation.py (new file, 72 lines)

@@ -0,0 +1,72 @@
#!/usr/bin/env python

# This module summarizes the results of site validation tests queued by
# workflow validate_modified_targets for presentation in Issue comments.

from defusedxml import ElementTree as ET
import sys
from pathlib import Path


def summarize_junit_xml(xml_path: Path) -> str:
    tree = ET.parse(xml_path)
    root = tree.getroot()
    suite = root.find('testsuite')

    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = []
    summary_lines.append("#### Automatic validation of changes\n")
    summary_lines.append("| Target | F+ Check | F- Check |")
    summary_lines.append("|---|---|---|")

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    results: dict[str, dict[str, str]] = {}
    for testcase in suite.findall('testcase'):
        test_name = testcase.get('name').split('[')[0]
        site_name = testcase.get('name').split('[')[1].rstrip(']')
        failure = testcase.find('failure')
        error = testcase.find('error')

        if site_name not in results:
            results[site_name] = {}

        if test_name == "test_false_neg":
            results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message
        elif test_name == "test_false_pos":
            results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message

        if error is not None:
            errors_detected = True

    for result in results:
        summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |")

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(sys.argv[1])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    summary: str = summarize_junit_xml(xml_path)
    print(summary)
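A quick way to exercise the summarizer locally is to feed it a hand-written JUnit file. This sketch assumes it is run from the repository root with defusedxml available (it is declared in the ci dependency group below); the XML content is hypothetical but matches the shape pytest's --junitxml emits:

```python
import subprocess
from pathlib import Path

# Hypothetical JUnit XML: one passing F+ check and one failing F- check
Path("validation_results.xml").write_text(
    '<testsuites>'
    '<testsuite failures="1">'
    '<testcase name="test_false_pos[ExampleSite]"/>'
    '<testcase name="test_false_neg[ExampleSite]"><failure/></testcase>'
    '</testsuite>'
    '</testsuites>'
)

# Prints the markdown table plus the failure notice appended by the script
print(subprocess.run(
    ["python", "devel/summarize_site_validation.py", "validation_results.xml"],
    capture_output=True, text=True, check=True,
).stdout)
```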

README.md (modified)

@@ -1,6 +1,6 @@
-<p align=center>
+<p align="center">
   <br>
-  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
+  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
   <br>
   <span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
   <br>
@@ -15,8 +15,7 @@
 </p>
 <p align="center">
-  <img width="70%" height="70%" src="images/demo.png"/>
-</a>
+  <img width="70%" height="70%" src="images/demo.png" alt="demo"/>
 </p>
@@ -115,14 +114,14 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
 }]
 ```
-Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
+Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).

 ## Credits

 Thank you to everyone who has contributed to Sherlock! ❤️

 <a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
-  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
+  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
 </a>

 ## Star history

removed_sites.md (modified)

@@ -1982,3 +1982,16 @@ __2025-02-16 :__ Unsure if any way to view profiles exists now
   "username_claimed": "t3dotgg"
 }
 ```
+
+## TorrentGalaxy
+
+__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
+
+```json
+"TorrentGalaxy": {
+  "errorMsg": "<title>TGx:Can't show details</title>",
+  "errorType": "message",
+  "regexCheck": "^[A-Za-z0-9]{3,15}$",
+  "url": "https://torrentgalaxy.to/profile/{}",
+  "urlMain": "https://torrentgalaxy.to/",
+  "username_claimed": "GalaxyRG"
+},
+```

pyproject.toml (modified)

@@ -8,8 +8,7 @@ source = "init"
 [tool.poetry]
 name = "sherlock-project"
-# single source of truth for version is __init__.py
-version = "0"
+version = "0.16.0"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
@@ -47,15 +46,19 @@ PySocks = "^1.7.0"
 requests = "^2.22.0"
 requests-futures = "^1.0.0"
 stem = "^1.8.0"
-torrequest = "^0.1.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
+tomli = "^2.2.1"

-[tool.poetry.extras]
-tor = ["torrequest"]

 [tool.poetry.group.dev.dependencies]
 jsonschema = "^4.0.0"
+rstr = "^3.2.2"
+pytest = "^8.4.2"
+pytest-xdist = "^3.8.0"
+
+[tool.poetry.group.ci.dependencies]
+defusedxml = "^0.7.1"

 [tool.poetry.scripts]
 sherlock = 'sherlock_project.sherlock:main'

pytest.ini (modified)

@@ -1,4 +1,7 @@
 [pytest]
-addopts = --strict-markers
+addopts = --strict-markers -m "not validate_targets"
 markers =
     online: mark tests are requiring internet access.
+    validate_targets: mark tests for sweeping manifest validation (sends many requests).
+    validate_targets_fp: validate_targets, false positive tests only.
+    validate_targets_fn: validate_targets, false negative tests only.

sherlock_project/__init__.py (modified)

@@ -5,11 +5,26 @@ networks.
 """

+from importlib.metadata import version as pkg_version, PackageNotFoundError
+import pathlib
+import tomli
+
+
+def get_version() -> str:
+    """Fetch the version number of the installed package."""
+    try:
+        return pkg_version("sherlock_project")
+    except PackageNotFoundError:
+        pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml"
+        with pyproject_path.open("rb") as f:
+            pyproject_data = tomli.load(f)
+        return pyproject_data["tool"]["poetry"]["version"]
+
+
 # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
 import_error_test_var = None

 __shortname__ = "Sherlock"
 __longname__ = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.15.0"
+__version__ = get_version()

 forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
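With this change, __version__ resolves through installed package metadata, and the tomli fallback only triggers for uninstalled source checkouts. A minimal check, assuming a pip- or poetry-installed copy of this commit:

```python
from importlib.metadata import version

# Resolves from installed package metadata, matching pyproject.toml
print(version("sherlock_project"))  # "0.16.0" at this commit
```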

sherlock_project/resources/data.json (modified)

@@ -79,13 +79,13 @@
     "username_claimed": "pink"
   },
   "AllMyLinks": {
-    "errorMsg": "Not Found",
+    "errorMsg": "Page not found",
     "errorType": "message",
     "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
     "url": "https://allmylinks.com/{}",
     "urlMain": "https://allmylinks.com/",
     "username_claimed": "blue"
   },
   "AniWorld": {
     "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
     "errorType": "message",
@@ -115,12 +115,20 @@
     "username_claimed": "lio24d"
   },
   "Apple Discussions": {
-    "errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.",
+    "errorMsg": "Looking for something in Apple Support Communities?",
     "errorType": "message",
     "url": "https://discussions.apple.com/profile/{}",
     "urlMain": "https://discussions.apple.com",
     "username_claimed": "jason"
   },
+  "Aparat": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.aparat.com/{}/",
+    "urlMain": "https://www.aparat.com/",
+    "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}",
+    "username_claimed": "jadi"
+  },
   "Archive of Our Own": {
     "errorType": "status_code",
     "regexCheck": "^[^.]*?$",
@@ -250,6 +258,12 @@
     "urlMain": "https://www.blipfoto.com/",
     "username_claimed": "blue"
   },
+  "Blitz Tactics": {
+    "errorType": "status_code",
+    "url": "https://blitztactics.com/{}",
+    "urlMain": "https://blitztactics.com/",
+    "username_claimed": "Lance5500"
+  },
   "Blogger": {
     "errorType": "status_code",
     "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
@@ -257,13 +271,12 @@
     "urlMain": "https://www.blogger.com/",
     "username_claimed": "blue"
   },
-  "BoardGameGeek": {
-    "errorType": "message",
-    "regexCheck": "^[a-zA-Z0-9_]*$",
-    "errorMsg": "User not found",
-    "url": "https://boardgamegeek.com/user/{}",
-    "urlMain": "https://boardgamegeek.com",
-    "username_claimed": "blue"
+  "Bluesky": {
+    "errorType": "status_code",
+    "url": "https://bsky.app/profile/{}.bsky.social",
+    "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
+    "urlMain": "https://bsky.app/",
+    "username_claimed": "mcuban"
   },
   "BongaCams": {
     "errorType": "status_code",
@@ -278,6 +291,14 @@
     "urlMain": "https://www.bookcrossing.com/",
     "username_claimed": "blue"
   },
+  "BoardGameGeek": {
+    "errorMsg": "\"isValid\":true",
+    "errorType": "message",
+    "url": "https://boardgamegeek.com/user/{}",
+    "urlMain": "https://boardgamegeek.com/",
+    "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}",
+    "username_claimed": "blue"
+  },
   "BraveCommunity": {
     "errorType": "status_code",
     "url": "https://community.brave.com/u/{}/",
@@ -357,6 +378,12 @@
     "urlMain": "https://career.habr.com/",
     "username_claimed": "blue"
   },
+  "CashApp": {
+    "errorType": "status_code",
+    "url": "https://cash.app/${}",
+    "urlMain": "https://cash.app",
+    "username_claimed": "hotdiggitydog"
+  },
   "Championat": {
     "errorType": "status_code",
     "url": "https://www.championat.com/user/{}",
@@ -479,7 +506,8 @@
     "username_claimed": "hacker"
   },
   "Code Sandbox": {
-    "errorType": "status_code",
+    "errorType": "message",
+    "errorMsg": "Whoops, page not found",
     "url": "https://codesandbox.io/u/{}",
     "urlMain": "https://codesandbox.io",
     "username_claimed": "icyjoseph"
@@ -551,8 +579,7 @@
     "username_claimed": "brown"
   },
   "CyberDefenders": {
-    "errorMsg": "<title>Blue Team Training for SOC analysts and DFIR - CyberDefenders</title>",
-    "errorType": "message",
+    "errorType": "status_code",
     "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$",
     "request_method": "GET",
     "url": "https://cyberdefenders.org/p/{}",
@@ -579,6 +606,12 @@
     "urlMain": "https://www.dailymotion.com/",
     "username_claimed": "blue"
   },
+  "dcinside": {
+    "errorType": "status_code",
+    "url": "https://gallog.dcinside.com/{}",
+    "urlMain": "https://www.dcinside.com/",
+    "username_claimed": "anrbrb"
+  },
   "Dealabs": {
     "errorMsg": "La page que vous essayez",
     "errorType": "message",
@@ -587,20 +620,21 @@
     "urlMain": "https://www.dealabs.com/",
     "username_claimed": "blue"
   },
-  "DeviantART": {
-    "errorType": "status_code",
-    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
-    "url": "https://{}.deviantart.com",
-    "urlMain": "https://deviantart.com",
-    "username_claimed": "blue"
-  },
+  "DeviantArt": {
+    "errorType": "message",
+    "errorMsg": "Llama Not Found",
+    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
+    "url": "https://www.deviantart.com/{}",
+    "urlMain": "https://www.deviantart.com/",
+    "username_claimed": "blue"
+  },
   "DigitalSpy": {
     "errorMsg": "The page you were looking for could not be found.",
     "errorType": "message",
     "url": "https://forums.digitalspy.com/profile/{}",
     "urlMain": "https://forums.digitalspy.com/",
     "username_claimed": "blue",
     "regexCheck": "^\\w{3,20}$"
   },
   "Discogs": {
     "errorType": "status_code",
@@ -786,13 +820,12 @@
     "urlMain": "https://fosstodon.org/",
     "username_claimed": "blue"
   },
-  "Freelance.habr": {
-    "errorMsg": "<div class=\"icon_user_locked\"></div>",
-    "errorType": "message",
-    "regexCheck": "^((?!\\.).)*$",
-    "url": "https://freelance.habr.com/freelancers/{}",
-    "urlMain": "https://freelance.habr.com/",
-    "username_claimed": "adam"
+  "Framapiaf": {
+    "errorType": "status_code",
+    "regexCheck": "^[a-zA-Z0-9_]{1,30}$",
+    "url": "https://framapiaf.org/@{}",
+    "urlMain": "https://framapiaf.org",
+    "username_claimed": "pylapp"
   },
   "Freelancer": {
     "errorMsg": "\"users\":{}",
@@ -1129,6 +1162,13 @@
     "urlProbe": "https://imginn.com/{}",
     "username_claimed": "instagram"
   },
+  "Instapaper": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.instapaper.com/p/{}",
+    "urlMain": "https://www.instapaper.com/",
+    "username_claimed": "john"
+  },
   "Instructables": {
     "errorType": "status_code",
     "url": "https://www.instructables.com/member/{}",
@@ -1241,6 +1281,13 @@
     "urlMain": "https://linux.org.ru/",
     "username_claimed": "red"
   },
+  "Laracast": {
+    "errorType":"status_code",
+    "url": "https://laracasts.com/@{}",
+    "urlMain": "https://laracasts.com/",
+    "regexCheck": "^[a-zA-Z0-9_-]{3,}$",
+    "username_claimed": "user1"
+  },
   "Launchpad": {
     "errorType": "status_code",
     "url": "https://launchpad.net/~{}",
@@ -1298,6 +1345,12 @@
     "urlMain": "https://linktr.ee/",
     "username_claimed": "anne"
   },
+  "LinuxFR.org": {
+    "errorType": "status_code",
+    "url": "https://linuxfr.org/users/{}",
+    "urlMain": "https://linuxfr.org/",
+    "username_claimed": "pylapp"
+  },
   "Listed": {
     "errorType": "response_url",
     "errorUrl": "https://listed.to/@{}",
@@ -1338,6 +1391,13 @@
     "urlMain": "https://forums.mmorpg.com/",
     "username_claimed": "goku"
   },
+  "Mamot": {
+    "errorType": "status_code",
+    "regexCheck": "^[a-zA-Z0-9_]{1,30}$",
+    "url": "https://mamot.fr/@{}",
+    "urlMain": "https://mamot.fr/",
+    "username_claimed": "anciensEnssat"
+  },
   "Medium": {
     "errorMsg": "<body",
     "errorType": "message",
@@ -1353,8 +1413,8 @@
     "username_claimed": "blue"
   },
   "Minecraft": {
-    "errorCode": 204,
-    "errorType": "status_code",
+    "errorMsg": "Couldn't find any profile with name",
+    "errorType": "message",
     "url": "https://api.mojang.com/users/profiles/minecraft/{}",
     "urlMain": "https://minecraft.net/",
     "username_claimed": "blue"
@@ -1400,12 +1460,12 @@
     "username_claimed": "blue"
   },
   "Mydramalist": {
-    "errorMsg": "Sign in - MyDramaList",
+    "errorMsg": "The requested page was not found",
     "errorType": "message",
     "url": "https://www.mydramalist.com/profile/{}",
     "urlMain": "https://mydramalist.com",
     "username_claimed": "elhadidy12398"
   },
   "Myspace": {
     "errorType": "status_code",
     "url": "https://myspace.com/{}",
@@ -1419,6 +1479,13 @@
     "urlMain": "https://www.native-instruments.com/forum/",
     "username_claimed": "jambert"
   },
+  "namuwiki": {
+    "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.",
+    "errorType": "status_code",
+    "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}",
+    "urlMain": "https://namu.wiki/",
+    "username_claimed": "namu"
+  },
   "NationStates Nation": {
     "errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!",
     "errorType": "message",
@@ -1499,6 +1566,13 @@
     "urlMain": "https://nyaa.si/",
     "username_claimed": "blue"
   },
+  "Open Collective": {
+    "errorMsg": "Oops! Page not found",
+    "errorType": "message",
+    "url": "https://opencollective.com/{}",
+    "urlMain": "https://opencollective.com/",
+    "username_claimed": "pylapp"
+  },
   "OpenStreetMap": {
     "errorType": "status_code",
     "regexCheck": "^[^.]*?$",
@@ -1519,6 +1593,13 @@
     "urlMain": "https://ourdjtalk.com/",
     "username_claimed": "steve"
   },
+  "Outgress": {
+    "errorMsg": "Outgress - Error",
+    "errorType": "message",
+    "url": "https://outgress.com/agents/{}",
+    "urlMain": "https://outgress.com/",
+    "username_claimed": "pylapp"
+  },
   "PCGamer": {
     "errorMsg": "The specified member cannot be found. Please enter a member's entire name.",
     "errorType": "message",
@@ -1580,12 +1661,31 @@
     "urlMain": "https://www.pinkbike.com/",
     "username_claimed": "blue"
   },
+  "pixelfed.social": {
+    "errorType": "status_code",
+    "url": "https://pixelfed.social/{}/",
+    "urlMain": "https://pixelfed.social",
+    "username_claimed": "pylapp"
+  },
   "PlayStore": {
     "errorType": "status_code",
     "url": "https://play.google.com/store/apps/developer?id={}",
     "urlMain": "https://play.google.com/store",
     "username_claimed": "Facebook"
   },
+  "Playstrategy": {
+    "errorType": "status_code",
+    "url": "https://playstrategy.org/@/{}",
+    "urlMain": "https://playstrategy.org",
+    "username_claimed": "oruro"
+  },
+  "Plurk": {
+    "errorMsg": "User Not Found!",
+    "errorType": "message",
+    "url": "https://www.plurk.com/{}",
+    "urlMain": "https://www.plurk.com/",
+    "username_claimed": "plurkoffice"
+  },
   "PocketStars": {
     "errorMsg": "Join Your Favorite Adult Stars",
     "errorType": "message",
@@ -1633,6 +1733,20 @@
     "urlMain": "https://www.producthunt.com/",
     "username_claimed": "jenny"
   },
+  "programming.dev": {
+    "errorMsg": "Error!",
+    "errorType": "message",
+    "url": "https://programming.dev/u/{}",
+    "urlMain": "https://programming.dev",
+    "username_claimed": "pylapp"
+  },
+  "Pychess": {
+    "errorType": "message",
+    "errorMsg": "404",
+    "url": "https://www.pychess.org/@/{}",
+    "urlMain": "https://www.pychess.org",
+    "username_claimed": "gbtami"
+  },
   "PromoDJ": {
     "errorType": "status_code",
     "url": "http://promodj.com/{}",
@@ -1722,8 +1836,7 @@
     "username_claimed": "blue"
   },
   "Roblox": {
-    "errorMsg": "Page cannot be found or no longer exists",
-    "errorType": "message",
+    "errorType": "status_code",
     "url": "https://www.roblox.com/user.aspx?username={}",
     "urlMain": "https://www.roblox.com/",
     "username_claimed": "bluewolfekiller"
@@ -1831,7 +1944,7 @@
   },
   "SlideShare": {
     "errorType": "message",
-    "errorMsg": "<title>Username available</title>",
+    "errorMsg": "<title>Page no longer exists</title>",
     "url": "https://slideshare.net/{}",
     "urlMain": "https://slideshare.net/",
     "username_claimed": "blue"
@@ -1865,6 +1978,13 @@
     "urlMain": "https://www.snapchat.com",
     "username_claimed": "teamsnapchat"
   },
+  "SOOP": {
+    "errorType": "status_code",
+    "url": "https://www.sooplive.co.kr/station/{}",
+    "urlMain": "https://www.sooplive.co.kr/",
+    "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station",
+    "username_claimed": "udkn"
+  },
   "SoundCloud": {
     "errorType": "status_code",
     "url": "https://soundcloud.com/{}",
@@ -1884,6 +2004,12 @@
     "urlMain": "https://soylentnews.org",
     "username_claimed": "adam"
   },
+  "SpeakerDeck": {
+    "errorType": "status_code",
+    "url": "https://speakerdeck.com/{}",
+    "urlMain": "https://speakerdeck.com/",
+    "username_claimed": "pylapp"
+  },
   "Speedrun.com": {
     "errorType": "status_code",
     "url": "https://speedrun.com/users/{}",
@@ -2025,6 +2151,12 @@
     "urlMain": "https://themeforest.net/",
     "username_claimed": "user"
   },
+  "tistory": {
+    "errorType": "status_code",
+    "url": "https://{}.tistory.com/",
+    "urlMain": "https://www.tistory.com/",
+    "username_claimed": "notice"
+  },
   "TnAFlix": {
     "errorType": "status_code",
     "isNSFW": true,
@@ -2032,14 +2164,6 @@
     "urlMain": "https://www.tnaflix.com/",
     "username_claimed": "hacker"
   },
-  "TorrentGalaxy": {
-    "errorMsg": "<title>TGx:Can't show details</title>",
-    "errorType": "message",
-    "regexCheck": "^[A-Za-z0-9]{3,15}$",
-    "url": "https://torrentgalaxy.to/profile/{}",
-    "urlMain": "https://torrentgalaxy.to/",
-    "username_claimed": "GalaxyRG"
-  },
   "TradingView": {
     "errorType": "status_code",
     "request_method": "GET",
@@ -2706,7 +2830,7 @@
     "username_claimed": "green"
   },
   "threads": {
-    "errorMsg": "<title>Threads</title>",
+    "errorMsg": "<title>Threads • Log in</title>",
     "errorType": "message",
     "headers": {
       "Sec-Fetch-Mode": "navigate"
@@ -2721,12 +2845,24 @@
     "urlMain": "https://www.toster.ru/",
     "username_claimed": "adam"
   },
+  "tumblr": {
+    "errorType": "status_code",
+    "url": "https://{}.tumblr.com/",
+    "urlMain": "https://www.tumblr.com/",
+    "username_claimed": "goku"
+  },
   "uid": {
     "errorType": "status_code",
     "url": "http://uid.me/{}",
     "urlMain": "https://uid.me/",
     "username_claimed": "blue"
   },
+  "write.as": {
+    "errorType": "status_code",
+    "url": "https://write.as/{}",
+    "urlMain": "https://write.as",
+    "username_claimed": "pylapp"
+  },
   "xHamster": {
     "errorType": "status_code",
     "isNSFW": true,
@@ -2747,5 +2883,13 @@
     "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
     "urlMain": "https://bsky.app/",
     "username_claimed": "mcuban"
-  }
+  },
+  "Platzi": {
+    "errorType": "status_code",
+    "errorCode": 404,
+    "url": "https://platzi.com/p/{}/",
+    "urlMain": "https://platzi.com/",
+    "username_claimed": "freddier",
+    "request_method": "GET"
+  }
 }
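The commit title refers to the Code Sandbox entry above, which moves from a status_code check to a message check on "Whoops, page not found". A simplified sketch of what a message-type manifest entry means in practice (the helper name and flow are illustrative, not Sherlock's API):

```python
import requests

def looks_available(entry: dict, username: str) -> bool:
    """Illustrative message-type check: the username is treated as available
    when the manifest's errorMsg appears in the profile page body."""
    r = requests.get(entry["url"].format(username), timeout=30)
    errors = entry["errorMsg"]
    errors = [errors] if isinstance(errors, str) else errors
    return any(err in r.text for err in errors)

code_sandbox = {
    "errorType": "message",
    "errorMsg": "Whoops, page not found",
    "url": "https://codesandbox.io/u/{}",
}
print(looks_available(code_sandbox, "icyjoseph"))  # expected False for a claimed name
```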

Sherlock Target Manifest JSON schema (modified)

@ -1,80 +1,149 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest", "title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames", "description": "Social media targets to probe for the existence of known usernames",
"type": "object", "type": "object",
"properties": { "properties": {
"$schema": { "type": "string" } "$schema": { "type": "string" }
}, },
"patternProperties": { "patternProperties": {
"^(?!\\$).*?$": { "^(?!\\$).*?$": {
"type": "object", "type": "object",
"description": "Target name and associated information (key should be human readable name)", "description": "Target name and associated information (key should be human readable name)",
"required": [ "url", "urlMain", "errorType", "username_claimed" ], "required": ["url", "urlMain", "errorType", "username_claimed"],
"properties": { "properties": {
"url": { "type": "string" }, "url": { "type": "string" },
"urlMain": { "type": "string" }, "urlMain": { "type": "string" },
"urlProbe": { "type": "string" }, "urlProbe": { "type": "string" },
"username_claimed": { "type": "string" }, "username_claimed": { "type": "string" },
"regexCheck": { "type": "string" }, "regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" }, "isNSFW": { "type": "boolean" },
"headers": { "type": "object" }, "headers": { "type": "object" },
"request_payload": { "type": "object" }, "request_payload": { "type": "object" },
"__comment__": { "__comment__": {
"type": "string", "type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
}, },
"tags": { "tags": {
"oneOf": [ "oneOf": [
{ "$ref": "#/$defs/tag" }, { "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } } { "type": "array", "items": { "$ref": "#/$defs/tag" } }
] ]
}, },
"request_method": { "request_method": {
"type": "string", "type": "string",
"enum": [ "GET", "POST", "HEAD", "PUT" ] "enum": ["GET", "POST", "HEAD", "PUT"]
}, },
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": { "errorType": {
"type": "string", "type": "array",
"enum": [ "message", "response_url", "status_code" ] "contains": { "const": "message" }
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"properties" : { "errorType": { "const": "message" } }
},
"errorUrl": {
"properties": { "errorType": { "const": "response_url" } }
},
"errorCode": {
"properties": { "errorType": { "const": "status_code" } }
} }
}, }
"if": { "properties": { "errorType": { "const": "message" } } }, }
"then": { "required": [ "errorMsg" ] }, ]
"else": { },
"if": { "properties": { "errorType": { "const": "response_url" } } }, "errorUrl": {
"then": { "required": [ "errorUrl" ] } "oneOf": [
}, { "properties": { "errorType": { "const": "response_url" } } },
"additionalProperties": false {
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
}
}
}
]
} }
}, },
"additionalProperties": false, "allOf": [
"$defs": { {
"tag": { "type": "string", "enum": [ "adult", "gaming" ] } "if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
}
}
}
]
},
"then": { "required": ["errorMsg"] }
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"then": { "required": ["errorUrl"] }
}
],
"additionalProperties": false
} }
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
}
} }
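Because jsonschema is already a dev dependency (see the pyproject.toml diff above), the widened errorType can be sanity-checked locally. A sketch, with the schema path and the sample entry both assumed:

```python
import json
from jsonschema import validate

with open("sherlock_project/resources/data.schema.json") as f:  # assumed path
    schema = json.load(f)

# Hypothetical target exercising the new array-valued errorType;
# "message" in the array makes errorMsg required via the allOf rule.
manifest = {
    "ExampleSite": {
        "errorType": ["message", "status_code"],
        "errorMsg": "Page not found",
        "errorCode": 404,
        "url": "https://example.com/{}",
        "urlMain": "https://example.com/",
        "username_claimed": "blue"
    }
}
validate(instance=manifest, schema=schema)  # raises ValidationError if malformed
```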

sherlock_project/sherlock.py (modified)

@ -169,14 +169,12 @@ def multiple_usernames(username):
def sherlock( def sherlock(
username: str, username: str,
site_data: dict, site_data: dict[str, dict[str, str]],
query_notify: QueryNotify, query_notify: QueryNotify,
tor: bool = False,
unique_tor: bool = False,
dump_response: bool = False, dump_response: bool = False,
proxy: Optional[str] = None, proxy: Optional[str] = None,
timeout: int = 60, timeout: int = 60,
): ) -> dict[str, dict[str, str | QueryResult]]:
"""Run Sherlock Analysis. """Run Sherlock Analysis.
Checks for existence of username on various social media sites. Checks for existence of username on various social media sites.
@ -188,8 +186,6 @@ def sherlock(
query_notify -- Object with base type of QueryNotify(). query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about This will be used to notify the caller about
query results. query results.
tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
proxy -- String indicating the proxy URL proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request. timeout -- Time in seconds to wait before timing out request.
Default is 60 seconds. Default is 60 seconds.
@ -210,32 +206,9 @@ def sherlock(
# Notify caller that we are starting the query. # Notify caller that we are starting the query.
query_notify.start(username) query_notify.start(username)
# Create session based on request methodology
if tor or unique_tor:
try:
from torrequest import TorRequest # noqa: E402
except ImportError:
print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
sys.exit(query_notify.finish())
print("Important!") # Normal requests
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") underlying_session = requests.session()
# Requests using Tor obfuscation
try:
underlying_request = TorRequest()
except OSError:
print("Tor not found in system path. Unable to continue.\n")
sys.exit(query_notify.finish())
underlying_session = underlying_request.session
else:
# Normal requests
underlying_session = requests.session()
underlying_request = requests.Request()
# Limit number of workers to 20. # Limit number of workers to 20.
# This is probably vastly overkill. # This is probably vastly overkill.
@ -359,15 +332,10 @@ def sherlock(
# Store future in data for access later # Store future in data for access later
net_info["request_future"] = future net_info["request_future"] = future
# Reset identify for tor (if needed)
if unique_tor:
underlying_request.reset_identity()
# Add this site's results into final dictionary with all the other results. # Add this site's results into final dictionary with all the other results.
results_total[social_network] = results_site results_total[social_network] = results_site
# Open the file containing account links # Open the file containing account links
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in site_data.items(): for social_network, net_info in site_data.items():
# Retrieve results again # Retrieve results again
results_site = results_total.get(social_network) results_site = results_total.get(social_network)
@ -381,6 +349,8 @@ def sherlock(
# Get the expected error type # Get the expected error type
error_type = net_info["errorType"] error_type = net_info["errorType"]
if isinstance(error_type, str):
error_type: list[str] = [error_type]
# Retrieve future and ensure it has finished # Retrieve future and ensure it has finished
future = net_info["request_future"] future = net_info["request_future"]
@ -425,58 +395,60 @@ def sherlock(
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF query_status = QueryStatus.WAF
elif error_type == "message":
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
elif error_type == "status_code":
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
elif error_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else: else:
# It should be impossible to ever get here... if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
raise ValueError( error_context = f"Unknown error type '{error_type}' for {social_network}"
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'" query_status = QueryStatus.UNKNOWN
) else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if dump_response: if dump_response:
print("+++++++++++++++++++++") print("+++++++++++++++++++++")
@ -507,7 +479,7 @@ def sherlock(
print("+++++++++++++++++++++") print("+++++++++++++++++++++")
# Notify caller about results of query. # Notify caller about results of query.
result = QueryResult( result: QueryResult = QueryResult(
username=username, username=username,
site_name=social_network, site_name=social_network,
site_url_user=url, site_url_user=url,
@ -596,22 +568,6 @@ def main():
dest="output", dest="output",
help="If using single username, the output of the result will be saved to this file.", help="If using single username, the output of the result will be saved to this file.",
) )
parser.add_argument(
"--tor",
"-t",
action="store_true",
dest="tor",
default=False,
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--unique-tor",
"-u",
action="store_true",
dest="unique_tor",
default=False,
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument( parser.add_argument(
"--csv", "--csv",
action="store_true", action="store_true",
@ -719,12 +675,30 @@ def main():
help="Include checking of NSFW sites from default list.", help="Include checking of NSFW sites from default list.",
) )
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
# in future release
parser.add_argument( parser.add_argument(
"--no-txt", "--no-txt",
action="store_true", action="store_true",
dest="no_txt", dest="no_txt",
default=False, default=False,
help="Disable creation of a txt file", help="Disable creation of a txt file - WILL BE DEPRECATED",
)
parser.add_argument(
"--txt",
action="store_true",
dest="output_txt",
default=False,
help="Enable creation of a txt file",
)
parser.add_argument(
"--ignore-exclusions",
action="store_true",
dest="ignore_exclusions",
default=False,
help="Ignore upstream exclusions (may return more false positives)",
) )
args = parser.parse_args() args = parser.parse_args()
@ -734,7 +708,7 @@ def main():
# Check for newer version of Sherlock. If it exists, let the user know about it # Check for newer version of Sherlock. If it exists, let the user know about it
try: try:
latest_release_raw = requests.get(forge_api_latest_release).text latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
latest_release_json = json_loads(latest_release_raw) latest_release_json = json_loads(latest_release_raw)
latest_remote_tag = latest_release_json["tag_name"] latest_remote_tag = latest_release_json["tag_name"]
@ -747,22 +721,10 @@ def main():
except Exception as error: except Exception as error:
print(f"A problem occurred while checking for an update: {error}") print(f"A problem occurred while checking for an update: {error}")
# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")
# Make prompts # Make prompts
if args.proxy is not None: if args.proxy is not None:
print("Using the proxy: " + args.proxy) print("Using the proxy: " + args.proxy)
if args.tor or args.unique_tor:
print("Using Tor to make requests")
print(
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
)
if args.no_color: if args.no_color:
# Disable color output. # Disable color output.
init(strip=True, convert=False) init(strip=True, convert=False)
@ -784,7 +746,8 @@ def main():
try: try:
if args.local: if args.local:
sites = SitesInformation( sites = SitesInformation(
os.path.join(os.path.dirname(__file__), "resources/data.json") os.path.join(os.path.dirname(__file__), "resources/data.json"),
honor_exclusions=False,
) )
else: else:
json_file_location = args.json_file json_file_location = args.json_file
@ -793,7 +756,7 @@ def main():
if args.json_file.isnumeric(): if args.json_file.isnumeric():
pull_number = args.json_file pull_number = args.json_file
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
pull_request_raw = requests.get(pull_url).text pull_request_raw = requests.get(pull_url, timeout=10).text
pull_request_json = json_loads(pull_request_raw) pull_request_json = json_loads(pull_request_raw)
# Check if it's a valid pull request # Check if it's a valid pull request
@ -804,7 +767,11 @@ def main():
head_commit_sha = pull_request_json["head"]["sha"] head_commit_sha = pull_request_json["head"]["sha"]
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json" json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
sites = SitesInformation(json_file_location) sites = SitesInformation(
data_file_path=json_file_location,
honor_exclusions=not args.ignore_exclusions,
do_not_exclude=args.site_list,
)
except Exception as error: except Exception as error:
print(f"ERROR: {error}") print(f"ERROR: {error}")
sys.exit(1) sys.exit(1)
@ -858,8 +825,6 @@ def main():
username, username,
site_data, site_data,
query_notify, query_notify,
tor=args.tor,
unique_tor=args.unique_tor,
dump_response=args.dump_response, dump_response=args.dump_response,
proxy=args.proxy, proxy=args.proxy,
timeout=args.timeout, timeout=args.timeout,
@ -875,7 +840,7 @@ def main():
else: else:
result_file = f"{username}.txt" result_file = f"{username}.txt"
if not args.no_txt: if args.output_txt:
with open(result_file, "w", encoding="utf-8") as file: with open(result_file, "w", encoding="utf-8") as file:
exists_counter = 0 exists_counter = 0
for website_name in results: for website_name in results:
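With errorType normalized to a list, the checks above now compose: the message check runs first, and the status_code and response_url checks only run when no earlier check has already marked the username available. A condensed sketch of that precedence (deliberately simplified; the real code also honors errorCode lists and WAF detection):

```python
def classify(error_types: list[str], status_code: int, body: str, error_msg: str) -> str:
    """Condensed precedence of the combined checks above."""
    status = "claimed"
    if "message" in error_types and error_msg in body:
        status = "available"
    if "status_code" in error_types and status != "available":
        if not (200 <= status_code < 300):
            status = "available"
    if "response_url" in error_types and status != "available":
        if not (200 <= status_code < 300):
            status = "available"
    return status

print(classify(["message", "status_code"], 200, "Whoops, page not found", "Whoops, page not found"))
# -> "available": the message check fires even though the status code is 200
```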

sherlock_project/sites.py (modified)

@ -7,6 +7,10 @@ import json
import requests import requests
import secrets import secrets
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
class SiteInformation: class SiteInformation:
def __init__(self, name, url_home, url_username_format, username_claimed, def __init__(self, name, url_home, url_username_format, username_claimed,
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
@ -67,12 +71,17 @@ class SiteInformation:
Return Value: Return Value:
Nicely formatted string to get information about this object. Nicely formatted string to get information about this object.
""" """
return f"{self.name} ({self.url_home})" return f"{self.name} ({self.url_home})"
class SitesInformation: class SitesInformation:
def __init__(self, data_file_path=None): def __init__(
self,
data_file_path: str|None = None,
honor_exclusions: bool = True,
do_not_exclude: list[str] = [],
):
"""Create Sites Information Object. """Create Sites Information Object.
Contains information about all supported websites. Contains information about all supported websites.
@ -110,7 +119,7 @@ class SitesInformation:
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using # The default data file is the live data.json which is in the GitHub repo. The reason why we are using
# this instead of the local one is so that the user has the most up-to-date data. This prevents # this instead of the local one is so that the user has the most up-to-date data. This prevents
# users from creating issue about false positives which has already been fixed or having outdated data # users from creating issue about false positives which has already been fixed or having outdated data
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json" data_file_path = MANIFEST_URL
# Ensure that specified data file has correct extension. # Ensure that specified data file has correct extension.
if not data_file_path.lower().endswith(".json"): if not data_file_path.lower().endswith(".json"):
@ -120,7 +129,7 @@ class SitesInformation:
if data_file_path.lower().startswith("http"): if data_file_path.lower().startswith("http"):
# Reference is to a URL. # Reference is to a URL.
try: try:
response = requests.get(url=data_file_path) response = requests.get(url=data_file_path, timeout=30)
except Exception as error: except Exception as error:
raise FileNotFoundError( raise FileNotFoundError(
f"Problem while attempting to access data file URL '{data_file_path}': {error}" f"Problem while attempting to access data file URL '{data_file_path}': {error}"
@@ -152,9 +161,31 @@ class SitesInformation:
                 raise FileNotFoundError(f"Problem while attempting to access "
                                         f"data file '{data_file_path}'."
                                         )

         site_data.pop('$schema', None)

+        if honor_exclusions:
+            try:
+                response = requests.get(url=EXCLUSIONS_URL, timeout=10)
+                if response.status_code == 200:
+                    exclusions = response.text.splitlines()
+                    exclusions = [exclusion.strip() for exclusion in exclusions]
+                    for site in do_not_exclude:
+                        if site in exclusions:
+                            exclusions.remove(site)
+                    for exclusion in exclusions:
+                        # pop() with a default never raises KeyError, so no try/except is needed
+                        site_data.pop(exclusion, None)
+            except Exception:
+                # If there was any problem loading the exclusions, just continue without them
+                print("Warning: Could not load exclusions, continuing without them.")
+                honor_exclusions = False
+
         self.sites = {}

         # Add all site information from the json file to internal site list.
@@ -194,7 +225,7 @@ class SitesInformation:
         for site in self.sites:
             if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
                 continue
             sites[site] = self.sites[site]
         self.sites = sites

     def site_name_list(self):
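For downstream callers, the new constructor surface can be exercised as in this minimal sketch (assuming `sherlock_project` is importable and the exclusions branch is reachable; the name passed to `do_not_exclude` is a placeholder):

    from sherlock_project.sites import SitesInformation

    # Default behaviour: load the live manifest, then drop every site
    # named on the auto-generated exclusions branch.
    sites = SitesInformation()

    # Keep a specific site even if it is excluded (placeholder name),
    # or skip the exclusions fetch entirely.
    sites_keep = SitesInformation(do_not_exclude=["SomeNoisySite"])
    sites_all = SitesInformation(honor_exclusions=False)

    print(len(sites.site_name_list()))  # site count after exclusions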

View File

@@ -4,6 +4,11 @@ import urllib

 import pytest
 from sherlock_project.sites import SitesInformation

+def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
+    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions)
+    sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj}
+    return sites_iterable
+
 @pytest.fixture()
 def sites_obj():
     sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
@@ -11,9 +16,7 @@ def sites_obj():

 @pytest.fixture(scope="session")
 def sites_info():
-    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
-    sites_iterable = {site.name: site.information for site in sites_obj}
-    yield sites_iterable
+    yield fetch_local_manifest()

 @pytest.fixture(scope="session")
 def remote_schema():
@@ -21,3 +24,28 @@ def remote_schema():
     with urllib.request.urlopen(schema_url) as remoteschema:
         schemadat = json.load(remoteschema)
         yield schemadat
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--chunked-sites",
+        action="store",
+        default=None,
+        help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
+    )
+
+def pytest_generate_tests(metafunc):
+    if "chunked_sites" in metafunc.fixturenames:
+        sites_info = fetch_local_manifest(honor_exclusions=False)
+
+        # Ingest and apply site selections
+        site_filter: str | None = metafunc.config.getoption("--chunked-sites")
+        if site_filter:
+            selected_sites: list[str] = [site.strip() for site in site_filter.split(",")]
+            sites_info = {
+                site: data for site, data in sites_info.items()
+                if site in selected_sites
+            }
+
+        params = [{name: data} for name, data in sites_info.items()]
+        ids = list(sites_info.keys())
+        metafunc.parametrize("chunked_sites", params, ids=ids)
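With these hooks in place, any test that declares a `chunked_sites` argument runs once per site, and a run can be narrowed from the CLI, e.g. `pytest --chunked-sites "GitHub,GitLab"`. A hypothetical consuming test (not part of this commit), assuming manifest entries carry a `url` field:

    def test_site_entry_is_wellformed(chunked_sites: dict[str, dict[str, str]]):
        # Each parametrized invocation receives a single-site mapping.
        for name, data in chunked_sites.items():
            assert "url" in data, f"{name} has no url field"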

View File

@@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
     """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
     json_relative: str = '../sherlock_project/resources/data.json'
     schema_relative: str = '../sherlock_project/resources/data.schema.json'

     json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
     schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

View File

@@ -0,0 +1,99 @@
+import pytest
+import re
+import rstr
+
+from sherlock_project.sherlock import sherlock
+from sherlock_project.notify import QueryNotify
+from sherlock_project.result import QueryResult, QueryStatus
+
+FALSE_POSITIVE_ATTEMPTS: int = 2  # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
+FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15  # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
+FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'  # Used in absence of a regexCheck entry
+
+def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
+    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
+    def replace_upper_bound(match: re.Match) -> str:
+        lower_bound: int = int(match.group(1)) if match.group(1) else 0
+        # Rebinding `upper_bound` here would shadow the enclosing name and raise
+        # UnboundLocalError (the suppressed F823), so use a fresh local instead.
+        bound: int = upper_bound if lower_bound < upper_bound else lower_bound
+        return f'{{{lower_bound},{bound}}}'
+
+    pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern)  # {n,}
+    pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern)  # +
+    pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern)  # *
+    return pattern
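To make the bounding concrete, a few example rewrites, assuming the default upper bound of 15:

    print(set_pattern_upper_bound(r'^[a-z]+$'))         # ^[a-z]{1,15}$
    print(set_pattern_upper_bound(r'^[a-z0-9_]{3,}$'))  # ^[a-z0-9_]{3,15}$
    print(set_pattern_upper_bound(r'^\w*$'))            # ^\w{0,15}$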
+
+def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
+    """Check if a site is likely to produce false positives."""
+    status: QueryStatus = QueryStatus.UNKNOWN
+    for _ in range(FALSE_POSITIVE_ATTEMPTS):
+        query_notify: QueryNotify = QueryNotify()
+        username: str = rstr.xeger(pattern)
+        result: QueryResult | str = sherlock(
+            username=username,
+            site_data=sites_info,
+            query_notify=query_notify,
+        )[site]['status']
+        if not hasattr(result, 'status'):
+            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
+        if type(result.status) is not QueryStatus:  # type: ignore
+            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore
+        status = result.status  # type: ignore
+        if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
+            return status
+    return status
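The probe usernames come from `rstr.xeger`, which generates a random string matching a given regular expression, so every attempt is a syntactically valid but almost certainly unclaimed username. A quick illustration (output is random; the value in the comment is made up):

    import rstr

    candidate = rstr.xeger(r'^[a-zA-Z0-9]{7,20}$')
    print(candidate)  # e.g. 'q3ZrT0ab9'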
+
+def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
+    """Check if a site is likely to produce false negatives."""
+    status: QueryStatus = QueryStatus.UNKNOWN
+    query_notify: QueryNotify = QueryNotify()
+    result: QueryResult | str = sherlock(
+        username=sites_info[site]['username_claimed'],
+        site_data=sites_info,
+        query_notify=query_notify,
+    )[site]['status']
+    if not hasattr(result, 'status'):
+        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
+    if type(result.status) is not QueryStatus:  # type: ignore
+        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore
+    status = result.status  # type: ignore
+    return status
+
+@pytest.mark.validate_targets
+@pytest.mark.online
+class Test_All_Targets:
+    @pytest.mark.validate_targets_fp
+    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
+        """Iterate through all sites in the manifest to discover possible false-positive inducing targets."""
+        pattern: str
+        for site in chunked_sites:
+            try:
+                pattern = chunked_sites[site]['regexCheck']
+            except KeyError:
+                pattern = FALSE_POSITIVE_DEFAULT_PATTERN
+            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
+                pattern = set_pattern_upper_bound(pattern)
+            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
+            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"
+
+    @pytest.mark.validate_targets_fn
+    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
+        """Iterate through all sites in the manifest to discover possible false-negative inducing targets."""
+        for site in chunked_sites:
+            result: QueryStatus = false_negative_check(chunked_sites, site)
+            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"

View File

@@ -7,8 +7,6 @@ envlist =
     py312
     py311
     py310
-    py39
-    py38

 [testenv]
 description = Attempt to build and install the package
@@ -16,6 +14,7 @@ deps =
     coverage
     jsonschema
     pytest
+    rstr
 allowlist_externals = coverage
 commands =
     coverage run --source=sherlock_project --module pytest -v
@@ -37,7 +36,7 @@ commands =
 [gh-actions]
 python =
+    3.13: py313
     3.12: py312
     3.11: py311
     3.10: py310
-    3.9: py39