Merge branch 'master' into Issue#2442
This commit is contained in:
commit
37b30602fd
|
|
@ -0,0 +1,89 @@
|
|||
name: Exclusions Updater

# Runs the false positive validation tests on a schedule (or on demand), extracts
# the names of targets whose false positive test reported "Claimed", and publishes
# that list as false_positive_exclusions.txt on the `exclusions` branch.

on:
  schedule:
    #- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
    - cron: '0 5 * * *' # Runs at 05:00 every day
  workflow_dispatch:

# The final step pushes to the `exclusions` branch, which needs write access to
# repository contents. Nothing else is granted.
permissions:
  contents: write

jobs:
  update-exclusions:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

      - name: Run false positive tests
        run: |
          $(poetry env activate)
          # The raw pytest output is kept on disk; the next step parses it
          pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
          deactivate

      - name: Parse false positive detections by desired categories
        run: |
          # Pull the parametrized target name out of each matching result line
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
            | sort -u > false_positive_exclusions.txt
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
            | sort -u > waf_hits.txt

      - name: Detect if exclusions list changed
        id: detect_changes
        run: |
          git fetch origin exclusions || true

          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare contents directly.
            # The freshly generated list is untracked (and gitignored) on this branch,
            # so it is compared byte-for-byte against the published copy rather than
            # through `git diff`, which only considers tracked paths.
            if git show origin/exclusions:false_positive_exclusions.txt | cmp -s - false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
            fi
          else
            # If the exclusions branch or file do not exist, treat as changed
            echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Quantify and display results
        run: |
          FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
          WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
          echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
          echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
          echo ">>> WAF hits:" && cat waf_hits.txt

      - name: Commit and push exclusions list
        if: steps.detect_changes.outputs.exclusions_changed == 'true'
        run: |
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"

          # Carry the generated list across the branch switch via the stash
          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp

          git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp

          git fetch origin exclusions || true # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)

          git stash pop || true

          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt

          git rm -f false_positive_exclusions.txt.tmp || true
          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
|
||||
|
|
@ -49,10 +49,10 @@ jobs:
|
|||
macos-latest,
|
||||
]
|
||||
python-version: [
|
||||
'3.9',
|
||||
'3.10',
|
||||
'3.11',
|
||||
'3.12',
|
||||
'3.13',
|
||||
]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
name: Modified Target Validation

# For pull requests against master that touch data.json: works out which targets
# were added or modified, runs the validation tests for just those targets, and
# posts a summary of the results as a comment on the pull request.

on:
  pull_request:
    branches:
      - master
    paths:
      - "sherlock_project/resources/data.json"

# Least privilege: read the code, write the summary comment.
permissions:
  contents: read
  pull-requests: write

jobs:
  validate-modified-targets:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

      - name: Discover modified targets
        id: discover-modified
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the value is never parsed as shell syntax
          BASE_REF: ${{ github.base_ref }}
        run: |
          # Fetch the upstream branch
          git fetch origin "$BASE_REF" --depth=1

          # Discover changes
          git show "origin/$BASE_REF:sherlock_project/resources/data.json" > data.json.base
          cp sherlock_project/resources/data.json data.json.head

          CHANGED=$(
            python - <<'EOF'
          import json
          with open("data.json.base") as f: base = json.load(f)
          with open("data.json.head") as f: head = json.load(f)

          changed = []
          for k, v in head.items():
              if k not in base or base[k] != v:
                  changed.append(k)

          print(",".join(sorted(changed)))
          EOF
          )

          # Preserve changelist
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Validate modified targets
        if: steps.discover-modified.outputs.changed_targets != ''
        continue-on-error: true
        env:
          # Target names come from the pull request's data.json (untrusted input);
          # hand them to the shell as data, not as script text
          CHANGED_TARGETS: ${{ steps.discover-modified.outputs.changed_targets }}
        run: |
          $(poetry env activate)
          pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "$CHANGED_TARGETS" --junitxml=validation_results.xml
          deactivate

      - name: Prepare validation summary
        if: steps.discover-modified.outputs.changed_targets != ''
        id: prepare-summary
        run: |
          $(poetry env activate)
          summary=$(
            python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
          )
          deactivate
          echo "$summary" > validation_summary.md

      - name: Announce validation results
        if: steps.discover-modified.outputs.changed_targets != ''
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('validation_summary.md', 'utf8');
            // Await the request so a failed API call fails this step visibly
            await github.rest.issues.createComment({
              issue_number: context.payload.pull_request.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });

      - name: This step shows as ran when no modifications are found
        if: steps.discover-modified.outputs.changed_targets == ''
        run: |
          echo "No modified targets found"
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
|
||||
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
|
||||
# 3. Build image with BOTH latest and version tags
|
||||
# i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .`
|
||||
# i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`
|
||||
|
||||
FROM python:3.12-slim-bullseye as build
|
||||
WORKDIR /sherlock
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
#!/usr/bin/env python
|
||||
# This module summarizes the results of site validation tests queued by
|
||||
# workflow validate_modified_targets for presentation in Issue comments.
|
||||
|
||||
from defusedxml import ElementTree as ET
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def summarize_junit_xml(xml_path: Path) -> str:
    """Summarize a JUnit XML report of site validation tests as Markdown.

    Each ``testcase`` whose name has the form ``test_name[site]`` contributes to
    one table row per site. ``test_false_pos`` fills the "F+ Check" column and
    ``test_false_neg`` fills the "F- Check" column; a check passes only when the
    testcase has neither a ``failure`` nor an ``error`` child. A column with no
    matching testcase is rendered as ``Error!``.

    Testcases that are not parametrized (no ``[...]`` suffix in the name) carry
    no site and are left out of the table instead of aborting the summary.

    Args:
        xml_path: Path of the JUnit XML file to read.

    Returns:
        The Markdown summary: a heading, the results table, and trailing notices
        when the suite reports failures or any testcase reports an error.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # Reports may wrap suites in <testsuites> or use a bare <testsuite> as root.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')

    pass_message: str = ":heavy_check_mark: Pass"
    fail_message: str = ":x: Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    # Maps a test function name to the table column it reports into.
    columns: dict[str, str] = {
        "test_false_neg": "F- Check",
        "test_false_pos": "F+ Check",
    }
    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failure = testcase.find('failure')
        error = testcase.find('error')

        if error is not None:
            errors_detected = True

        # Split only on the first '[' so ids that themselves contain brackets
        # are preserved in full.
        test_name, bracket, param_id = (testcase.get('name') or '').partition('[')
        if not bracket:
            # No parametrized id, so there is no site to attribute this result to.
            continue
        site_name = param_id[:-1] if param_id.endswith(']') else param_id

        site_results = results.setdefault(site_name, {})
        column = columns.get(test_name)
        if column is not None:
            passed = failure is None and error is None
            site_results[column] = pass_message if passed else fail_message

    for site_name, site_results in results.items():
        summary_lines.append(
            f"| {site_name} | {site_results.get('F+ Check', 'Error!')} | {site_results.get('F- Check', 'Error!')} |"
        )

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the JUnit XML file.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(cli_args[0])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    # Emit the Markdown summary on stdout for the caller to capture.
    print(summarize_junit_xml(xml_path))
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
<p align=center>
|
||||
<p align="center">
|
||||
<br>
|
||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
|
||||
<a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
|
||||
<br>
|
||||
<span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
|
||||
<br>
|
||||
|
|
@ -15,8 +15,7 @@
|
|||
</p>
|
||||
|
||||
<p align="center">
|
||||
<img width="70%" height="70%" src="images/demo.png"/>
|
||||
</a>
|
||||
<img width="70%" height="70%" src="images/demo.png" alt="demo"/>
|
||||
</p>
|
||||
|
||||
|
||||
|
|
@ -112,17 +111,17 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
|
|||
"https://www.1337x.to/user/user123/",
|
||||
...
|
||||
]
|
||||
}]s
|
||||
}]
|
||||
```
|
||||
|
||||
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
|
||||
Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
|
||||
|
||||
## Credits
|
||||
|
||||
Thank you to everyone who has contributed to Sherlock! ❤️
|
||||
|
||||
<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
|
||||
<img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
|
||||
</a>
|
||||
|
||||
## Star history
|
||||
|
|
|
|||
|
|
@ -1982,3 +1982,16 @@ __2025-02-16 :__ Unsure if any way to view profiles exists now
|
|||
"username_claimed": "t3dotgg"
|
||||
}
|
||||
```
|
||||
|
||||
## TorrentGalaxy
|
||||
__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
|
||||
```json
|
||||
"TorrentGalaxy": {
|
||||
"errorMsg": "<title>TGx:Can't show details</title>",
|
||||
"errorType": "message",
|
||||
"regexCheck": "^[A-Za-z0-9]{3,15}$",
|
||||
"url": "https://torrentgalaxy.to/profile/{}",
|
||||
"urlMain": "https://torrentgalaxy.to/",
|
||||
"username_claimed": "GalaxyRG"
|
||||
},
|
||||
```
|
||||
|
|
|
|||
|
|
@ -8,8 +8,7 @@ source = "init"
|
|||
|
||||
[tool.poetry]
|
||||
name = "sherlock-project"
|
||||
# single source of truth for version is __init__.py
|
||||
version = "0"
|
||||
version = "0.16.0"
|
||||
description = "Hunt down social media accounts by username across social networks"
|
||||
license = "MIT"
|
||||
authors = [
|
||||
|
|
@ -50,12 +49,20 @@ stem = "^1.8.0"
|
|||
torrequest = "^0.1.0"
|
||||
pandas = "^2.2.1"
|
||||
openpyxl = "^3.0.10"
|
||||
tomli = "^2.2.1"
|
||||
|
||||
[tool.poetry.extras]
|
||||
tor = ["torrequest"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
rstr = "^3.2.2"
|
||||
pytest = "^8.4.2"
|
||||
pytest-xdist = "^3.8.0"
|
||||
|
||||
|
||||
[tool.poetry.group.ci.dependencies]
|
||||
defusedxml = "^0.7.1"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
sherlock = 'sherlock_project.sherlock:main'
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
[pytest]
|
||||
addopts = --strict-markers
|
||||
addopts = --strict-markers -m "not validate_targets"
|
||||
markers =
|
||||
online: mark tests as requiring internet access.
|
||||
validate_targets: mark tests for sweeping manifest validation (sends many requests).
|
||||
validate_targets_fp: validate_targets, false positive tests only.
|
||||
validate_targets_fn: validate_targets, false negative tests only.
|
||||
|
|
|
|||
|
|
@ -5,11 +5,26 @@ networks.
|
|||
|
||||
"""
|
||||
|
||||
from importlib.metadata import version as pkg_version, PackageNotFoundError
|
||||
import pathlib
|
||||
import tomli
|
||||
|
||||
|
||||
def get_version() -> str:
    """Return the version string for this package.

    The installed distribution's metadata is consulted first. When no
    distribution named ``sherlock_project`` is installed, the version is read
    from the ``pyproject.toml`` located one directory above this package.
    """
    try:
        installed_version = pkg_version("sherlock_project")
    except PackageNotFoundError:
        # Not installed as a distribution: fall back to the project manifest.
        package_dir = pathlib.Path(__file__).resolve().parent
        manifest_path: pathlib.Path = package_dir.parent / "pyproject.toml"
        with manifest_path.open("rb") as manifest:
            poetry_table = tomli.load(manifest)["tool"]["poetry"]
        return poetry_table["version"]
    return installed_version
|
||||
|
||||
# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
|
||||
import_error_test_var = None
|
||||
|
||||
__shortname__ = "Sherlock"
|
||||
__longname__ = "Sherlock: Find Usernames Across Social Networks"
|
||||
__version__ = "0.15.0"
|
||||
__version__ = get_version()
|
||||
|
||||
forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
|
||||
|
|
|
|||
|
|
@ -258,6 +258,12 @@
|
|||
"urlMain": "https://www.blipfoto.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Blitz Tactics": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://blitztactics.com/{}",
|
||||
"urlMain": "https://blitztactics.com/",
|
||||
"username_claimed": "Lance5500"
|
||||
},
|
||||
"Blogger": {
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
|
||||
|
|
@ -265,6 +271,13 @@
|
|||
"urlMain": "https://www.blogger.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Bluesky": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://bsky.app/profile/{}.bsky.social",
|
||||
"urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
|
||||
"urlMain": "https://bsky.app/",
|
||||
"username_claimed": "mcuban"
|
||||
},
|
||||
"BoardGameGeek": {
|
||||
"errorType": "message",
|
||||
"regexCheck": "^[a-zA-Z0-9_]*$",
|
||||
|
|
@ -365,6 +378,12 @@
|
|||
"urlMain": "https://career.habr.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"CashApp": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://cash.app/${}",
|
||||
"urlMain": "https://cash.app",
|
||||
"username_claimed": "hotdiggitydog"
|
||||
},
|
||||
"Championat": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://www.championat.com/user/{}",
|
||||
|
|
@ -603,7 +622,7 @@
|
|||
"urlMain": "https://forums.digitalspy.com/",
|
||||
"username_claimed": "blue",
|
||||
"regexCheck": "^\\w{3,20}$"
|
||||
},
|
||||
},
|
||||
"Discogs": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://www.discogs.com/user/{}",
|
||||
|
|
@ -789,13 +808,12 @@
|
|||
"urlMain": "https://fosstodon.org/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Freelance.habr": {
|
||||
"errorMsg": "<div class=\"icon_user_locked\"></div>",
|
||||
"errorType": "message",
|
||||
"regexCheck": "^((?!\\.).)*$",
|
||||
"url": "https://freelance.habr.com/freelancers/{}",
|
||||
"urlMain": "https://freelance.habr.com/",
|
||||
"username_claimed": "adam"
|
||||
"Framapiaf": {
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[a-zA-Z0-9_]{1,30}$",
|
||||
"url": "https://framapiaf.org/@{}",
|
||||
"urlMain": "https://framapiaf.org",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"Freelancer": {
|
||||
"errorMsg": "\"users\":{}",
|
||||
|
|
@ -1124,6 +1142,13 @@
|
|||
"urlProbe": "https://imginn.com/{}",
|
||||
"username_claimed": "instagram"
|
||||
},
|
||||
"Instapaper": {
|
||||
"errorType": "status_code",
|
||||
"request_method": "GET",
|
||||
"url": "https://www.instapaper.com/p/{}",
|
||||
"urlMain": "https://www.instapaper.com/",
|
||||
"username_claimed": "john"
|
||||
},
|
||||
"Instructables": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://www.instructables.com/member/{}",
|
||||
|
|
@ -1236,6 +1261,13 @@
|
|||
"urlMain": "https://linux.org.ru/",
|
||||
"username_claimed": "red"
|
||||
},
|
||||
"Laracast": {
|
||||
"errorType":"status_code",
|
||||
"url": "https://laracasts.com/@{}",
|
||||
"urlMain": "https://laracasts.com/",
|
||||
"regexCheck": "^[a-zA-Z0-9_-]{3,}$",
|
||||
"username_claimed": "user1"
|
||||
},
|
||||
"Launchpad": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://launchpad.net/~{}",
|
||||
|
|
@ -1279,7 +1311,6 @@
|
|||
},
|
||||
"LinkedIn": {
|
||||
"errorType": "status_code",
|
||||
|
||||
"regexCheck": "^[a-zA-Z0-9]{3,100}$",
|
||||
"request_method": "GET",
|
||||
"url": "https://linkedin.com/in/{}",
|
||||
|
|
@ -1294,6 +1325,12 @@
|
|||
"urlMain": "https://linktr.ee/",
|
||||
"username_claimed": "anne"
|
||||
},
|
||||
"LinuxFR.org": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://linuxfr.org/users/{}",
|
||||
"urlMain": "https://linuxfr.org/",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"Listed": {
|
||||
"errorType": "response_url",
|
||||
"errorUrl": "https://listed.to/@{}",
|
||||
|
|
@ -1334,6 +1371,13 @@
|
|||
"urlMain": "https://forums.mmorpg.com/",
|
||||
"username_claimed": "goku"
|
||||
},
|
||||
"Mamot": {
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[a-zA-Z0-9_]{1,30}$",
|
||||
"url": "https://mamot.fr/@{}",
|
||||
"urlMain": "https://mamot.fr/",
|
||||
"username_claimed": "anciensEnssat"
|
||||
},
|
||||
"Medium": {
|
||||
"errorMsg": "<body",
|
||||
"errorType": "message",
|
||||
|
|
@ -1349,8 +1393,8 @@
|
|||
"username_claimed": "blue"
|
||||
},
|
||||
"Minecraft": {
|
||||
"errorCode": 204,
|
||||
"errorType": "status_code",
|
||||
"errorMsg": "Couldn't find any profile with name",
|
||||
"errorType": "message",
|
||||
"url": "https://api.mojang.com/users/profiles/minecraft/{}",
|
||||
"urlMain": "https://minecraft.net/",
|
||||
"username_claimed": "blue"
|
||||
|
|
@ -1495,6 +1539,13 @@
|
|||
"urlMain": "https://nyaa.si/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"Open Collective": {
|
||||
"errorMsg": "Oops! Page not found",
|
||||
"errorType": "message",
|
||||
"url": "https://opencollective.com/{}",
|
||||
"urlMain": "https://opencollective.com/",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"OpenStreetMap": {
|
||||
"errorType": "status_code",
|
||||
"regexCheck": "^[^.]*?$",
|
||||
|
|
@ -1515,6 +1566,13 @@
|
|||
"urlMain": "https://ourdjtalk.com/",
|
||||
"username_claimed": "steve"
|
||||
},
|
||||
"Outgress": {
|
||||
"errorMsg": "Outgress - Error",
|
||||
"errorType": "message",
|
||||
"url": "https://outgress.com/agents/{}",
|
||||
"urlMain": "https://outgress.com/",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"PCGamer": {
|
||||
"errorMsg": "The specified member cannot be found. Please enter a member's entire name.",
|
||||
"errorType": "message",
|
||||
|
|
@ -1576,12 +1634,31 @@
|
|||
"urlMain": "https://www.pinkbike.com/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"pixelfed.social": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://pixelfed.social/{}/",
|
||||
"urlMain": "https://pixelfed.social",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"PlayStore": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://play.google.com/store/apps/developer?id={}",
|
||||
"urlMain": "https://play.google.com/store",
|
||||
"username_claimed": "Facebook"
|
||||
},
|
||||
"Playstrategy": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://playstrategy.org/@/{}",
|
||||
"urlMain": "https://playstrategy.org",
|
||||
"username_claimed": "oruro"
|
||||
},
|
||||
"Plurk": {
|
||||
"errorMsg": "User Not Found!",
|
||||
"errorType": "message",
|
||||
"url": "https://www.plurk.com/{}",
|
||||
"urlMain": "https://www.plurk.com/",
|
||||
"username_claimed": "plurkoffice"
|
||||
},
|
||||
"PocketStars": {
|
||||
"errorMsg": "Join Your Favorite Adult Stars",
|
||||
"errorType": "message",
|
||||
|
|
@ -1629,6 +1706,20 @@
|
|||
"urlMain": "https://www.producthunt.com/",
|
||||
"username_claimed": "jenny"
|
||||
},
|
||||
"programming.dev": {
|
||||
"errorMsg": "Error!",
|
||||
"errorType": "message",
|
||||
"url": "https://programming.dev/u/{}",
|
||||
"urlMain": "https://programming.dev",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"Pychess": {
|
||||
"errorType": "message",
|
||||
"errorMsg": "404",
|
||||
"url": "https://www.pychess.org/@/{}",
|
||||
"urlMain": "https://www.pychess.org",
|
||||
"username_claimed": "gbtami"
|
||||
},
|
||||
"PromoDJ": {
|
||||
"errorType": "status_code",
|
||||
"url": "http://promodj.com/{}",
|
||||
|
|
@ -1880,6 +1971,12 @@
|
|||
"urlMain": "https://soylentnews.org",
|
||||
"username_claimed": "adam"
|
||||
},
|
||||
"SpeakerDeck": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://speakerdeck.com/{}",
|
||||
"urlMain": "https://speakerdeck.com/",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"Speedrun.com": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://speedrun.com/users/{}",
|
||||
|
|
@ -2029,14 +2126,6 @@
|
|||
"urlMain": "https://www.tnaflix.com/",
|
||||
"username_claimed": "hacker"
|
||||
},
|
||||
"TorrentGalaxy": {
|
||||
"errorMsg": "<title>TGx:Can't show details</title>",
|
||||
"errorType": "message",
|
||||
"regexCheck": "^[A-Za-z0-9]{3,15}$",
|
||||
"url": "https://torrentgalaxy.to/profile/{}",
|
||||
"urlMain": "https://torrentgalaxy.to/",
|
||||
"username_claimed": "GalaxyRG"
|
||||
},
|
||||
"TradingView": {
|
||||
"errorType": "status_code",
|
||||
"request_method": "GET",
|
||||
|
|
@ -2719,12 +2808,24 @@
|
|||
"urlMain": "https://www.toster.ru/",
|
||||
"username_claimed": "adam"
|
||||
},
|
||||
"tumblr": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://{}.tumblr.com/",
|
||||
"urlMain": "https://www.tumblr.com/",
|
||||
"username_claimed": "goku"
|
||||
},
|
||||
"uid": {
|
||||
"errorType": "status_code",
|
||||
"url": "http://uid.me/{}",
|
||||
"urlMain": "https://uid.me/",
|
||||
"username_claimed": "blue"
|
||||
},
|
||||
"write.as": {
|
||||
"errorType": "status_code",
|
||||
"url": "https://write.as/{}",
|
||||
"urlMain": "https://write.as",
|
||||
"username_claimed": "pylapp"
|
||||
},
|
||||
"xHamster": {
|
||||
"errorType": "status_code",
|
||||
"isNSFW": true,
|
||||
|
|
@ -2745,5 +2846,13 @@
|
|||
"urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
|
||||
"urlMain": "https://bsky.app/",
|
||||
"username_claimed": "mcuban"
|
||||
},
|
||||
"Platzi": {
|
||||
"errorType": "status_code",
|
||||
"errorCode": 404,
|
||||
"url": "https://platzi.com/p/{}/",
|
||||
"urlMain": "https://platzi.com/",
|
||||
"username_claimed": "freddier",
|
||||
"request_method": "GET"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,14 +169,14 @@ def multiple_usernames(username):
|
|||
|
||||
def sherlock(
|
||||
username: str,
|
||||
site_data: dict,
|
||||
site_data: dict[str, dict[str, str]],
|
||||
query_notify: QueryNotify,
|
||||
tor: bool = False,
|
||||
unique_tor: bool = False,
|
||||
dump_response: bool = False,
|
||||
proxy: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
):
|
||||
) -> dict[str, dict[str, str | QueryResult]]:
|
||||
"""Run Sherlock Analysis.
|
||||
|
||||
Checks for existence of username on various social media sites.
|
||||
|
|
@ -507,7 +507,7 @@ def sherlock(
|
|||
print("+++++++++++++++++++++")
|
||||
|
||||
# Notify caller about results of query.
|
||||
result = QueryResult(
|
||||
result: QueryResult = QueryResult(
|
||||
username=username,
|
||||
site_name=social_network,
|
||||
site_url_user=url,
|
||||
|
|
@ -727,6 +727,14 @@ def main():
|
|||
help="Disable creation of a txt file",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--ignore-exclusions",
|
||||
action="store_true",
|
||||
dest="ignore_exclusions",
|
||||
default=False,
|
||||
help="Ignore upstream exclusions (may return more false positives)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# If the user presses CTRL-C, exit gracefully without throwing errors
|
||||
|
|
@ -784,7 +792,8 @@ def main():
|
|||
try:
|
||||
if args.local:
|
||||
sites = SitesInformation(
|
||||
os.path.join(os.path.dirname(__file__), "resources/data.json")
|
||||
os.path.join(os.path.dirname(__file__), "resources/data.json"),
|
||||
honor_exclusions=False,
|
||||
)
|
||||
else:
|
||||
json_file_location = args.json_file
|
||||
|
|
@ -804,7 +813,11 @@ def main():
|
|||
head_commit_sha = pull_request_json["head"]["sha"]
|
||||
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
|
||||
|
||||
sites = SitesInformation(json_file_location)
|
||||
sites = SitesInformation(
|
||||
data_file_path=json_file_location,
|
||||
honor_exclusions=not args.ignore_exclusions,
|
||||
do_not_exclude=args.site_list,
|
||||
)
|
||||
except Exception as error:
|
||||
print(f"ERROR: {error}")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ import json
|
|||
import requests
|
||||
import secrets
|
||||
|
||||
|
||||
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
||||
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
||||
|
||||
class SiteInformation:
|
||||
def __init__(self, name, url_home, url_username_format, username_claimed,
|
||||
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
|
||||
|
|
@ -67,12 +71,17 @@ class SiteInformation:
|
|||
Return Value:
|
||||
Nicely formatted string to get information about this object.
|
||||
"""
|
||||
|
||||
|
||||
return f"{self.name} ({self.url_home})"
|
||||
|
||||
|
||||
class SitesInformation:
|
||||
def __init__(self, data_file_path=None):
|
||||
def __init__(
|
||||
self,
|
||||
data_file_path: str|None = None,
|
||||
honor_exclusions: bool = True,
|
||||
do_not_exclude: list[str] = [],
|
||||
):
|
||||
"""Create Sites Information Object.
|
||||
|
||||
Contains information about all supported websites.
|
||||
|
|
@ -110,7 +119,7 @@ class SitesInformation:
|
|||
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
||||
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
||||
# users from creating issue about false positives which has already been fixed or having outdated data
|
||||
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
||||
data_file_path = MANIFEST_URL
|
||||
|
||||
# Ensure that specified data file has correct extension.
|
||||
if not data_file_path.lower().endswith(".json"):
|
||||
|
|
@ -152,9 +161,31 @@ class SitesInformation:
|
|||
raise FileNotFoundError(f"Problem while attempting to access "
|
||||
f"data file '{data_file_path}'."
|
||||
)
|
||||
|
||||
|
||||
site_data.pop('$schema', None)
|
||||
|
||||
if honor_exclusions:
|
||||
try:
|
||||
response = requests.get(url=EXCLUSIONS_URL)
|
||||
if response.status_code == 200:
|
||||
exclusions = response.text.splitlines()
|
||||
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||
|
||||
for site in do_not_exclude:
|
||||
if site in exclusions:
|
||||
exclusions.remove(site)
|
||||
|
||||
for exclusion in exclusions:
|
||||
try:
|
||||
site_data.pop(exclusion, None)
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
except Exception:
|
||||
# If there was any problem loading the exclusions, just continue without them
|
||||
print("Warning: Could not load exclusions, continuing without them.")
|
||||
honor_exclusions = False
|
||||
|
||||
self.sites = {}
|
||||
|
||||
# Add all site information from the json file to internal site list.
|
||||
|
|
@ -194,7 +225,7 @@ class SitesInformation:
|
|||
for site in self.sites:
|
||||
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
||||
continue
|
||||
sites[site] = self.sites[site]
|
||||
sites[site] = self.sites[site]
|
||||
self.sites = sites
|
||||
|
||||
def site_name_list(self):
|
||||
|
|
|
|||
|
|
@ -4,6 +4,11 @@ import urllib
|
|||
import pytest
|
||||
from sherlock_project.sites import SitesInformation
|
||||
|
||||
def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
    """Load the repository's local data.json and return it keyed by site name.

    Args:
        honor_exclusions: Forwarded unchanged to ``SitesInformation``.

    Returns:
        A mapping of each site's name to its ``information`` attribute.
    """
    tests_dir = os.path.dirname(__file__)
    manifest_path = os.path.join(tests_dir, "../sherlock_project/resources/data.json")
    loaded_sites = SitesInformation(
        data_file_path=manifest_path,
        honor_exclusions=honor_exclusions,
    )

    manifest: dict[str, dict[str, str]] = {}
    for entry in loaded_sites:
        manifest[entry.name] = entry.information
    return manifest
|
||||
|
||||
@pytest.fixture()
|
||||
def sites_obj():
|
||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||
|
|
@ -11,9 +16,7 @@ def sites_obj():
|
|||
|
||||
@pytest.fixture(scope="session")
|
||||
def sites_info():
|
||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||
sites_iterable = {site.name: site.information for site in sites_obj}
|
||||
yield sites_iterable
|
||||
yield fetch_local_manifest()
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def remote_schema():
|
||||
|
|
@ -21,3 +24,28 @@ def remote_schema():
|
|||
with urllib.request.urlopen(schema_url) as remoteschema:
|
||||
schemadat = json.load(remoteschema)
|
||||
yield schemadat
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--chunked-sites`` command-line option.

    The option stores a single string and defaults to ``None`` when omitted.
    """
    option_settings = {
        "action": "store",
        "default": None,
        "help": "For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
    }
    parser.addoption("--chunked-sites", **option_settings)
|
||||
|
||||
def pytest_generate_tests(metafunc):
    """Parametrize ``chunked_sites`` with one single-site mapping per manifest entry.

    Tests that do not request the ``chunked_sites`` fixture are left alone.
    When ``--chunked-sites`` is given, only the named (comma-separated,
    whitespace-trimmed) sites are kept. Each generated parameter is a
    one-entry ``{site_name: site_data}`` dict and the test id is the site name.
    """
    if "chunked_sites" not in metafunc.fixturenames:
        return

    # The manifest is loaded with honor_exclusions=False for these tests.
    manifest = fetch_local_manifest(honor_exclusions=False)

    # Narrow the manifest down to the sites requested on the command line.
    requested: str | None = metafunc.config.getoption("--chunked-sites")
    if requested:
        wanted: list[str] = [name.strip() for name in requested.split(",")]
        manifest = {name: info for name, info in manifest.items() if name in wanted}

    chunk_ids = list(manifest)
    chunk_params = [{name: manifest[name]} for name in chunk_ids]
    metafunc.parametrize("chunked_sites", chunk_params, ids=chunk_ids)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
|
|||
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
|
||||
json_relative: str = '../sherlock_project/resources/data.json'
|
||||
schema_relative: str = '../sherlock_project/resources/data.schema.json'
|
||||
|
||||
|
||||
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
||||
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
import pytest
|
||||
import re
|
||||
import rstr
|
||||
|
||||
from sherlock_project.sherlock import sherlock
|
||||
from sherlock_project.notify import QueryNotify
|
||||
from sherlock_project.result import QueryResult, QueryStatus
|
||||
|
||||
|
||||
FALSE_POSITIVE_ATTEMPTS: int = 2  # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15  # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'  # Used in absence of a regexCheck entry


def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.

    Every unescaped open-ended quantifier is rewritten into a bounded one:
    ``{n,}`` becomes ``{n,max(n, upper_bound)}``, ``+`` becomes
    ``{1,upper_bound}`` and ``*`` becomes ``{0,upper_bound}``.

    Args:
        pattern: The regular expression source to rewrite.
        upper_bound: Maximum repetition count to impose. A value of 0 (or
            less) disables the rewrite and returns ``pattern`` unchanged.

    Returns:
        The rewritten pattern string.

    Note:
        The rewrite is purely textual: a ``+`` or ``*`` is treated as a
        quantifier whenever it is not directly preceded by a backslash, so
        occurrences inside a character class are rewritten as well.
    """
    # Honor the documented "0 to disable" contract instead of emitting
    # invalid quantifiers such as `{1,0}`.
    if upper_bound <= 0:
        return pattern

    def replace_upper_bound(match: re.Match) -> str:
        lower_bound = int(match.group(1))
        # Never produce a range whose maximum is below its minimum. A separate
        # local name is required here: assigning to `upper_bound` inside this
        # closure would shadow the outer variable and raise UnboundLocalError.
        capped_upper = max(lower_bound, upper_bound)
        return f'{{{lower_bound},{capped_upper}}}'

    pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern)  # {n,}
    pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern)  # +
    pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern)  # *

    return pattern
|
||||
|
||||
def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
    """Probe a site with randomly generated usernames to spot likely false positives.

    Up to ``FALSE_POSITIVE_ATTEMPTS`` usernames matching ``pattern`` are
    generated and queried. Probing stops as soon as a query reports
    ``AVAILABLE`` or ``WAF``; otherwise the status of the last attempt is
    returned (``UNKNOWN`` if no attempt was made).

    Args:
        sites_info: Site data handed to ``sherlock`` as ``site_data``.
        site: Key of the site whose result is inspected.
        pattern: Regular expression the generated usernames must match.

    Raises:
        TypeError: If the site's result has no ``status`` attribute, or that
            attribute is not a ``QueryStatus``.
    """
    latest: QueryStatus = QueryStatus.UNKNOWN

    for _attempt in range(FALSE_POSITIVE_ATTEMPTS):
        notifier: QueryNotify = QueryNotify()
        candidate: str = rstr.xeger(pattern)

        all_results = sherlock(
            username=candidate,
            site_data=sites_info,
            query_notify=notifier,
        )
        outcome: QueryResult | str = all_results[site]['status']

        if not hasattr(outcome, 'status'):
            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {outcome}")

        observed = outcome.status  # type: ignore
        if type(observed) is not QueryStatus:
            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(observed)}")

        latest = observed
        # A conclusive answer (or a WAF block) makes further attempts pointless.
        if latest == QueryStatus.AVAILABLE or latest == QueryStatus.WAF:
            break

    return latest
|
||||
|
||||
|
||||
def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
    """Query a site with its ``username_claimed`` entry to spot likely false negatives.

    Args:
        sites_info: Site data handed to ``sherlock`` as ``site_data``; the
            entry for ``site`` must contain a ``username_claimed`` key.
        site: Key of the site whose result is inspected.

    Returns:
        The ``QueryStatus`` reported for the claimed username.

    Raises:
        TypeError: If the site's result has no ``status`` attribute, or that
            attribute is not a ``QueryStatus``.
    """
    notifier: QueryNotify = QueryNotify()
    claimed_username = sites_info[site]['username_claimed']

    all_results = sherlock(
        username=claimed_username,
        site_data=sites_info,
        query_notify=notifier,
    )
    outcome: QueryResult | str = all_results[site]['status']

    if not hasattr(outcome, 'status'):
        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {outcome}")

    observed = outcome.status  # type: ignore
    if type(observed) is not QueryStatus:
        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(observed)}")

    return observed
|
||||
|
||||
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
    """Online checks run against each manifest entry supplied via ``chunked_sites``."""

    @pytest.mark.validate_targets_fp
    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
        """Fail for any site that does not report AVAILABLE for a randomly generated username."""
        for site, site_data in chunked_sites.items():
            # Fall back to the generic pattern when the site defines no regexCheck.
            pattern: str = site_data.get('regexCheck', FALSE_POSITIVE_DEFAULT_PATTERN)

            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
                pattern = set_pattern_upper_bound(pattern)

            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

    @pytest.mark.validate_targets_fn
    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
        """Fail for any site that does not report CLAIMED for its known claimed username."""
        for site in chunked_sites.keys():
            result: QueryStatus = false_negative_check(chunked_sites, site)
            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
|
||||
|
||||
5
tox.ini
5
tox.ini
|
|
@ -7,8 +7,6 @@ envlist =
|
|||
py312
|
||||
py311
|
||||
py310
|
||||
py39
|
||||
py38
|
||||
|
||||
[testenv]
|
||||
description = Attempt to build and install the package
|
||||
|
|
@ -16,6 +14,7 @@ deps =
|
|||
coverage
|
||||
jsonschema
|
||||
pytest
|
||||
rstr
|
||||
allowlist_externals = coverage
|
||||
commands =
|
||||
coverage run --source=sherlock_project --module pytest -v
|
||||
|
|
@ -37,7 +36,7 @@ commands =
|
|||
|
||||
[gh-actions]
|
||||
python =
|
||||
3.13: py313
|
||||
3.12: py312
|
||||
3.11: py311
|
||||
3.10: py310
|
||||
3.9: py39
|
||||
|
|
|
|||
Loading…
Reference in New Issue