Merge pull request #2536 from sherlock-project/feature/username_fuzz
Return support for F+/F- detection via fuzzing
This commit is contained in:
commit
e09319f29f
|
|
@ -0,0 +1,64 @@
|
|||
name: Exclusions Updater
|
||||
|
||||
on:
|
||||
schedule:
|
||||
#- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
|
||||
- cron: '0 5 * * *' # Runs at 05:00 every day
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
update-exclusions:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.13'
|
||||
|
||||
- name: Install Poetry
|
||||
uses: abatilo/actions-poetry@v4
|
||||
with:
|
||||
poetry-version: 'latest'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --no-interaction --with dev
|
||||
|
||||
- name: Run false positive tests
|
||||
run: |
|
||||
$(poetry env activate)
|
||||
pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
|
||||
deactivate
|
||||
|
||||
- name: Parse false positive detections by desired categories
|
||||
id: parse_detections
|
||||
run: |
|
||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
|
||||
| sort -u > false_positive_exclusions.txt
|
||||
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
|
||||
| sort -u > waf_hits.txt
|
||||
|
||||
- name: Quantify and display results
|
||||
run: |
|
||||
FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
|
||||
WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
|
||||
echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
|
||||
echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
|
||||
echo ">>> WAF hits:" && cat waf_hits.txt
|
||||
|
||||
- name: Commit and push exclusions list
|
||||
if: steps.parse_detections.outputs.changed == 'true' || steps.parse_detections.outputs.changed == 'true'
|
||||
run: |
|
||||
git config user.name "Paul Pfeister (automation)"
|
||||
git config user.email "code@pfeister.dev"
|
||||
|
||||
git fetch origin exclusions || true # Allows creation of branch if deleted
|
||||
git checkout -B exclusions origin/exclusions || git checkout --orphan exclusions
|
||||
|
||||
git add false_positive_exclusions.txt
|
||||
|
||||
git commit -m "auto: Update exclusions list" || echo "No changes to commit"
|
||||
git push origin exclusions
|
||||
|
|
@ -49,10 +49,10 @@ jobs:
|
|||
macos-latest,
|
||||
]
|
||||
python-version: [
|
||||
'3.9',
|
||||
'3.10',
|
||||
'3.11',
|
||||
'3.12',
|
||||
'3.13',
|
||||
]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
|
|
|||
|
|
@ -56,6 +56,9 @@ tor = ["torrequest"]
|
|||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
jsonschema = "^4.0.0"
|
||||
rstr = "^3.2.2"
|
||||
pytest = "^8.4.2"
|
||||
pytest-xdist = "^3.8.0"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
sherlock = 'sherlock_project.sherlock:main'
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
[pytest]
|
||||
addopts = --strict-markers
|
||||
addopts = --strict-markers -m "not validate_targets"
|
||||
markers =
|
||||
online: mark tests are requiring internet access.
|
||||
validate_targets: mark tests for sweeping manifest validation (sends many requests).
|
||||
validate_targets_fp: validate_targets, false positive tests only.
|
||||
validate_targets_fn: validate_targets, false negative tests only.
|
||||
|
|
|
|||
|
|
@ -169,14 +169,14 @@ def multiple_usernames(username):
|
|||
|
||||
def sherlock(
|
||||
username: str,
|
||||
site_data: dict,
|
||||
site_data: dict[str, dict[str, str]],
|
||||
query_notify: QueryNotify,
|
||||
tor: bool = False,
|
||||
unique_tor: bool = False,
|
||||
dump_response: bool = False,
|
||||
proxy: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
):
|
||||
) -> dict[str, dict[str, str | QueryResult]]:
|
||||
"""Run Sherlock Analysis.
|
||||
|
||||
Checks for existence of username on various social media sites.
|
||||
|
|
@ -507,7 +507,7 @@ def sherlock(
|
|||
print("+++++++++++++++++++++")
|
||||
|
||||
# Notify caller about results of query.
|
||||
result = QueryResult(
|
||||
result: QueryResult = QueryResult(
|
||||
username=username,
|
||||
site_name=social_network,
|
||||
site_url_user=url,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,11 @@ import urllib
|
|||
import pytest
|
||||
from sherlock_project.sites import SitesInformation
|
||||
|
||||
def fetch_local_manifest() -> dict[str, dict[str, str]]:
|
||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||
sites_iterable = {site.name: site.information for site in sites_obj}
|
||||
return sites_iterable
|
||||
|
||||
@pytest.fixture()
|
||||
def sites_obj():
|
||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||
|
|
@ -11,9 +16,7 @@ def sites_obj():
|
|||
|
||||
@pytest.fixture(scope="session")
|
||||
def sites_info():
|
||||
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
|
||||
sites_iterable = {site.name: site.information for site in sites_obj}
|
||||
yield sites_iterable
|
||||
yield fetch_local_manifest()
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def remote_schema():
|
||||
|
|
@ -21,3 +24,10 @@ def remote_schema():
|
|||
with urllib.request.urlopen(schema_url) as remoteschema:
|
||||
schemadat = json.load(remoteschema)
|
||||
yield schemadat
|
||||
|
||||
def pytest_generate_tests(metafunc):
|
||||
if "chunked_sites" in metafunc.fixturenames:
|
||||
sites_info = fetch_local_manifest()
|
||||
params = [{name: data} for name, data in sites_info.items()]
|
||||
ids = list(sites_info.keys())
|
||||
metafunc.parametrize("chunked_sites", params, ids=ids)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
|
|||
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
|
||||
json_relative: str = '../sherlock_project/resources/data.json'
|
||||
schema_relative: str = '../sherlock_project/resources/data.schema.json'
|
||||
|
||||
|
||||
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
|
||||
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
import pytest
|
||||
import re
|
||||
import rstr
|
||||
|
||||
from sherlock_project.sherlock import sherlock
|
||||
from sherlock_project.notify import QueryNotify
|
||||
from sherlock_project.result import QueryResult, QueryStatus
|
||||
|
||||
|
||||
FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
|
||||
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
|
||||
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry
|
||||
|
||||
|
||||
def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
|
||||
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
|
||||
def replace_upper_bound(match: re.Match) -> str: # type: ignore
|
||||
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
|
||||
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
|
||||
return f'{{{lower_bound},{upper_bound}}}'
|
||||
|
||||
pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern) # {n,} # type: ignore
|
||||
pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern) # +
|
||||
pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern) # *
|
||||
|
||||
return pattern
|
||||
|
||||
def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
|
||||
"""Check if a site is likely to produce false positives."""
|
||||
status: QueryStatus = QueryStatus.UNKNOWN
|
||||
|
||||
for _ in range(FALSE_POSITIVE_ATTEMPTS):
|
||||
query_notify: QueryNotify = QueryNotify()
|
||||
username: str = rstr.xeger(pattern)
|
||||
|
||||
result: QueryResult | str = sherlock(
|
||||
username=username,
|
||||
site_data=sites_info,
|
||||
query_notify=query_notify,
|
||||
)[site]['status']
|
||||
|
||||
if not hasattr(result, 'status'):
|
||||
raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
|
||||
if type(result.status) is not QueryStatus: # type: ignore
|
||||
raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
|
||||
status = result.status # type: ignore
|
||||
|
||||
if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
|
||||
return status
|
||||
|
||||
return status
|
||||
|
||||
|
||||
def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
|
||||
"""Check if a site is likely to produce false negatives."""
|
||||
status: QueryStatus = QueryStatus.UNKNOWN
|
||||
query_notify: QueryNotify = QueryNotify()
|
||||
|
||||
result: QueryResult | str = sherlock(
|
||||
username=sites_info[site]['username_claimed'],
|
||||
site_data=sites_info,
|
||||
query_notify=query_notify,
|
||||
)[site]['status']
|
||||
|
||||
if not hasattr(result, 'status'):
|
||||
raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
|
||||
if type(result.status) is not QueryStatus: # type: ignore
|
||||
raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
|
||||
status = result.status # type: ignore
|
||||
|
||||
return status
|
||||
|
||||
@pytest.mark.validate_targets
|
||||
@pytest.mark.online
|
||||
class Test_All_Targets:
|
||||
|
||||
@pytest.mark.validate_targets_fp
|
||||
def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
|
||||
"""Iterate through all sites in the manifest to discover possible false-positive inducting targets."""
|
||||
pattern: str
|
||||
for site in chunked_sites:
|
||||
try:
|
||||
pattern = chunked_sites[site]['regexCheck']
|
||||
except KeyError:
|
||||
pattern = FALSE_POSITIVE_DEFAULT_PATTERN
|
||||
|
||||
if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
|
||||
pattern = set_pattern_upper_bound(pattern)
|
||||
|
||||
result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
|
||||
assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"
|
||||
|
||||
@pytest.mark.validate_targets_fn
|
||||
def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
|
||||
"""Iterate through all sites in the manifest to discover possible false-negative inducting targets."""
|
||||
for site in chunked_sites:
|
||||
result: QueryStatus = false_negative_check(chunked_sites, site)
|
||||
assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
|
||||
|
||||
5
tox.ini
5
tox.ini
|
|
@ -7,8 +7,6 @@ envlist =
|
|||
py312
|
||||
py311
|
||||
py310
|
||||
py39
|
||||
py38
|
||||
|
||||
[testenv]
|
||||
description = Attempt to build and install the package
|
||||
|
|
@ -16,6 +14,7 @@ deps =
|
|||
coverage
|
||||
jsonschema
|
||||
pytest
|
||||
rstr
|
||||
allowlist_externals = coverage
|
||||
commands =
|
||||
coverage run --source=sherlock_project --module pytest -v
|
||||
|
|
@ -37,7 +36,7 @@ commands =
|
|||
|
||||
[gh-actions]
|
||||
python =
|
||||
3.13: py313
|
||||
3.12: py312
|
||||
3.11: py311
|
||||
3.10: py310
|
||||
3.9: py39
|
||||
|
|
|
|||
Loading…
Reference in New Issue