test: prepare false positive detection base

This commit is contained in:
Paul Pfeister 2025-09-14 00:39:35 -04:00
parent 5113dcfb36
commit ca094d8264
No known key found for this signature in database
GPG Key ID: 70D33A96CBD7A994
7 changed files with 76 additions and 8 deletions

View File

@ -56,6 +56,9 @@ tor = ["torrequest"]
[tool.poetry.group.dev.dependencies]
jsonschema = "^4.0.0"
rstr = "^3.2.2"
pytest = "^8.4.2"
pytest-xdist = "^3.8.0"
[tool.poetry.scripts]
sherlock = 'sherlock_project.sherlock:main'

View File

@ -1,4 +1,5 @@
[pytest]
addopts = --strict-markers
addopts = --strict-markers -m "not validate_targets"
markers =
online: mark tests are requiring internet access.
validate_targets: mark tests for sweeping manifest validation (sends many requests).

View File

@ -169,14 +169,14 @@ def multiple_usernames(username):
def sherlock(
username: str,
site_data: dict,
site_data: dict[str, dict[str, str]],
query_notify: QueryNotify,
tor: bool = False,
unique_tor: bool = False,
dump_response: bool = False,
proxy: Optional[str] = None,
timeout: int = 60,
):
) -> dict[str, dict[str, str | QueryResult]]:
"""Run Sherlock Analysis.
Checks for existence of username on various social media sites.
@ -507,7 +507,7 @@ def sherlock(
print("+++++++++++++++++++++")
# Notify caller about results of query.
result = QueryResult(
result: QueryResult = QueryResult(
username=username,
site_name=social_network,
site_url_user=url,

View File

@ -4,6 +4,11 @@ import urllib
import pytest
from sherlock_project.sites import SitesInformation
def fetch_local_manifest() -> dict[str, dict[str, str]]:
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
return sites_iterable
@pytest.fixture()
def sites_obj():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
@ -11,9 +16,7 @@ def sites_obj():
@pytest.fixture(scope="session")
def sites_info():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
yield sites_iterable
yield fetch_local_manifest()
@pytest.fixture(scope="session")
def remote_schema():
@ -21,3 +24,10 @@ def remote_schema():
with urllib.request.urlopen(schema_url) as remoteschema:
schemadat = json.load(remoteschema)
yield schemadat
def pytest_generate_tests(metafunc):
if "chunked_sites" in metafunc.fixturenames:
sites_info = fetch_local_manifest()
params = [{name: data} for name, data in sites_info.items()]
ids = list(sites_info.keys())
metafunc.parametrize("chunked_sites", params, ids=ids)

View File

@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
json_relative: str = '../sherlock_project/resources/data.json'
schema_relative: str = '../sherlock_project/resources/data.schema.json'
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

View File

@ -0,0 +1,53 @@
import pytest
import rstr
from sherlock_project.sherlock import sherlock
from sherlock_project.notify import QueryNotify
from sherlock_project.result import QueryResult, QueryStatus
FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
"""Check if a site is likely to produce false positives."""
attempts: int = 1
status: QueryStatus = QueryStatus.UNKNOWN
for _ in range(attempts):
query_notify = QueryNotify()
username: str = rstr.xeger(pattern)
result: QueryResult | str = sherlock(
username=username,
site_data=sites_info,
query_notify=query_notify,
)[site]['status']
if not hasattr(result, 'status'):
raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
if type(result.status) is not QueryStatus: # type: ignore
raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
status = result.status # type: ignore
if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
return status
return status
@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:
def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
pattern: str
for site in chunked_sites:
try:
pattern = chunked_sites[site]['regexCheck']
except KeyError:
pattern = r'^[a-zA-Z0-9._-]{7,20}$'
result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

View File

@ -16,6 +16,7 @@ deps =
coverage
jsonschema
pytest
rstr
allowlist_externals = coverage
commands =
coverage run --source=sherlock_project --module pytest -v