From ca094d8264b43b184bbad774d12ff08954c6e5ba Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 14 Sep 2025 00:39:35 -0400 Subject: [PATCH] test: prepare false positive detection base --- pyproject.toml | 3 ++ pytest.ini | 3 +- sherlock_project/sherlock.py | 6 ++-- tests/conftest.py | 16 ++++++++-- tests/test_manifest.py | 2 +- tests/test_validate_targets.py | 53 ++++++++++++++++++++++++++++++++++ tox.ini | 1 + 7 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 tests/test_validate_targets.py diff --git a/pyproject.toml b/pyproject.toml index 069cb9d3..76a6fab3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,9 @@ tor = ["torrequest"] [tool.poetry.group.dev.dependencies] jsonschema = "^4.0.0" +rstr = "^3.2.2" +pytest = "^8.4.2" +pytest-xdist = "^3.8.0" [tool.poetry.scripts] sherlock = 'sherlock_project.sherlock:main' diff --git a/pytest.ini b/pytest.ini index bc1df7de..e05d3088 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] -addopts = --strict-markers +addopts = --strict-markers -m "not validate_targets" markers = online: mark tests are requiring internet access. + validate_targets: mark tests for sweeping manifest validation (sends many requests). diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 4e80d31c..e3786c90 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -169,14 +169,14 @@ def multiple_usernames(username): def sherlock( username: str, - site_data: dict, + site_data: dict[str, dict[str, str]], query_notify: QueryNotify, tor: bool = False, unique_tor: bool = False, dump_response: bool = False, proxy: Optional[str] = None, timeout: int = 60, -): +) -> dict[str, dict[str, str | QueryResult]]: """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -507,7 +507,7 @@ def sherlock( print("+++++++++++++++++++++") # Notify caller about results of query. 
- result = QueryResult( + result: QueryResult = QueryResult( username=username, site_name=social_network, site_url_user=url, diff --git a/tests/conftest.py b/tests/conftest.py index 51c90814..75aa25e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,11 @@ import urllib import pytest from sherlock_project.sites import SitesInformation +def fetch_local_manifest() -> dict[str, dict[str, str]]: + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_iterable = {site.name: site.information for site in sites_obj} + return sites_iterable + @pytest.fixture() def sites_obj(): sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) @@ -11,9 +16,7 @@ def sites_obj(): @pytest.fixture(scope="session") def sites_info(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) - sites_iterable = {site.name: site.information for site in sites_obj} - yield sites_iterable + yield fetch_local_manifest() @pytest.fixture(scope="session") def remote_schema(): @@ -21,3 +24,10 @@ def remote_schema(): with urllib.request.urlopen(schema_url) as remoteschema: schemadat = json.load(remoteschema) yield schemadat + +def pytest_generate_tests(metafunc): + if "chunked_sites" in metafunc.fixturenames: + sites_info = fetch_local_manifest() + params = [{name: data} for name, data in sites_info.items()] + ids = list(sites_info.keys()) + metafunc.parametrize("chunked_sites", params, ids=ids) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 5c47fbb8..b73e9240 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema(): """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" json_relative: str = 
'../sherlock_project/resources/data.json' schema_relative: str = '../sherlock_project/resources/data.schema.json' - + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py new file mode 100644 index 00000000..a71f3b1e --- /dev/null +++ b/tests/test_validate_targets.py @@ -0,0 +1,53 @@ +import pytest +import rstr + +from sherlock_project.sherlock import sherlock +from sherlock_project.notify import QueryNotify +from sherlock_project.result import QueryResult, QueryStatus + + +FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit + + +def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus: + """Check if a site is likely to produce false positives.""" + attempts: int = 1 + status: QueryStatus = QueryStatus.UNKNOWN + + for _ in range(attempts): + query_notify = QueryNotify() + username: str = rstr.xeger(pattern) + + result: QueryResult | str = sherlock( + username=username, + site_data=sites_info, + query_notify=query_notify, + )[site]['status'] + + if not hasattr(result, 'status'): + raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}") + if type(result.status) is not QueryStatus: # type: ignore + raise TypeError(f"Result status for site {site} is not of type QueryStatus. 
Actual type: {type(result.status)}") # type: ignore + status = result.status # type: ignore + + if status in (QueryStatus.AVAILABLE, QueryStatus.WAF): + return status + + return status + + +@pytest.mark.validate_targets +@pytest.mark.online +class Test_All_Targets: + + def test_manifest_false_pos(self, chunked_sites: dict[str, dict[str, str]]): + """Ensures that each target in the manifest reports a randomly generated username as available (i.e. does not produce a false positive).""" + pattern: str + for site in chunked_sites: + try: + pattern = chunked_sites[site]['regexCheck'] + except KeyError: + pattern = r'^[a-zA-Z0-9._-]{7,20}$' + result: QueryStatus = false_positive_check(chunked_sites, site, pattern) + assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}" + diff --git a/tox.ini b/tox.ini index 1e9a47de..da91d7aa 100644 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,7 @@ deps = coverage jsonschema pytest + rstr allowlist_externals = coverage commands = coverage run --source=sherlock_project --module pytest -v