diff --git a/tests/test_validate_targets.py b/tests/test_validate_targets.py index 8f2874ac..4eb7ea16 100644 --- a/tests/test_validate_targets.py +++ b/tests/test_validate_targets.py @@ -1,4 +1,5 @@ import pytest +import re import rstr from sherlock_project.sherlock import sherlock @@ -7,8 +8,23 @@ from sherlock_project.result import QueryResult, QueryStatus FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit +FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable) +FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry +def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str: + """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`.""" + def replace_upper_bound(match: re.Match) -> str: # type: ignore + lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore + upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823 + return f'{{{lower_bound},{upper_bound}}}' + + pattern = re.sub(r'(? QueryStatus: """Check if a site is likely to produce false positives.""" status: QueryStatus = QueryStatus.UNKNOWN @@ -66,7 +82,11 @@ class Test_All_Targets: try: pattern = chunked_sites[site]['regexCheck'] except KeyError: - pattern = r'^[a-zA-Z0-9._-]{7,20}$' + pattern = FALSE_POSITIVE_DEFAULT_PATTERN + + if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0: + pattern = set_pattern_upper_bound(pattern) + result: QueryStatus = false_positive_check(chunked_sites, site, pattern) assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"