Compare commits
441 Commits
@ -0,0 +1,19 @@
FROM sherlock/sherlock as sherlock

# Install Node.js
RUN apt-get update; apt-get install curl gpg -y
RUN mkdir -p /etc/apt/keyrings
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
RUN apt-get update && apt-get install -y curl bash git jq jo xz-utils nodejs

# Install Apify CLI (node.js) for the Actor Runtime
RUN npm -g install apify-cli

# Install Dependencies for the Actor Shell Script
RUN apt-get update && apt-get install -y bash jq jo xz-utils nodejs

# Copy Actor dir with the actorization shell script
COPY .actor/ .actor

ENTRYPOINT [".actor/actor.sh"]
@ -0,0 +1,93 @@
# Sherlock Actor on Apify

[](https://apify.com/netmilk/sherlock?fpr=sherlock)

This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.

## What are Actors?

[Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).

## Usage

### Apify Console

1. Go to the Apify Actor page
2. Click "Run"
3. In the input form, fill in **Username(s)** to search for
4. The Actor will run and produce its outputs in the default dataset

### Apify CLI

```bash
apify call YOUR_USERNAME/sherlock --input='{
  "usernames": ["johndoe", "janedoe"]
}'
```

### Using Apify API

```bash
curl --request POST \
  --url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/run" \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer YOUR_API_TOKEN' \
  --data '{
    "usernames": ["johndoe", "janedoe"]
  }'
```

## Input Parameters

The Actor accepts JSON input with the following structure:

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `usernames` | array | Yes | - | List of usernames to search for |
| `usernames[]` | string | Yes | - | Username to search for |

### Example Input

```json
{
  "usernames": ["techuser", "designuser"]
}
```

## Output

The Actor provides the following output:

### Dataset Record

| Field | Type | Required | Description |
|-------|------|----------|-------------|
| `username` | string | Yes | Username the search was conducted for |
| `links` | array | Yes | Array of links to the social media accounts found |
| `links[]` | string | No | URL to the account |

### Example Dataset Item (JSON)

```json
{
  "username": "johndoe",
  "links": [
    "https://github.com/johndoe"
  ]
}
```

## Performance & Resources

- **Memory Requirements**:
  - Minimum: 512 MB RAM
  - Recommended: 1 GB RAM for multiple usernames
- **Processing Time**:
  - Single username: ~1-2 minutes
  - Multiple usernames: 2-5 minutes
  - Varies based on the number of sites checked and response times

For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
@ -0,0 +1,13 @@
{
    "actorSpecification": 1,
    "name": "sherlock",
    "version": "0.0",
    "buildTag": "latest",
    "environmentVariables": {},
    "dockerFile": "./Dockerfile",
    "dockerContext": "../",
    "input": "./input_schema.json",
    "storages": {
        "dataset": "./dataset_schema.json"
    }
}
@ -0,0 +1,14 @@
#!/bin/bash
INPUT=`apify actor:get-input | jq -r .usernames[] | xargs echo`
echo "INPUT: $INPUT"

sherlock $INPUT

for username in $INPUT; do
  # escape the special meaning leading characters
  # https://github.com/jpmens/jo/blob/master/jo.md#description
  safe_username=$(echo $username | sed 's/^@/\\@/' | sed 's/^:/\\:/' | sed 's/%/\\%/')
  echo "pushing results for username: $username, content:"
  cat $username.txt
  sed '$d' $username.txt | jo -a | jo username=$safe_username links:=- | apify actor:push-data
done
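A quick way to see what the `jo`/`jq` pipeline in the script produces is to feed it a small hand-made results file. The sketch below assumes Sherlock's per-username text file ends with a summary line (which `sed '$d'` strips); the file contents here are made up for illustration.

```bash
# Hypothetical johndoe.txt: two found URLs plus Sherlock's trailing summary line
printf '%s\n' \
  "https://github.com/johndoe" \
  "https://www.reddit.com/user/johndoe" \
  "Total Websites Username Detected On : 2" > johndoe.txt

# Drop the summary line, collect the remaining lines into a JSON array,
# then wrap the array in an object matching the dataset schema
sed '$d' johndoe.txt | jo -a | jo username=johndoe links:=-
# {"username":"johndoe","links":["https://github.com/johndoe","https://www.reddit.com/user/johndoe"]}
```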
@ -0,0 +1,45 @@
{
    "actorSpecification": 1,
    "fields": {
        "title": "Sherlock actor input",
        "description": "This is actor input schema",
        "type": "object",
        "schemaVersion": 1,
        "properties": {
            "links": {
                "title": "Links to accounts",
                "type": "array",
                "description": "A list of social media accounts found for the username"
            },
            "username": {
                "title": "Lookup username",
                "type": "string",
                "description": "Username the lookup was performed for"
            }
        },
        "required": [
            "username",
            "links"
        ]
    },
    "views": {
        "overview": {
            "title": "Overview",
            "transformation": {
                "fields": [
                    "username",
                    "links"
                ]
            },
            "display": {
                "component": "table",
                "links": {
                    "label": "Links"
                },
                "username": {
                    "label": "Username"
                }
            }
        }
    }
}
@ -0,0 +1,18 @@
{
    "title": "Sherlock actor input",
    "description": "This is actor input schema",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "usernames": {
            "title": "Usernames to hunt down",
            "type": "array",
            "description": "A list of usernames to be checked for existence across social media",
            "editor": "stringList",
            "prefill": ["johndoe"]
        }
    },
    "required": [
        "usernames"
    ]
}
@ -1,5 +1,5 @@
### REPOSITORY
/.github/CODEOWNERS @sdushantha
/.github/CODEOWNERS @sdushantha @ppfeister
/.github/FUNDING.yml @sdushantha
/LICENSE @sdushantha
@ -19,6 +19,15 @@ body:
        - Other (indicate below)
    validations:
      required: true
  - type: input
    id: package-version
    attributes:
      label: Package version
      description: |
        Knowing the version of the package you are using can help us diagnose your issue more quickly.
        You can find the version by running `sherlock --version`.
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
@ -0,0 +1,89 @@
name: Exclusions Updater

on:
  schedule:
    #- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
    - cron: '0 5 * * *' # Runs at 05:00 every day
  workflow_dispatch:

jobs:
  update-exclusions:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

      - name: Run false positive tests
        run: |
          $(poetry env activate)
          pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
          deactivate

      - name: Parse false positive detections by desired categories
        run: |
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
            | sort -u > false_positive_exclusions.txt
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
            | sort -u > waf_hits.txt

      - name: Detect if exclusions list changed
        id: detect_changes
        run: |
          git fetch origin exclusions || true

          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare
            if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
            fi
          else
            # If the exclusions branch or file do not exist, treat as changed
            echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Quantify and display results
        run: |
          FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
          WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
          echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
          echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
          echo ">>> WAF hits:" && cat waf_hits.txt

      - name: Commit and push exclusions list
        if: steps.detect_changes.outputs.exclusions_changed == 'true'
        run: |
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"

          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp

          git add -f false_positive_exclusions.txt.tmp # -f required to override .gitignore
          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp

          git fetch origin exclusions || true # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)

          git stash pop || true

          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt

          git rm -f false_positive_exclusions.txt.tmp || true
          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
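The `grep -oP` patterns in the parsing step only match if the pytest report contains lines naming `test_false_pos[<Target>]` together with a "result was …" message. A minimal made-up sample illustrates what gets extracted (the exact pytest line format is an assumption here):

```bash
# Made-up sample of two result lines from the pytest report
cat > fp_test_results.txt <<'EOF'
FAILED tests/test_coverage.py::test_false_pos[ExampleSiteA] - ... result was Claimed
FAILED tests/test_coverage.py::test_false_pos[ExampleSiteB] - ... result was WAF
EOF

# Targets whose false-positive check reported "Claimed"
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt | sort -u
# ExampleSiteA

# Targets that hit a WAF
grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt | sort -u
# ExampleSiteB
```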
@ -11,6 +11,7 @@ on:
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
      - 'Dockerfile'
  push:
    branches:
      - master

@ -21,11 +22,13 @@ on:
      - '**/*.py'
      - '**/*.ini'
      - '**/*.toml'
      - 'Dockerfile'

jobs:
  tox-lint:
    # Linting is ran through tox to ensure that the same linter is used by local runners
    runs-on: ubuntu-latest
    # Linting is run through tox to ensure that the same linter
    # is used by local runners
    steps:
      - uses: actions/checkout@v4
      - name: Set up linting environment

@ -41,7 +44,8 @@ jobs:
  tox-matrix:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false # We want to know what specicic versions it fails on
      # We want to know what specific versions it fails on
      fail-fast: false
      matrix:
        os: [
          ubuntu-latest,

@ -49,11 +53,10 @@ jobs:
          macos-latest,
        ]
        python-version: [
          '3.8',
          '3.9',
          '3.10',
          '3.11',
          '3.12',
          '3.13',
        ]
    steps:
      - uses: actions/checkout@v4

@ -68,3 +71,22 @@ jobs:
          pip install tox-gh-actions
      - name: Run tox
        run: tox
  docker-build-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Get version from pyproject.toml
        id: get-version
        run: |
          VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
          echo "version=$VERSION" >> $GITHUB_OUTPUT
      - name: Build Docker image
        run: |
          docker build \
            --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
            -t sherlock-test:latest .
      - name: Test Docker image runs
        run: docker run --rm sherlock-test:latest --version
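The docker-build-test job can also be reproduced locally by hand; the sketch below simply repeats the workflow's own commands (the extracted version value is illustrative):

```bash
# Pull the package version out of pyproject.toml the same way the workflow does
VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
echo "$VERSION"   # e.g. 0.16.0

# Build and smoke-test the image with that version tag
docker build --build-arg VERSION_TAG="$VERSION" -t sherlock-test:latest .
docker run --rm sherlock-test:latest --version
```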
@ -0,0 +1,126 @@
name: Modified Target Validation

on:
  pull_request_target:
    branches:
      - master
    paths:
      - "sherlock_project/resources/data.json"

jobs:
  validate-modified-targets:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          # Checkout the base branch but fetch all history to avoid a second fetch call
          ref: ${{ github.base_ref }}
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"

      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: "latest"

      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev

      - name: Prepare JSON versions for comparison
        run: |
          # Fetch only the PR's branch head (single network call in this step)
          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr

          # Find the merge-base commit between the target branch and the PR branch
          MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr)
          echo "Comparing PR head against merge-base commit: $MERGE_BASE"

          # Safely extract the file from the PR's head and the merge-base commit
          git show pr:sherlock_project/resources/data.json > data.json.head
          git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base

          # CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR
          # This ensures that pytest runs against the new, updated file.
          cp data.json.head sherlock_project/resources/data.json

      - name: Discover modified targets
        id: discover-modified
        run: |
          CHANGED=$(
          python - <<'EOF'
          import json
          import sys
          try:
              with open("data.json.base") as f: base = json.load(f)
              with open("data.json.head") as f: head = json.load(f)
          except FileNotFoundError as e:
              print(f"Error: Could not find {e.filename}", file=sys.stderr)
              sys.exit(1)
          except json.JSONDecodeError as e:
              print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr)
              sys.exit(1)

          changed = []
          for k, v in head.items():
              if k not in base or base[k] != v:
                  changed.append(k)

          print(",".join(sorted(changed)))
          EOF
          )

          # Preserve changelist
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Validate remote manifest against local schema
        if: steps.discover-modified.outputs.changed_targets != ''
        run: |
          poetry run pytest tests/test_manifest.py::test_validate_manifest_against_local_schema

      # --- The rest of the steps below are unchanged ---

      - name: Validate modified targets
        if: steps.discover-modified.outputs.changed_targets != ''
        continue-on-error: true
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
            --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
            --junitxml=validation_results.xml

      - name: Prepare validation summary
        if: steps.discover-modified.outputs.changed_targets != ''
        id: prepare-summary
        run: |
          summary=$(
            poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
          )
          echo "$summary" > validation_summary.md

      - name: Announce validation results
        if: steps.discover-modified.outputs.changed_targets != ''
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('validation_summary.md', 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.payload.pull_request.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });

      - name: This step shows as ran when no modifications are found
        if: steps.discover-modified.outputs.changed_targets == ''
        run: |
          echo "No modified targets found"
@ -2,13 +2,12 @@
# 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
# 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
# 3. Build image with BOTH latest and version tags
#    i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .`
#    i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`

FROM python:3.12-slim-bullseye as build
FROM python:3.12-slim-bullseye AS build
WORKDIR /sherlock

RUN apt-get update \
    pip3 install --no-cache-dir --upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip

FROM python:3.12-slim-bullseye
WORKDIR /sherlock
@ -1,36 +1,45 @@
#!/usr/bin/env python
# This module generates the listing of supported sites which can be found in
# sites.md. It also organizes all the sites in alphanumeric order
# sites.mdx. It also organizes all the sites in alphanumeric order
import json
import os

DATA_REL_URI: str = "sherlock_project/resources/data.json"

DEFAULT_ENCODING = "utf-8"

# Read the data.json file
with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
with open(DATA_REL_URI, "r", encoding=DEFAULT_ENCODING) as data_file:
    data: dict = json.load(data_file)

# Removes schema-specific keywords for proper processing
social_networks: dict = dict(data)
social_networks = data.copy()
social_networks.pop('$schema', None)

# Sort the social networks in alphanumeric order
social_networks: list = sorted(social_networks.items())
social_networks = sorted(social_networks.items())

# Make output dir where the site list will be written
os.mkdir("output")

# Write the list of supported sites to sites.md
with open("output/sites.mdx", "w") as site_file:
    site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n")
# Write the list of supported sites to sites.mdx
with open("output/sites.mdx", "w", encoding=DEFAULT_ENCODING) as site_file:
    site_file.write("---\n")
    site_file.write("title: 'List of supported sites'\n")
    site_file.write("sidebarTitle: 'Supported sites'\n")
    site_file.write("icon: 'globe'\n")
    site_file.write("description: 'Sherlock currently supports **400+** sites'\n")
    site_file.write("---\n\n")

    for social_network, info in social_networks:
        url_main = info["urlMain"]
        is_nsfw = "**(NSFW)**" if info.get("isNSFW") else ""
        site_file.write(f"1. [{social_network}]({url_main}) {is_nsfw}\n")

# Overwrite the data.json file with sorted data
with open("sherlock/resources/data.json", "w") as data_file:
with open(DATA_REL_URI, "w", encoding=DEFAULT_ENCODING) as data_file:
    sorted_data = json.dumps(data, indent=2, sort_keys=True)
    data_file.write(sorted_data)
    data_file.write("\n")
    data_file.write("\n")  # Keep the newline after writing data

print("Finished updating supported site listing!")
@ -0,0 +1,72 @@
#!/usr/bin/env python
# This module summarizes the results of site validation tests queued by
# workflow validate_modified_targets for presentation in Issue comments.

from defusedxml import ElementTree as ET
import sys
from pathlib import Path

def summarize_junit_xml(xml_path: Path) -> str:
    tree = ET.parse(xml_path)
    root = tree.getroot()
    suite = root.find('testsuite')

    pass_message: str = ":heavy_check_mark: Pass"
    fail_message: str = ":x: Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = []
    summary_lines.append("#### Automatic validation of changes\n")
    summary_lines.append("| Target | F+ Check | F- Check |")
    summary_lines.append("|---|---|---|")

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        test_name = testcase.get('name').split('[')[0]
        site_name = testcase.get('name').split('[')[1].rstrip(']')
        failure = testcase.find('failure')
        error = testcase.find('error')

        if site_name not in results:
            results[site_name] = {}

        if test_name == "test_false_neg":
            results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message
        elif test_name == "test_false_pos":
            results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message

        if error is not None:
            errors_detected = True

    for result in results:
        summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |")

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(sys.argv[1])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    summary: str = summarize_junit_xml(xml_path)
    print(summary)
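Run against a JUnit XML file produced by the validation job, the script prints a Markdown table that becomes the PR comment. The output below is an illustrative sketch for a single made-up target:

```bash
poetry run python devel/summarize_site_validation.py validation_results.xml
# #### Automatic validation of changes
#
# | Target | F+ Check | F- Check |
# |---|---|---|
# | ExampleSite | :heavy_check_mark: Pass | :x: Fail |
```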
@ -1,6 +1,6 @@
<p align=center>
<p align="center">
  <br>
  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
  <br>
  <span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
  <br>

@ -15,25 +15,27 @@
</p>

<p align="center">
  <img width="70%" height="70%" src="images/demo.png"/>
  </a>
  <img width="70%" height="70%" src="images/demo.png" alt="demo"/>
</p>


## Installation

> [!WARNING]
> Packages for ParrotOS and Ubuntu 24.04, maintained by a third party, appear to be __broken__.
> Users of these systems should defer to pipx/pip or Docker.

| | Command | Notes |
| - | - | - |
| PyPI | `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
| Docker | `docker pull sherlock/sherlock` | |
| Debian family | `apt install sherlock` | Kali, Parrot, Debian Testing and Sid |
| BlackArch | `pacman -S sherlock` | |
| Homebrew | `brew install sherlock` | |
| Method | Notes |
| - | - |
| `pipx install sherlock-project` | `pip` may be used in place of `pipx` |
| `docker run -it --rm sherlock/sherlock` | |
| `dnf install sherlock-project` | |

Community-maintained packages are available for Debian (>= 13), Ubuntu (>= 22.10), Homebrew, Kali, and BlackArch. These packages are not directly supported or maintained by the Sherlock Project.

See all alternative installation methods [here](https://sherlockproject.xyz/installation)

## Usage
## General usage

To search for only one user:
```bash

@ -95,15 +97,35 @@ optional arguments:
  --local, -l           Force the use of the local data.json file.
  --nsfw                Include checking of NSFW sites from default list.
```

## Apify Actor Usage [](https://apify.com/netmilk/sherlock?fpr=sherlock)

<a href="https://apify.com/netmilk/sherlock?fpr=sherlock"><img src="https://apify.com/ext/run-on-apify.png" alt="Run Sherlock Actor on Apify" width="176" height="39" /></a>

You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.

```bash
$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
[{
  "username": "user123",
  "links": [
    "https://www.1337x.to/user/user123/",
    ...
  ]
}]
```

Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).

## Credits

Thank you to everyone who has contributed to Sherlock! ❤️

<a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
</a>

## Star History

<picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date&theme=dark" />
  <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=sherlock-project/sherlock&type=Date" />
@ -84,22 +84,6 @@ As of 2020-02-23, all usernames are reported as not existing.
},
```

## Fanpop

As of 2020-02-23, all usernames are reported as not existing.

```json
"fanpop": {
  "errorType": "response_url",
  "errorUrl": "http://www.fanpop.com/",
  "rank": 9454,
  "url": "http://www.fanpop.com/fans/{}",
  "urlMain": "http://www.fanpop.com/",
  "username_claimed": "blue",
  "username_unclaimed": "noonewould_everusethis7"
},
```

## Canva

As of 2020-02-23, all usernames are reported as not existing.

@ -1273,19 +1257,6 @@ As of 2022-05-1, FanCentro returns false positives. Will later in new version of
},
```

## Codeforces
As of 2022-05-01, Codeforces returns false positives
```json
"Codeforces": {
  "errorType": "response_url",
  "errorUrl": "https://codeforces.com/",
  "url": "https://codeforces.com/profile/{}",
  "urlMain": "https://www.codeforces.com/",
  "username_claimed": "tourist",
  "username_unclaimed": "noonewouldeverusethis789"
},
```

## Smashcast
As of 2022-05-01, Smashcast is down
```json

@ -1919,3 +1890,108 @@ __2024-06-10 :__ Http request returns 403 forbidden, and tries to verify the con
  "username_claimed": "JennyKrafts"
}
```

## Alik.cz
__2024-07-21 :__ Target is now BLACKLISTED from the default manifest due to the site receiving unnecessarily high traffic from Sherlock (by request of the site owners). This target is not permitted to be reactivated. Inclusion in unrelated manifests is not impacted, but it is discouraged.

## 8tracks
__2025-02-02 :__ Might be dead again. Nobody knows for sure.
```json
"8tracks": {
  "errorType": "message",
  "errorMsg": "\"available\":true",
  "headers": {
    "Accept-Language": "en-US,en;q=0.5"
  },
  "url": "https://8tracks.com/{}",
  "urlProbe": "https://8tracks.com/users/check_username?login={}&format=jsonh",
  "urlMain": "https://8tracks.com/",
  "username_claimed": "blue"
}
```

## Shpock
__2025-02-02 :__ Can likely be added back with a new endpoint (source username availability endpoint from mobile app reg flow?)
```json
"Shpock": {
  "errorType": "status_code",
  "url": "https://www.shpock.com/shop/{}/items",
  "urlMain": "https://www.shpock.com/",
  "username_claimed": "user"
}
```

## Twitch
__2025-02-02 :__
```json
"Twitch": {
  "errorType": "message",
  "errorMsg": "components.availability-tracking.warn-unavailable.component",
  "url": "https://www.twitch.tv/{}",
  "urlMain": "https://www.twitch.tv/",
  "urlProbe": "https://m.twitch.tv/{}",
  "username_claimed": "jenny"
}
```

## Fiverr
__2025-02-02 :__ Fiverr added CSRF protections that messed with this test
```json
"Fiverr": {
  "errorMsg": "\"status\":\"success\"",
  "errorType": "message",
  "headers": {
    "Content-Type": "application/json",
    "Accept-Language": "en-US,en;q=0.9"
  },
  "regexCheck": "^[A-Za-z][A-Za-z\\d_]{5,14}$",
  "request_method": "POST",
  "request_payload": {
    "username": "{}"
  },
  "url": "https://www.fiverr.com/{}",
  "urlMain": "https://www.fiverr.com/",
  "urlProbe": "https://www.fiverr.com/validate_username",
  "username_claimed": "blueman"
}
```

## BabyRU
__2025-02-02 :__ Just being problematic (possibly related to errorMsg encoding?)
```json
"babyRU": {
  "errorMsg": [
    "\u0421\u0442\u0440\u0430\u043d\u0438\u0446\u0430, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0432\u044b \u0438\u0441\u043a\u0430\u043b\u0438, \u043d\u0435 \u043d\u0430\u0439\u0434\u0435\u043d\u0430",
    "Доступ с вашего IP-адреса временно ограничен"
  ],
  "errorType": "message",
  "url": "https://www.baby.ru/u/{}/",
  "urlMain": "https://www.baby.ru/",
  "username_claimed": "blue"
}
```

## v0.dev
__2025-02-16 :__ Unsure if any way to view profiles exists now
```json
"v0.dev": {
  "errorType": "message",
  "errorMsg": "<title>v0 by Vercel</title>",
  "url": "https://v0.dev/{}",
  "urlMain": "https://v0.dev",
  "username_claimed": "t3dotgg"
}
```

## TorrentGalaxy
__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
```json
"TorrentGalaxy": {
  "errorMsg": "<title>TGx:Can't show details</title>",
  "errorType": "message",
  "regexCheck": "^[A-Za-z0-9]{3,15}$",
  "url": "https://torrentgalaxy.to/profile/{}",
  "urlMain": "https://torrentgalaxy.to/",
  "username_claimed": "GalaxyRG"
},
```
@ -8,8 +8,7 @@ source = "init"

[tool.poetry]
name = "sherlock-project"
# single source of truth for version is __init__.py
version = "0"
version = "0.16.0"
description = "Hunt down social media accounts by username across social networks"
license = "MIT"
authors = [

@ -30,6 +29,10 @@ classifiers = [
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Security"
]
homepage = "https://sherlockproject.xyz/"

@ -40,23 +43,26 @@ repository = "https://github.com/sherlock-project/sherlock"
"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues"

[tool.poetry.dependencies]
python = "^3.8"
python = "^3.9"
certifi = ">=2019.6.16"
colorama = "^0.4.1"
PySocks = "^1.7.0"
requests = "^2.22.0"
requests-futures = "^1.0.0"
stem = "^1.8.0"
torrequest = "^0.1.0"
# pandas can likely be bumped up to ^2.0.0 after fc39 EOL
pandas = ">=1.0.0,<3.0.0"
pandas = "^2.2.1"
openpyxl = "^3.0.10"

[tool.poetry.extras]
tor = ["torrequest"]
tomli = "^2.2.1"

[tool.poetry.group.dev.dependencies]
jsonschema = "^4.0.0"
rstr = "^3.2.2"
pytest = "^8.4.2"
pytest-xdist = "^3.8.0"


[tool.poetry.group.ci.dependencies]
defusedxml = "^0.7.1"

[tool.poetry.scripts]
sherlock = 'sherlock_project.sherlock:main'
@ -1,4 +1,7 @@
[pytest]
addopts = --strict-markers
addopts = --strict-markers -m "not validate_targets"
markers =
    online: mark tests as requiring internet access.
    validate_targets: mark tests for sweeping manifest validation (sends many requests).
    validate_targets_fp: validate_targets, false positive tests only.
    validate_targets_fn: validate_targets, false negative tests only.
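With the new `addopts`, an ordinary `pytest` invocation skips the sweeping manifest checks by default; they have to be requested explicitly, for example:

```bash
pytest                                # everything except the validate_targets sweep
pytest -m validate_targets_fp -n 20   # opt in to the false-positive sweep (pytest-xdist assumed for -n)
```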
(File diff suppressed because it is too large.)
@ -1,80 +0,0 @@
{
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Sherlock Target Manifest",
    "description": "Social media targets to probe for the existence of known usernames",
    "type": "object",
    "properties": {
        "$schema": { "type": "string" }
    },
    "patternProperties": {
        "^(?!\\$).*?$": {
            "type": "object",
            "description": "Target name and associated information (key should be human readable name)",
            "required": [ "url", "urlMain", "errorType", "username_claimed" ],
            "properties": {
                "url": { "type": "string" },
                "urlMain": { "type": "string" },
                "urlProbe": { "type": "string" },
                "username_claimed": { "type": "string" },
                "regexCheck": { "type": "string" },
                "isNSFW": { "type": "boolean" },
                "headers": { "type": "object" },
                "request_payload": { "type": "object" },
                "__comment__": {
                    "type": "string",
                    "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
                },
                "tags": {
                    "oneOf": [
                        { "$ref": "#/$defs/tag" },
                        { "type": "array", "items": { "$ref": "#/$defs/tag" } }
                    ]
                },
                "request_method": {
                    "type": "string",
                    "enum": [ "GET", "POST", "HEAD", "PUT" ]
                },
                "errorType": {
                    "type": "string",
                    "enum": [ "message", "response_url", "status_code" ]
                },
                "errorMsg": {
                    "oneOf": [
                        { "type": "string" },
                        { "type": "array", "items": { "type": "string" } }
                    ]
                },
                "errorCode": {
                    "oneOf": [
                        { "type": "integer" },
                        { "type": "array", "items": { "type": "integer" } }
                    ]
                },
                "errorUrl": { "type": "string" },
                "response_url": { "type": "string" }
            },
            "dependencies": {
                "errorMsg": {
                    "properties" : { "errorType": { "const": "message" } }
                },
                "errorUrl": {
                    "properties": { "errorType": { "const": "response_url" } }
                },
                "errorCode": {
                    "properties": { "errorType": { "const": "status_code" } }
                }
            },
            "if": { "properties": { "errorType": { "const": "message" } } },
            "then": { "required": [ "errorMsg" ] },
            "else": {
                "if": { "properties": { "errorType": { "const": "response_url" } } },
                "then": { "required": [ "errorUrl" ] }
            },
            "additionalProperties": false
        }
    },
    "additionalProperties": false,
    "$defs": {
        "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
    }
}
@ -5,11 +5,26 @@ networks.

"""

from importlib.metadata import version as pkg_version, PackageNotFoundError
import pathlib
import tomli


def get_version() -> str:
    """Fetch the version number of the installed package."""
    try:
        return pkg_version("sherlock_project")
    except PackageNotFoundError:
        pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml"
        with pyproject_path.open("rb") as f:
            pyproject_data = tomli.load(f)
        return pyproject_data["tool"]["poetry"]["version"]

# This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
import_error_test_var = None

__shortname__ = "Sherlock"
__longname__ = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.15.0"
__version__ = get_version()

forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
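A quick sanity check of the new version resolution (a sketch; assumes the package layout above): when the package is installed, the version comes from package metadata, and in a bare source checkout the `tomli` fallback reads it from `pyproject.toml`.

```bash
# Installed (e.g. via pipx/pip): version is resolved from package metadata;
# in an uninstalled source checkout the same import falls back to pyproject.toml
python -c "import sherlock_project; print(sherlock_project.__version__)"
# 0.16.0
```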
@ -14,8 +14,8 @@ if __name__ == "__main__":
    # Check if the user is using the correct version of Python
    python_version = sys.version.split()[0]

    if sys.version_info < (3, 8):
        print(f"Sherlock requires Python 3.8+\nYou are using Python {python_version}, which is not supported by Sherlock.")
    if sys.version_info < (3, 9):
        print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.")
        sys.exit(1)

    from sherlock_project import sherlock
(File diff suppressed because it is too large.)
@ -1,80 +1,149 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": [ "GET", "POST", "HEAD", "PUT" ]
},
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": ["url", "urlMain", "errorType", "username_claimed"],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": ["GET", "POST", "HEAD", "PUT"]
},
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "string",
"enum": [ "message", "response_url", "status_code" ]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"properties" : { "errorType": { "const": "message" } }
},
"errorUrl": {
"properties": { "errorType": { "const": "response_url" } }
},
"errorCode": {
"properties": { "errorType": { "const": "status_code" } }
"type": "array",
"contains": { "const": "message" }
}
},
"if": { "properties": { "errorType": { "const": "message" } } },
"then": { "required": [ "errorMsg" ] },
"else": {
"if": { "properties": { "errorType": { "const": "response_url" } } },
"then": { "required": [ "errorUrl" ] }
},
"additionalProperties": false
}
}
]
},
"errorUrl": {
"oneOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
}
}
}
]
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
},
"allOf": [
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
}
}
}
]
},
"then": { "required": ["errorMsg"] }
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"then": { "required": ["errorUrl"] }
}
],
"additionalProperties": false
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
}
}
@ -24,6 +24,7 @@ import re
|
|||
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
||||
from json import loads as json_loads
|
||||
from time import monotonic
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from requests_futures.sessions import FuturesSession
|
||||
|
|
@ -167,15 +168,13 @@ def multiple_usernames(username):
|
|||
|
||||
|
||||
def sherlock(
|
||||
username,
|
||||
site_data,
|
||||
username: str,
|
||||
site_data: dict[str, dict[str, str]],
|
||||
query_notify: QueryNotify,
|
||||
tor: bool = False,
|
||||
unique_tor: bool = False,
|
||||
dump_response: bool = False,
|
||||
proxy=None,
|
||||
timeout=60,
|
||||
):
|
||||
proxy: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
) -> dict[str, dict[str, str | QueryResult]]:
|
||||
"""Run Sherlock Analysis.
|
||||
|
||||
Checks for existence of username on various social media sites.
|
||||
|
|
@ -187,8 +186,6 @@ def sherlock(
|
|||
query_notify -- Object with base type of QueryNotify().
|
||||
This will be used to notify the caller about
|
||||
query results.
|
||||
tor -- Boolean indicating whether to use a tor circuit for the requests.
|
||||
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
|
||||
proxy -- String indicating the proxy URL
|
||||
timeout -- Time in seconds to wait before timing out request.
|
||||
Default is 60 seconds.
|
||||
|
|
@@ -209,32 +206,9 @@ def sherlock(

# Notify caller that we are starting the query.
query_notify.start(username)
# Create session based on request methodology
if tor or unique_tor:
try:
from torrequest import TorRequest # noqa: E402
except ImportError:
print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
sys.exit(query_notify.finish())

print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")

# Requests using Tor obfuscation
try:
underlying_request = TorRequest()
except OSError:
print("Tor not found in system path. Unable to continue.\n")
sys.exit(query_notify.finish())

underlying_session = underlying_request.session
else:
# Normal requests
underlying_session = requests.session()
underlying_request = requests.Request()
# Normal requests
underlying_session = requests.session()

# Limit number of workers to 20.
# This is probably vastly overkill.
@@ -261,7 +235,7 @@ def sherlock(
# A user agent is needed because some sites don't return the correct
# information since they think that we are bots (Which we actually are...)
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/116.0",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0",
}

if "headers" in net_info:
@@ -358,15 +332,10 @@ def sherlock(
# Store future in data for access later
net_info["request_future"] = future

# Reset identify for tor (if needed)
if unique_tor:
underlying_request.reset_identity()

# Add this site's results into final dictionary with all the other results.
results_total[social_network] = results_site

# Open the file containing account links
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in site_data.items():
# Retrieve results again
results_site = results_total.get(social_network)
@@ -380,6 +349,8 @@ def sherlock(

# Get the expected error type
error_type = net_info["errorType"]
if isinstance(error_type, str):
error_type: list[str] = [error_type]

# Retrieve future and ensure it has finished
future = net_info["request_future"]
@@ -412,8 +383,10 @@ def sherlock(
# be highly targetted. Comment at the end of each fingerprint to
# indicate target and date fingerprinted.
WAFHitMsgs = [
'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare
r'<span id="challenge-error-text">', # 2024-11-11 Cloudflare error page
r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS)
r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security
]

if error_text is not None:
@@ -422,59 +395,61 @@ def sherlock(
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF

elif error_type == "message":
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
elif error_type == "status_code":
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED

# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]

if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
elif error_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else:
# It should be impossible to ever get here...
raise ValueError(
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
)

if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
error_context = f"Unknown error type '{error_type}' for {social_network}"
query_status = QueryStatus.UNKNOWN
else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE

if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED

# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]

if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE

if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE

if dump_response:
print("+++++++++++++++++++++")
print(f"TARGET NAME : {social_network}")
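With errorType normalized to a list, the detection methods above are applied in sequence, and the status_code and response_url checks only run while the site has not already been marked AVAILABLE. A hypothetical trace (error message and codes invented for illustration):

# Manifest entry: errorType = ["message", "status_code"],
#                 errorMsg = "Nothing to see here", errorCode = 404
#
# Response A: HTTP 200, body contains "Nothing to see here"
#   "message" check      -> error string found                 -> AVAILABLE
#   "status_code" check  -> skipped (status already AVAILABLE)
#
# Response B: HTTP 200, body without the error string
#   "message" check      -> no error string                    -> CLAIMED
#   "status_code" check  -> 200 not in errorCode and is 2xx    -> stays CLAIMED
#
# Response C: HTTP 404, body without the error string
#   "message" check      -> no error string                    -> CLAIMED (so far)
#   "status_code" check  -> 404 listed in errorCode            -> AVAILABLE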
@@ -504,7 +479,7 @@ def sherlock(
print("+++++++++++++++++++++")

# Notify caller about results of query.
result = QueryResult(
result: QueryResult = QueryResult(
username=username,
site_name=social_network,
site_url_user=url,
@@ -593,22 +568,6 @@ def main():
dest="output",
help="If using single username, the output of the result will be saved to this file.",
)
parser.add_argument(
"--tor",
"-t",
action="store_true",
dest="tor",
default=False,
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--unique-tor",
"-u",
action="store_true",
dest="unique_tor",
default=False,
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--csv",
action="store_true",
@@ -653,7 +612,7 @@ def main():
metavar="JSON_FILE",
dest="json_file",
default=None,
help="Load data from a JSON file or an online, valid, JSON file.",
help="Load data from a JSON file or an online, valid, JSON file. Upstream PR numbers also accepted.",
)
parser.add_argument(
"--timeout",
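The reworded help string reflects the resolution logic added later in main(): a purely numeric --json value is treated as an upstream pull request number and swapped for the raw data.json at that PR's head commit. As an illustration, an invocation such as `sherlock --json 1234 someuser` (the PR number here is made up) would test against that branch's manifest instead of the bundled one.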
@@ -716,6 +675,32 @@ def main():
help="Include checking of NSFW sites from default list.",
)

# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
# in future release
parser.add_argument(
"--no-txt",
action="store_true",
dest="no_txt",
default=False,
help="Disable creation of a txt file - WILL BE DEPRECATED",
)

parser.add_argument(
"--txt",
action="store_true",
dest="output_txt",
default=False,
help="Enable creation of a txt file",
)

parser.add_argument(
"--ignore-exclusions",
action="store_true",
dest="ignore_exclusions",
default=False,
help="Ignore upstream exclusions (may return more false positives)",
)

args = parser.parse_args()

# If the user presses CTRL-C, exit gracefully without throwing errors
@@ -723,7 +708,7 @@ def main():

# Check for newer version of Sherlock. If it exists, let the user know about it
try:
latest_release_raw = requests.get(forge_api_latest_release).text
latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
latest_release_json = json_loads(latest_release_raw)
latest_remote_tag = latest_release_json["tag_name"]
@@ -736,22 +721,10 @@ def main():
except Exception as error:
print(f"A problem occurred while checking for an update: {error}")

# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")

# Make prompts
if args.proxy is not None:
print("Using the proxy: " + args.proxy)

if args.tor or args.unique_tor:
print("Using Tor to make requests")

print(
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
)

if args.no_color:
# Disable color output.
init(strip=True, convert=False)
@@ -773,10 +746,32 @@ def main():
try:
if args.local:
sites = SitesInformation(
os.path.join(os.path.dirname(__file__), "resources/data.json")
os.path.join(os.path.dirname(__file__), "resources/data.json"),
honor_exclusions=False,
)
else:
sites = SitesInformation(args.json_file)
json_file_location = args.json_file
if args.json_file:
# If --json parameter is a number, interpret it as a pull request number
if args.json_file.isnumeric():
pull_number = args.json_file
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
pull_request_raw = requests.get(pull_url, timeout=10).text
pull_request_json = json_loads(pull_request_raw)

# Check if it's a valid pull request
if "message" in pull_request_json:
print(f"ERROR: Pull request #{pull_number} not found.")
sys.exit(1)

head_commit_sha = pull_request_json["head"]["sha"]
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"

sites = SitesInformation(
data_file_path=json_file_location,
honor_exclusions=not args.ignore_exclusions,
do_not_exclude=args.site_list,
)
except Exception as error:
print(f"ERROR: {error}")
sys.exit(1)
@@ -830,8 +825,6 @@ def main():
username,
site_data,
query_notify,
tor=args.tor,
unique_tor=args.unique_tor,
dump_response=args.dump_response,
proxy=args.proxy,
timeout=args.timeout,
@@ -847,14 +840,15 @@ def main():
else:
result_file = f"{username}.txt"

with open(result_file, "w", encoding="utf-8") as file:
exists_counter = 0
for website_name in results:
dictionary = results[website_name]
if dictionary.get("status").status == QueryStatus.CLAIMED:
exists_counter += 1
file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}\n")
if args.output_txt:
with open(result_file, "w", encoding="utf-8") as file:
exists_counter = 0
for website_name in results:
dictionary = results[website_name]
if dictionary.get("status").status == QueryStatus.CLAIMED:
exists_counter += 1
file.write(dictionary["url_user"] + "\n")
file.write(f"Total Websites Username Detected On : {exists_counter}\n")

if args.csv:
result_file = f"{username}.csv"
@@ -931,8 +925,8 @@ def main():
{
"username": usernames,
"name": names,
"url_main": url_main,
"url_user": url_user,
"url_main": [f'=HYPERLINK(\"{u}\")' for u in url_main],
"url_user": [f'=HYPERLINK(\"{u}\")' for u in url_user],
"exists": exists,
"http_status": http_status,
"response_time_s": response_time_s,
@@ -7,6 +7,10 @@ import json
import requests
import secrets


MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"

class SiteInformation:
def __init__(self, name, url_home, url_username_format, username_claimed,
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
@@ -67,12 +71,17 @@ class SiteInformation:
Return Value:
Nicely formatted string to get information about this object.
"""


return f"{self.name} ({self.url_home})"


class SitesInformation:
def __init__(self, data_file_path=None):
def __init__(
self,
data_file_path: str|None = None,
honor_exclusions: bool = True,
do_not_exclude: list[str] = [],
):
"""Create Sites Information Object.

Contains information about all supported websites.
@@ -110,7 +119,7 @@ class SitesInformation:
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
# this instead of the local one is so that the user has the most up-to-date data. This prevents
# users from creating issue about false positives which has already been fixed or having outdated data
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
data_file_path = MANIFEST_URL

# Ensure that specified data file has correct extension.
if not data_file_path.lower().endswith(".json"):
@@ -120,7 +129,7 @@ class SitesInformation:
if data_file_path.lower().startswith("http"):
# Reference is to a URL.
try:
response = requests.get(url=data_file_path)
response = requests.get(url=data_file_path, timeout=30)
except Exception as error:
raise FileNotFoundError(
f"Problem while attempting to access data file URL '{data_file_path}': {error}"
@@ -152,9 +161,31 @@ class SitesInformation:
raise FileNotFoundError(f"Problem while attempting to access "
f"data file '{data_file_path}'."
)


site_data.pop('$schema', None)

if honor_exclusions:
try:
response = requests.get(url=EXCLUSIONS_URL, timeout=10)
if response.status_code == 200:
exclusions = response.text.splitlines()
exclusions = [exclusion.strip() for exclusion in exclusions]

for site in do_not_exclude:
if site in exclusions:
exclusions.remove(site)

for exclusion in exclusions:
try:
site_data.pop(exclusion, None)
except KeyError:
pass

except Exception:
# If there was any problem loading the exclusions, just continue without them
print("Warning: Could not load exclusions, continuing without them.")
honor_exclusions = False

self.sites = {}

# Add all site information from the json file to internal site list.
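The constructor keeps its old call shape but now also controls the upstream false-positive exclusion list. A minimal usage sketch (the retained site name is illustrative):

from sherlock_project.sites import SitesInformation

sites_default = SitesInformation()                                  # fetch and honor the exclusion list
sites_keep_one = SitesInformation(do_not_exclude=["ExampleSite"])   # keep this site even if excluded upstream
sites_unfiltered = SitesInformation(honor_exclusions=False)         # ignore the exclusion list entirely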
@@ -194,7 +225,7 @@ class SitesInformation:
for site in self.sites:
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
continue
sites[site] = self.sites[site]
sites[site] = self.sites[site]
self.sites = sites

def site_name_list(self):
@@ -4,6 +4,11 @@ import urllib
import pytest
from sherlock_project.sites import SitesInformation

def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions)
sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj}
return sites_iterable

@pytest.fixture()
def sites_obj():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
@@ -11,9 +16,7 @@ def sites_obj():

@pytest.fixture(scope="session")
def sites_info():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
yield sites_iterable
yield fetch_local_manifest()

@pytest.fixture(scope="session")
def remote_schema():
@@ -21,3 +24,28 @@ def remote_schema():
with urllib.request.urlopen(schema_url) as remoteschema:
schemadat = json.load(remoteschema)
yield schemadat

def pytest_addoption(parser):
parser.addoption(
"--chunked-sites",
action="store",
default=None,
help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
)

def pytest_generate_tests(metafunc):
if "chunked_sites" in metafunc.fixturenames:
sites_info = fetch_local_manifest(honor_exclusions=False)

# Ingest and apply site selections
site_filter: str | None = metafunc.config.getoption("--chunked-sites")
if site_filter:
selected_sites: list[str] = [site.strip() for site in site_filter.split(",")]
sites_info = {
site: data for site, data in sites_info.items()
if site in selected_sites
}

params = [{name: data} for name, data in sites_info.items()]
ids = list(sites_info.keys())
metafunc.parametrize("chunked_sites", params, ids=ids)
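The pytest_generate_tests hook above turns every manifest entry into its own parametrized case for tests that request the chunked_sites fixture, and the new --chunked-sites option narrows a run to specific sites. An illustrative invocation (the site names are only examples): `pytest -m validate_targets --chunked-sites "Keybase,devRant"`.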
@@ -7,8 +7,8 @@ class Interactives:
def run_cli(args:str = "") -> str:
"""Pass arguments to Sherlock as a normal user on the command line"""
# Adapt for platform differences (Windows likes to be special)
if platform.system == "Windows":
command:str = f"py -m sherlock {args}"
if platform.system() == "Windows":
command:str = f"py -m sherlock_project {args}"
else:
command:str = f"sherlock {args}"
@@ -20,8 +20,7 @@ class Interactives:
raise InteractivesSubprocessError(e.output.decode())


# -> list[str] is prefered, but will require deprecation of support for Python 3.8
def walk_sherlock_for_files_with(pattern: str) -> list:
def walk_sherlock_for_files_with(pattern: str) -> list[str]:
"""Check all files within the Sherlock package for matching patterns"""
pattern:re.Pattern = re.compile(pattern)
matching_files:list[str] = []
@@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
"""Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
json_relative: str = '../sherlock_project/resources/data.json'
schema_relative: str = '../sherlock_project/resources/data.schema.json'


json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
@@ -44,7 +44,7 @@ class TestLiveTargets:

# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('BodyBuilding', 'blue'),
('Keybase', 'blue'),
('devRant', 'blue'),
])
def test_known_positives_via_response_url(self, sites_info, site, username):
@@ -0,0 +1,100 @@
import pytest
import re
import rstr

from sherlock_project.sherlock import sherlock
from sherlock_project.notify import QueryNotify
from sherlock_project.result import QueryResult, QueryStatus


FALSE_POSITIVE_ATTEMPTS: int = 2 # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15 # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$' # Used in absence of a regexCheck entry


def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
"""Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
def replace_upper_bound(match: re.Match) -> str: # type: ignore
lower_bound: int = int(match.group(1)) if match.group(1) else 0 # type: ignore
nonlocal upper_bound
upper_bound = upper_bound if lower_bound < upper_bound else lower_bound # type: ignore # noqa: F823
return f'{{{lower_bound},{upper_bound}}}'

pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern) # {n,} # type: ignore
pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern) # +
pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern) # *

return pattern

def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
"""Check if a site is likely to produce false positives."""
status: QueryStatus = QueryStatus.UNKNOWN

for _ in range(FALSE_POSITIVE_ATTEMPTS):
query_notify: QueryNotify = QueryNotify()
username: str = rstr.xeger(pattern)

result: QueryResult | str = sherlock(
username=username,
site_data=sites_info,
query_notify=query_notify,
)[site]['status']

if not hasattr(result, 'status'):
raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
if type(result.status) is not QueryStatus: # type: ignore
raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
status = result.status # type: ignore

if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
return status

return status


def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
"""Check if a site is likely to produce false negatives."""
status: QueryStatus = QueryStatus.UNKNOWN
query_notify: QueryNotify = QueryNotify()

result: QueryResult | str = sherlock(
username=sites_info[site]['username_claimed'],
site_data=sites_info,
query_notify=query_notify,
)[site]['status']

if not hasattr(result, 'status'):
raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
if type(result.status) is not QueryStatus: # type: ignore
raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}") # type: ignore
status = result.status # type: ignore

return status

@pytest.mark.validate_targets
@pytest.mark.online
class Test_All_Targets:

@pytest.mark.validate_targets_fp
def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
"""Iterate through all sites in the manifest to discover possible false-positive inducting targets."""
pattern: str
for site in chunked_sites:
try:
pattern = chunked_sites[site]['regexCheck']
except KeyError:
pattern = FALSE_POSITIVE_DEFAULT_PATTERN

if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
pattern = set_pattern_upper_bound(pattern)

result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"

@pytest.mark.validate_targets_fn
def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
"""Iterate through all sites in the manifest to discover possible false-negative inducting targets."""
for site in chunked_sites:
result: QueryStatus = false_negative_check(chunked_sites, site)
assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"
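To make the quantifier capping concrete, here is what set_pattern_upper_bound returns for a few invented patterns with the default bound of 15:

set_pattern_upper_bound(r'^[a-z]+$')        # -> '^[a-z]{1,15}$'    ('+' capped)
set_pattern_upper_bound(r'^[a-z0-9]{3,}$')  # -> '^[a-z0-9]{3,15}$' (open-ended '{3,}' capped)
set_pattern_upper_bound(r'^\w*$')           # -> '^\w{0,15}$'       ('*' capped)

rstr.xeger() then generates the random throwaway usernames from the capped pattern, which keeps the false-positive probes short.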
tox.ini (6 changes)
@@ -7,8 +7,6 @@ envlist =
py312
py311
py310
py39
py38

[testenv]
description = Attempt to build and install the package
@@ -16,6 +14,7 @@ deps =
coverage
jsonschema
pytest
rstr
allowlist_externals = coverage
commands =
coverage run --source=sherlock_project --module pytest -v
@@ -37,8 +36,7 @@ commands =

[gh-actions]
python =
3.13: py313
3.12: py312
3.11: py311
3.10: py310
3.9: py39
3.8: py38