chore: add error message to the codesandbox

commit 738df6c362
Author: manjushsh
Date: 2025-10-05 15:22:37 +05:30
19 changed files with 941 additions and 289 deletions

.github/CODEOWNERS (2 changes)

@@ -1,5 +1,5 @@
 ### REPOSITORY
-/.github/CODEOWNERS @sdushantha
+/.github/CODEOWNERS @sdushantha @ppfeister
 /.github/FUNDING.yml @sdushantha
 /LICENSE @sdushantha

.github/workflows/exclusions.yml (new file, 89 lines)

@@ -0,0 +1,89 @@
name: Exclusions Updater

on:
  schedule:
    #- cron: '0 5 * * 0' # Runs at 05:00 every Sunday
    - cron: '0 5 * * *' # Runs at 05:00 every day
  workflow_dispatch:

jobs:
  update-exclusions:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Run false positive tests
        run: |
          $(poetry env activate)
          pytest -q --tb no -m validate_targets_fp -n 20 | tee fp_test_results.txt
          deactivate
      - name: Parse false positive detections by desired categories
        run: |
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was Claimed)' fp_test_results.txt \
            | sort -u > false_positive_exclusions.txt
          grep -oP '(?<=test_false_pos\[)[^\]]+(?=\].*result was WAF)' fp_test_results.txt \
            | sort -u > waf_hits.txt
      - name: Detect if exclusions list changed
        id: detect_changes
        run: |
          git fetch origin exclusions || true
          if git show origin/exclusions:false_positive_exclusions.txt >/dev/null 2>&1; then
            # If the exclusions branch and file exist, compare
            if git diff --quiet origin/exclusions -- false_positive_exclusions.txt; then
              echo "exclusions_changed=false" >> "$GITHUB_OUTPUT"
            else
              echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
            fi
          else
            # If the exclusions branch or file do not exist, treat as changed
            echo "exclusions_changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Quantify and display results
        run: |
          FP_COUNT=$(wc -l < false_positive_exclusions.txt | xargs)
          WAF_COUNT=$(wc -l < waf_hits.txt | xargs)
          echo ">>> Found $FP_COUNT false positives and $WAF_COUNT WAF hits."
          echo ">>> False positive exclusions:" && cat false_positive_exclusions.txt
          echo ">>> WAF hits:" && cat waf_hits.txt
      - name: Commit and push exclusions list
        if: steps.detect_changes.outputs.exclusions_changed == 'true'
        run: |
          git config user.name "Paul Pfeister (automation)"
          git config user.email "code@pfeister.dev"
          mv false_positive_exclusions.txt false_positive_exclusions.txt.tmp
          git add -f false_positive_exclusions.txt.tmp  # -f required to override .gitignore
          git stash push -m "stash false positive exclusion list" -- false_positive_exclusions.txt.tmp
          git fetch origin exclusions || true  # Allows creation of branch if deleted
          git checkout -B exclusions origin/exclusions || (git checkout --orphan exclusions && git rm -rf .)
          git stash pop || true
          mv false_positive_exclusions.txt.tmp false_positive_exclusions.txt
          git rm -f false_positive_exclusions.txt.tmp || true
          git add false_positive_exclusions.txt
          git commit -m "auto: update exclusions list" || echo "No changes to commit"
          git push origin exclusions
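The two grep steps above key off pytest's failure report lines. A rough Python equivalent of the same extraction (the sample report lines are hypothetical, shaped like the `test_false_pos` output the patterns expect):

```python
import re

# Hypothetical report lines in the shape the grep patterns above expect
report = """\
test_false_pos[ExampleSiteA] ... result was Claimed
test_false_pos[ExampleSiteB] ... result was WAF
"""

# Same capture groups as the -oP lookaround patterns, deduplicated and sorted
false_positives = sorted(set(re.findall(r"test_false_pos\[([^\]]+)\].*result was Claimed", report)))
waf_hits = sorted(set(re.findall(r"test_false_pos\[([^\]]+)\].*result was WAF", report)))
print(false_positives)  # ['ExampleSiteA'] -> false_positive_exclusions.txt
print(waf_hits)         # ['ExampleSiteB'] -> waf_hits.txt
```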

CI workflow (tox lint and test matrix; modified)

@@ -11,6 +11,7 @@ on:
       - '**/*.py'
       - '**/*.ini'
       - '**/*.toml'
+      - 'Dockerfile'
   push:
     branches:
       - master
@@ -21,11 +22,13 @@ on:
       - '**/*.py'
       - '**/*.ini'
       - '**/*.toml'
+      - 'Dockerfile'

 jobs:
   tox-lint:
-    # Linting is ran through tox to ensure that the same linter is used by local runners
     runs-on: ubuntu-latest
+    # Linting is ran through tox to ensure that the same linter
+    # is used by local runners
     steps:
       - uses: actions/checkout@v4
       - name: Set up linting environment
@@ -41,7 +44,8 @@ jobs:
   tox-matrix:
     runs-on: ${{ matrix.os }}
     strategy:
-      fail-fast: false # We want to know what specicic versions it fails on
+      # We want to know what specicic versions it fails on
+      fail-fast: false
       matrix:
         os: [
           ubuntu-latest,
@@ -49,10 +53,10 @@ jobs:
           macos-latest,
         ]
         python-version: [
-          '3.9',
           '3.10',
           '3.11',
           '3.12',
+          '3.13',
         ]
     steps:
       - uses: actions/checkout@v4
@@ -67,3 +71,22 @@ jobs:
           pip install tox-gh-actions
       - name: Run tox
         run: tox
+  docker-build-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Get version from pyproject.toml
+        id: get-version
+        run: |
+          VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2)
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+      - name: Build Docker image
+        run: |
+          docker build \
+            --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \
+            -t sherlock-test:latest .
+      - name: Test Docker image runs
+        run: docker run --rm sherlock-test:latest --version
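The version grep above matches the `version = "0.16.0"` line that the pyproject.toml diff below introduces. The same lookup in Python, as a sketch using the standard library rather than what the workflow actually runs:

```python
import tomllib  # stdlib since Python 3.11

# Equivalent of: grep -m1 'version = ' pyproject.toml | cut -d'"' -f2
with open("pyproject.toml", "rb") as f:
    version = tomllib.load(f)["tool"]["poetry"]["version"]
print(version)  # "0.16.0" at this commit
```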

.github/workflows/validate_modified_targets.yml (new file, 100 lines)

@@ -0,0 +1,100 @@
name: Modified Target Validation

on:
  pull_request_target:
    branches:
      - master
    paths:
      - "sherlock_project/resources/data.json"

jobs:
  validate-modified-targets:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        with:
          ref: ${{ github.base_ref }}
          fetch-depth: 1
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Install Poetry
        uses: abatilo/actions-poetry@v4
        with:
          poetry-version: 'latest'
      - name: Install dependencies
        run: |
          poetry install --no-interaction --with dev
      - name: Drop in place updated manifest from base
        run: |
          cp sherlock_project/resources/data.json data.json.base
          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1
          git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json
          cp sherlock_project/resources/data.json data.json.head
      - name: Discover modified targets
        id: discover-modified
        run: |
          CHANGED=$(
          python - <<'EOF'
          import json
          with open("data.json.base") as f: base = json.load(f)
          with open("data.json.head") as f: head = json.load(f)
          changed = []
          for k, v in head.items():
              if k not in base or base[k] != v:
                  changed.append(k)
          print(",".join(sorted(changed)))
          EOF
          )
          # Preserve changelist
          echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')"
          echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT"
      - name: Validate modified targets
        if: steps.discover-modified.outputs.changed_targets != ''
        continue-on-error: true
        run: |
          poetry run pytest -q --tb no -rA -m validate_targets -n 20 \
            --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" \
            --junitxml=validation_results.xml
      - name: Prepare validation summary
        if: steps.discover-modified.outputs.changed_targets != ''
        id: prepare-summary
        run: |
          summary=$(
            poetry run python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
          )
          echo "$summary" > validation_summary.md
      - name: Announce validation results
        if: steps.discover-modified.outputs.changed_targets != ''
        uses: actions/github-script@v8
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('validation_summary.md', 'utf8');
            await github.rest.issues.createComment({
              issue_number: context.payload.pull_request.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body,
            });
      - name: This step shows as ran when no modifications are found
        if: steps.discover-modified.outputs.changed_targets == ''
        run: |
          echo "No modified targets found"

Dockerfile (modified)

@@ -2,9 +2,9 @@
 # 1. Update the version tag in the Dockerfile to match the version in sherlock/__init__.py
 # 2. Update the VCS_REF tag to match the tagged version's FULL commit hash
 # 3. Build image with BOTH latest and version tags
-#    i.e. `docker build -t sherlock/sherlock:0.15.0 -t sherlock/sherlock:latest .`
+#    i.e. `docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .`

-FROM python:3.12-slim-bullseye as build
+FROM python:3.12-slim-bullseye AS build
 WORKDIR /sherlock

 RUN pip3 install --no-cache-dir --upgrade pip

devel/summarize_site_validation.py (new file, 72 lines)

@@ -0,0 +1,72 @@
#!/usr/bin/env python

# This module summarizes the results of site validation tests queued by
# workflow validate_modified_targets for presentation in Issue comments.

from defusedxml import ElementTree as ET
import sys
from pathlib import Path


def summarize_junit_xml(xml_path: Path) -> str:
    tree = ET.parse(xml_path)
    root = tree.getroot()
    suite = root.find('testsuite')

    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    summary_lines: list[str] = []
    summary_lines.append("#### Automatic validation of changes\n")
    summary_lines.append("| Target | F+ Check | F- Check |")
    summary_lines.append("|---|---|---|")

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    results: dict[str, dict[str, str]] = {}
    for testcase in suite.findall('testcase'):
        test_name = testcase.get('name').split('[')[0]
        site_name = testcase.get('name').split('[')[1].rstrip(']')
        failure = testcase.find('failure')
        error = testcase.find('error')

        if site_name not in results:
            results[site_name] = {}

        if test_name == "test_false_neg":
            results[site_name]['F- Check'] = pass_message if failure is None and error is None else fail_message
        elif test_name == "test_false_pos":
            results[site_name]['F+ Check'] = pass_message if failure is None and error is None else fail_message

        if error is not None:
            errors_detected = True

    for result in results:
        summary_lines.append(f"| {result} | {results[result].get('F+ Check', 'Error!')} | {results[result].get('F- Check', 'Error!')} |")

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    xml_path: Path = Path(sys.argv[1])
    if not xml_path.is_file():
        print(f"Error: File '{xml_path}' does not exist.")
        sys.exit(1)

    summary: str = summarize_junit_xml(xml_path)
    print(summary)
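A quick way to exercise the summarizer locally is to feed it a hand-written JUnit file. This sketch assumes it is run from the repository root with defusedxml available (it is declared in the ci dependency group below); the XML content is hypothetical but matches the shape pytest's --junitxml emits:

```python
import subprocess
from pathlib import Path

# Hypothetical JUnit XML: one passing F+ check and one failing F- check
Path("validation_results.xml").write_text(
    '<testsuites>'
    '<testsuite failures="1">'
    '<testcase name="test_false_pos[ExampleSite]"/>'
    '<testcase name="test_false_neg[ExampleSite]"><failure/></testcase>'
    '</testsuite>'
    '</testsuites>'
)

# Prints the markdown table plus the failure notice appended by the script
print(subprocess.run(
    ["python", "devel/summarize_site_validation.py", "validation_results.xml"],
    capture_output=True, text=True, check=True,
).stdout)
```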

README.md (modified)

@@ -1,6 +1,6 @@
-<p align=center>
+<p align="center">
   <br>
-  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png"/></a>
+  <a href="https://sherlock-project.github.io/" target="_blank"><img src="images/sherlock-logo.png" alt="sherlock"/></a>
   <br>
   <span>Hunt down social media accounts by username across <a href="https://sherlockproject.xyz/sites">400+ social networks</a></span>
   <br>
@@ -15,8 +15,7 @@
 </p>
 <p align="center">
-  <img width="70%" height="70%" src="images/demo.png"/>
-</a>
+  <img width="70%" height="70%" src="images/demo.png" alt="demo"/>
 </p>
@@ -115,14 +114,14 @@ $ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
 }]
 ```
-Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmaticaly via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
+Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).

 ## Credits

 Thank you to everyone who has contributed to Sherlock! ❤️

 <a href="https://github.com/sherlock-project/sherlock/graphs/contributors">
-  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" noZoom />
+  <img src="https://contrib.rocks/image?&columns=25&max=10000&&repo=sherlock-project/sherlock" alt="contributors"/>
 </a>

 ## Star history

removed_sites.md (modified)

@@ -1982,3 +1982,16 @@ __2025-02-16 :__ Unsure if any way to view profiles exists now
   "username_claimed": "t3dotgg"
 }
 ```
+
+## TorrentGalaxy
+
+__2025-07-06 :__ Site appears to have gone offline in March and hasn't come back
+
+```json
+"TorrentGalaxy": {
+  "errorMsg": "<title>TGx:Can't show details</title>",
+  "errorType": "message",
+  "regexCheck": "^[A-Za-z0-9]{3,15}$",
+  "url": "https://torrentgalaxy.to/profile/{}",
+  "urlMain": "https://torrentgalaxy.to/",
+  "username_claimed": "GalaxyRG"
+},
+```

pyproject.toml (modified)

@@ -8,8 +8,7 @@ source = "init"
 [tool.poetry]
 name = "sherlock-project"
-# single source of truth for version is __init__.py
-version = "0"
+version = "0.16.0"
 description = "Hunt down social media accounts by username across social networks"
 license = "MIT"
 authors = [
@@ -47,15 +46,19 @@ PySocks = "^1.7.0"
 requests = "^2.22.0"
 requests-futures = "^1.0.0"
 stem = "^1.8.0"
-torrequest = "^0.1.0"
 pandas = "^2.2.1"
 openpyxl = "^3.0.10"
+tomli = "^2.2.1"

-[tool.poetry.extras]
-tor = ["torrequest"]

 [tool.poetry.group.dev.dependencies]
 jsonschema = "^4.0.0"
+rstr = "^3.2.2"
+pytest = "^8.4.2"
+pytest-xdist = "^3.8.0"
+
+[tool.poetry.group.ci.dependencies]
+defusedxml = "^0.7.1"

 [tool.poetry.scripts]
 sherlock = 'sherlock_project.sherlock:main'

pytest.ini (modified)

@@ -1,4 +1,7 @@
 [pytest]
-addopts = --strict-markers
+addopts = --strict-markers -m "not validate_targets"
 markers =
     online: mark tests are requiring internet access.
+    validate_targets: mark tests for sweeping manifest validation (sends many requests).
+    validate_targets_fp: validate_targets, false positive tests only.
+    validate_targets_fn: validate_targets, false negative tests only.

sherlock_project/__init__.py (modified)

@@ -5,11 +5,26 @@ networks.
 """

+from importlib.metadata import version as pkg_version, PackageNotFoundError
+import pathlib
+import tomli
+
+
+def get_version() -> str:
+    """Fetch the version number of the installed package."""
+    try:
+        return pkg_version("sherlock_project")
+    except PackageNotFoundError:
+        pyproject_path: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent / "pyproject.toml"
+        with pyproject_path.open("rb") as f:
+            pyproject_data = tomli.load(f)
+        return pyproject_data["tool"]["poetry"]["version"]
+
+
 # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package
 import_error_test_var = None

 __shortname__ = "Sherlock"
 __longname__ = "Sherlock: Find Usernames Across Social Networks"
-__version__ = "0.15.0"
+__version__ = get_version()

 forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest"
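With this change, __version__ resolves through installed package metadata, and the tomli fallback only triggers for uninstalled source checkouts. A minimal check, assuming a pip- or poetry-installed copy of this commit:

```python
from importlib.metadata import version

# Resolves from installed package metadata, matching pyproject.toml
print(version("sherlock_project"))  # "0.16.0" at this commit
```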

sherlock_project/resources/data.json (modified)

@@ -79,13 +79,13 @@
     "username_claimed": "pink"
   },
   "AllMyLinks": {
-    "errorMsg": "Not Found",
+    "errorMsg": "Page not found",
     "errorType": "message",
     "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$",
     "url": "https://allmylinks.com/{}",
     "urlMain": "https://allmylinks.com/",
     "username_claimed": "blue"
   },
   "AniWorld": {
     "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar",
     "errorType": "message",
@@ -115,12 +115,20 @@
     "username_claimed": "lio24d"
   },
   "Apple Discussions": {
-    "errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.",
+    "errorMsg": "Looking for something in Apple Support Communities?",
     "errorType": "message",
     "url": "https://discussions.apple.com/profile/{}",
     "urlMain": "https://discussions.apple.com",
     "username_claimed": "jason"
   },
+  "Aparat": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.aparat.com/{}/",
+    "urlMain": "https://www.aparat.com/",
+    "urlProbe": "https://www.aparat.com/api/fa/v1/user/user/information/username/{}",
+    "username_claimed": "jadi"
+  },
   "Archive of Our Own": {
     "errorType": "status_code",
     "regexCheck": "^[^.]*?$",
@@ -250,6 +258,12 @@
     "urlMain": "https://www.blipfoto.com/",
     "username_claimed": "blue"
   },
+  "Blitz Tactics": {
+    "errorType": "status_code",
+    "url": "https://blitztactics.com/{}",
+    "urlMain": "https://blitztactics.com/",
+    "username_claimed": "Lance5500"
+  },
   "Blogger": {
     "errorType": "status_code",
     "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
@@ -257,13 +271,12 @@
     "urlMain": "https://www.blogger.com/",
     "username_claimed": "blue"
   },
-  "BoardGameGeek": {
-    "errorType": "message",
-    "regexCheck": "^[a-zA-Z0-9_]*$",
-    "errorMsg": "User not found",
-    "url": "https://boardgamegeek.com/user/{}",
-    "urlMain": "https://boardgamegeek.com",
-    "username_claimed": "blue"
+  "Bluesky": {
+    "errorType": "status_code",
+    "url": "https://bsky.app/profile/{}.bsky.social",
+    "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
+    "urlMain": "https://bsky.app/",
+    "username_claimed": "mcuban"
   },
   "BongaCams": {
     "errorType": "status_code",
@@ -278,6 +291,14 @@
     "urlMain": "https://www.bookcrossing.com/",
     "username_claimed": "blue"
   },
+  "BoardGameGeek": {
+    "errorMsg": "\"isValid\":true",
+    "errorType": "message",
+    "url": "https://boardgamegeek.com/user/{}",
+    "urlMain": "https://boardgamegeek.com/",
+    "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}",
+    "username_claimed": "blue"
+  },
   "BraveCommunity": {
     "errorType": "status_code",
     "url": "https://community.brave.com/u/{}/",
@@ -357,6 +378,12 @@
     "urlMain": "https://career.habr.com/",
     "username_claimed": "blue"
   },
+  "CashApp": {
+    "errorType": "status_code",
+    "url": "https://cash.app/${}",
+    "urlMain": "https://cash.app",
+    "username_claimed": "hotdiggitydog"
+  },
   "Championat": {
     "errorType": "status_code",
     "url": "https://www.championat.com/user/{}",
@@ -479,7 +506,8 @@
     "username_claimed": "hacker"
   },
   "Code Sandbox": {
-    "errorType": "status_code",
+    "errorType": "message",
+    "errorMsg": "Whoops, page not found",
     "url": "https://codesandbox.io/u/{}",
     "urlMain": "https://codesandbox.io",
     "username_claimed": "icyjoseph"
@@ -551,8 +579,7 @@
     "username_claimed": "brown"
   },
   "CyberDefenders": {
-    "errorMsg": "<title>Blue Team Training for SOC analysts and DFIR - CyberDefenders</title>",
-    "errorType": "message",
+    "errorType": "status_code",
     "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$",
     "request_method": "GET",
     "url": "https://cyberdefenders.org/p/{}",
@@ -579,6 +606,12 @@
     "urlMain": "https://www.dailymotion.com/",
     "username_claimed": "blue"
   },
+  "dcinside": {
+    "errorType": "status_code",
+    "url": "https://gallog.dcinside.com/{}",
+    "urlMain": "https://www.dcinside.com/",
+    "username_claimed": "anrbrb"
+  },
   "Dealabs": {
     "errorMsg": "La page que vous essayez",
     "errorType": "message",
@@ -587,20 +620,21 @@
     "urlMain": "https://www.dealabs.com/",
     "username_claimed": "blue"
   },
-  "DeviantART": {
-    "errorType": "status_code",
-    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
-    "url": "https://{}.deviantart.com",
-    "urlMain": "https://deviantart.com",
-    "username_claimed": "blue"
-  },
+  "DeviantArt": {
+    "errorType": "message",
+    "errorMsg": "Llama Not Found",
+    "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
+    "url": "https://www.deviantart.com/{}",
+    "urlMain": "https://www.deviantart.com/",
+    "username_claimed": "blue"
+  },
   "DigitalSpy": {
     "errorMsg": "The page you were looking for could not be found.",
     "errorType": "message",
     "url": "https://forums.digitalspy.com/profile/{}",
     "urlMain": "https://forums.digitalspy.com/",
     "username_claimed": "blue",
     "regexCheck": "^\\w{3,20}$"
   },
   "Discogs": {
     "errorType": "status_code",
@@ -786,13 +820,12 @@
     "urlMain": "https://fosstodon.org/",
     "username_claimed": "blue"
   },
-  "Freelance.habr": {
-    "errorMsg": "<div class=\"icon_user_locked\"></div>",
-    "errorType": "message",
-    "regexCheck": "^((?!\\.).)*$",
-    "url": "https://freelance.habr.com/freelancers/{}",
-    "urlMain": "https://freelance.habr.com/",
-    "username_claimed": "adam"
+  "Framapiaf": {
+    "errorType": "status_code",
+    "regexCheck": "^[a-zA-Z0-9_]{1,30}$",
+    "url": "https://framapiaf.org/@{}",
+    "urlMain": "https://framapiaf.org",
+    "username_claimed": "pylapp"
   },
   "Freelancer": {
     "errorMsg": "\"users\":{}",
@@ -1129,6 +1162,13 @@
     "urlProbe": "https://imginn.com/{}",
     "username_claimed": "instagram"
   },
+  "Instapaper": {
+    "errorType": "status_code",
+    "request_method": "GET",
+    "url": "https://www.instapaper.com/p/{}",
+    "urlMain": "https://www.instapaper.com/",
+    "username_claimed": "john"
+  },
   "Instructables": {
     "errorType": "status_code",
     "url": "https://www.instructables.com/member/{}",
@@ -1241,6 +1281,13 @@
     "urlMain": "https://linux.org.ru/",
     "username_claimed": "red"
   },
+  "Laracast": {
+    "errorType":"status_code",
+    "url": "https://laracasts.com/@{}",
+    "urlMain": "https://laracasts.com/",
+    "regexCheck": "^[a-zA-Z0-9_-]{3,}$",
+    "username_claimed": "user1"
+  },
   "Launchpad": {
     "errorType": "status_code",
     "url": "https://launchpad.net/~{}",
@@ -1298,6 +1345,12 @@
     "urlMain": "https://linktr.ee/",
     "username_claimed": "anne"
   },
+  "LinuxFR.org": {
+    "errorType": "status_code",
+    "url": "https://linuxfr.org/users/{}",
+    "urlMain": "https://linuxfr.org/",
+    "username_claimed": "pylapp"
+  },
   "Listed": {
     "errorType": "response_url",
     "errorUrl": "https://listed.to/@{}",
@@ -1338,6 +1391,13 @@
     "urlMain": "https://forums.mmorpg.com/",
     "username_claimed": "goku"
   },
+  "Mamot": {
+    "errorType": "status_code",
+    "regexCheck": "^[a-zA-Z0-9_]{1,30}$",
+    "url": "https://mamot.fr/@{}",
+    "urlMain": "https://mamot.fr/",
+    "username_claimed": "anciensEnssat"
+  },
   "Medium": {
     "errorMsg": "<body",
     "errorType": "message",
@@ -1353,8 +1413,8 @@
     "username_claimed": "blue"
   },
   "Minecraft": {
-    "errorCode": 204,
-    "errorType": "status_code",
+    "errorMsg": "Couldn't find any profile with name",
+    "errorType": "message",
     "url": "https://api.mojang.com/users/profiles/minecraft/{}",
     "urlMain": "https://minecraft.net/",
     "username_claimed": "blue"
@@ -1400,12 +1460,12 @@
     "username_claimed": "blue"
   },
   "Mydramalist": {
-    "errorMsg": "Sign in - MyDramaList",
+    "errorMsg": "The requested page was not found",
     "errorType": "message",
     "url": "https://www.mydramalist.com/profile/{}",
     "urlMain": "https://mydramalist.com",
     "username_claimed": "elhadidy12398"
   },
   "Myspace": {
     "errorType": "status_code",
     "url": "https://myspace.com/{}",
@@ -1419,6 +1479,13 @@
     "urlMain": "https://www.native-instruments.com/forum/",
     "username_claimed": "jambert"
   },
+  "namuwiki": {
+    "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.",
+    "errorType": "status_code",
+    "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}",
+    "urlMain": "https://namu.wiki/",
+    "username_claimed": "namu"
+  },
   "NationStates Nation": {
     "errorMsg": "Was this your nation? It may have ceased to exist due to inactivity, but can rise again!",
     "errorType": "message",
@@ -1499,6 +1566,13 @@
     "urlMain": "https://nyaa.si/",
     "username_claimed": "blue"
   },
+  "Open Collective": {
+    "errorMsg": "Oops! Page not found",
+    "errorType": "message",
+    "url": "https://opencollective.com/{}",
+    "urlMain": "https://opencollective.com/",
+    "username_claimed": "pylapp"
+  },
   "OpenStreetMap": {
     "errorType": "status_code",
     "regexCheck": "^[^.]*?$",
@@ -1519,6 +1593,13 @@
     "urlMain": "https://ourdjtalk.com/",
     "username_claimed": "steve"
   },
+  "Outgress": {
+    "errorMsg": "Outgress - Error",
+    "errorType": "message",
+    "url": "https://outgress.com/agents/{}",
+    "urlMain": "https://outgress.com/",
+    "username_claimed": "pylapp"
+  },
   "PCGamer": {
     "errorMsg": "The specified member cannot be found. Please enter a member's entire name.",
     "errorType": "message",
@@ -1580,12 +1661,31 @@
     "urlMain": "https://www.pinkbike.com/",
     "username_claimed": "blue"
   },
+  "pixelfed.social": {
+    "errorType": "status_code",
+    "url": "https://pixelfed.social/{}/",
+    "urlMain": "https://pixelfed.social",
+    "username_claimed": "pylapp"
+  },
   "PlayStore": {
     "errorType": "status_code",
     "url": "https://play.google.com/store/apps/developer?id={}",
     "urlMain": "https://play.google.com/store",
     "username_claimed": "Facebook"
   },
+  "Playstrategy": {
+    "errorType": "status_code",
+    "url": "https://playstrategy.org/@/{}",
+    "urlMain": "https://playstrategy.org",
+    "username_claimed": "oruro"
+  },
+  "Plurk": {
+    "errorMsg": "User Not Found!",
+    "errorType": "message",
+    "url": "https://www.plurk.com/{}",
+    "urlMain": "https://www.plurk.com/",
+    "username_claimed": "plurkoffice"
+  },
   "PocketStars": {
     "errorMsg": "Join Your Favorite Adult Stars",
     "errorType": "message",
@@ -1633,6 +1733,20 @@
     "urlMain": "https://www.producthunt.com/",
     "username_claimed": "jenny"
   },
+  "programming.dev": {
+    "errorMsg": "Error!",
+    "errorType": "message",
+    "url": "https://programming.dev/u/{}",
+    "urlMain": "https://programming.dev",
+    "username_claimed": "pylapp"
+  },
+  "Pychess": {
+    "errorType": "message",
+    "errorMsg": "404",
+    "url": "https://www.pychess.org/@/{}",
+    "urlMain": "https://www.pychess.org",
+    "username_claimed": "gbtami"
+  },
   "PromoDJ": {
     "errorType": "status_code",
     "url": "http://promodj.com/{}",
@@ -1722,8 +1836,7 @@
     "username_claimed": "blue"
   },
   "Roblox": {
-    "errorMsg": "Page cannot be found or no longer exists",
-    "errorType": "message",
+    "errorType": "status_code",
     "url": "https://www.roblox.com/user.aspx?username={}",
     "urlMain": "https://www.roblox.com/",
     "username_claimed": "bluewolfekiller"
@@ -1831,7 +1944,7 @@
   },
   "SlideShare": {
     "errorType": "message",
-    "errorMsg": "<title>Username available</title>",
+    "errorMsg": "<title>Page no longer exists</title>",
     "url": "https://slideshare.net/{}",
     "urlMain": "https://slideshare.net/",
     "username_claimed": "blue"
@@ -1865,6 +1978,13 @@
     "urlMain": "https://www.snapchat.com",
     "username_claimed": "teamsnapchat"
   },
+  "SOOP": {
+    "errorType": "status_code",
+    "url": "https://www.sooplive.co.kr/station/{}",
+    "urlMain": "https://www.sooplive.co.kr/",
+    "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station",
+    "username_claimed": "udkn"
+  },
   "SoundCloud": {
     "errorType": "status_code",
     "url": "https://soundcloud.com/{}",
@@ -1884,6 +2004,12 @@
     "urlMain": "https://soylentnews.org",
     "username_claimed": "adam"
   },
+  "SpeakerDeck": {
+    "errorType": "status_code",
+    "url": "https://speakerdeck.com/{}",
+    "urlMain": "https://speakerdeck.com/",
+    "username_claimed": "pylapp"
+  },
   "Speedrun.com": {
     "errorType": "status_code",
     "url": "https://speedrun.com/users/{}",
@@ -2025,6 +2151,12 @@
     "urlMain": "https://themeforest.net/",
     "username_claimed": "user"
   },
+  "tistory": {
+    "errorType": "status_code",
+    "url": "https://{}.tistory.com/",
+    "urlMain": "https://www.tistory.com/",
+    "username_claimed": "notice"
+  },
   "TnAFlix": {
     "errorType": "status_code",
     "isNSFW": true,
@@ -2032,14 +2164,6 @@
     "urlMain": "https://www.tnaflix.com/",
     "username_claimed": "hacker"
   },
-  "TorrentGalaxy": {
-    "errorMsg": "<title>TGx:Can't show details</title>",
-    "errorType": "message",
-    "regexCheck": "^[A-Za-z0-9]{3,15}$",
-    "url": "https://torrentgalaxy.to/profile/{}",
-    "urlMain": "https://torrentgalaxy.to/",
-    "username_claimed": "GalaxyRG"
-  },
   "TradingView": {
     "errorType": "status_code",
     "request_method": "GET",
@@ -2706,7 +2830,7 @@
     "username_claimed": "green"
   },
   "threads": {
-    "errorMsg": "<title>Threads</title>",
+    "errorMsg": "<title>Threads • Log in</title>",
     "errorType": "message",
     "headers": {
       "Sec-Fetch-Mode": "navigate"
@@ -2721,12 +2845,24 @@
     "urlMain": "https://www.toster.ru/",
     "username_claimed": "adam"
   },
+  "tumblr": {
+    "errorType": "status_code",
+    "url": "https://{}.tumblr.com/",
+    "urlMain": "https://www.tumblr.com/",
+    "username_claimed": "goku"
+  },
   "uid": {
     "errorType": "status_code",
     "url": "http://uid.me/{}",
     "urlMain": "https://uid.me/",
     "username_claimed": "blue"
   },
+  "write.as": {
+    "errorType": "status_code",
+    "url": "https://write.as/{}",
+    "urlMain": "https://write.as",
+    "username_claimed": "pylapp"
+  },
   "xHamster": {
     "errorType": "status_code",
     "isNSFW": true,
@@ -2747,5 +2883,13 @@
     "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social",
     "urlMain": "https://bsky.app/",
     "username_claimed": "mcuban"
-  }
+  },
+  "Platzi": {
+    "errorType": "status_code",
+    "errorCode": 404,
+    "url": "https://platzi.com/p/{}/",
+    "urlMain": "https://platzi.com/",
+    "username_claimed": "freddier",
+    "request_method": "GET"
+  }
 }
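The commit title refers to the Code Sandbox entry above, which moves from a status_code check to a message check on "Whoops, page not found". A simplified sketch of what a message-type manifest entry means in practice (the helper name and flow are illustrative, not Sherlock's API):

```python
import requests

def looks_available(entry: dict, username: str) -> bool:
    """Illustrative message-type check: the username is treated as available
    when the manifest's errorMsg appears in the profile page body."""
    r = requests.get(entry["url"].format(username), timeout=30)
    errors = entry["errorMsg"]
    errors = [errors] if isinstance(errors, str) else errors
    return any(err in r.text for err in errors)

code_sandbox = {
    "errorType": "message",
    "errorMsg": "Whoops, page not found",
    "url": "https://codesandbox.io/u/{}",
}
print(looks_available(code_sandbox, "icyjoseph"))  # expected False for a claimed name
```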

Sherlock Target Manifest JSON schema (modified)

@ -1,80 +1,149 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest", "title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames", "description": "Social media targets to probe for the existence of known usernames",
"type": "object", "type": "object",
"properties": { "properties": {
"$schema": { "type": "string" } "$schema": { "type": "string" }
}, },
"patternProperties": { "patternProperties": {
"^(?!\\$).*?$": { "^(?!\\$).*?$": {
"type": "object", "type": "object",
"description": "Target name and associated information (key should be human readable name)", "description": "Target name and associated information (key should be human readable name)",
"required": [ "url", "urlMain", "errorType", "username_claimed" ], "required": ["url", "urlMain", "errorType", "username_claimed"],
"properties": { "properties": {
"url": { "type": "string" }, "url": { "type": "string" },
"urlMain": { "type": "string" }, "urlMain": { "type": "string" },
"urlProbe": { "type": "string" }, "urlProbe": { "type": "string" },
"username_claimed": { "type": "string" }, "username_claimed": { "type": "string" },
"regexCheck": { "type": "string" }, "regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" }, "isNSFW": { "type": "boolean" },
"headers": { "type": "object" }, "headers": { "type": "object" },
"request_payload": { "type": "object" }, "request_payload": { "type": "object" },
"__comment__": { "__comment__": {
"type": "string", "type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
}, },
"tags": { "tags": {
"oneOf": [ "oneOf": [
{ "$ref": "#/$defs/tag" }, { "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } } { "type": "array", "items": { "$ref": "#/$defs/tag" } }
] ]
}, },
"request_method": { "request_method": {
"type": "string", "type": "string",
"enum": [ "GET", "POST", "HEAD", "PUT" ] "enum": ["GET", "POST", "HEAD", "PUT"]
}, },
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": { "errorType": {
"type": "string", "type": "array",
"enum": [ "message", "response_url", "status_code" ] "contains": { "const": "message" }
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"properties" : { "errorType": { "const": "message" } }
},
"errorUrl": {
"properties": { "errorType": { "const": "response_url" } }
},
"errorCode": {
"properties": { "errorType": { "const": "status_code" } }
} }
}, }
"if": { "properties": { "errorType": { "const": "message" } } }, }
"then": { "required": [ "errorMsg" ] }, ]
"else": { },
"if": { "properties": { "errorType": { "const": "response_url" } } }, "errorUrl": {
"then": { "required": [ "errorUrl" ] } "oneOf": [
}, { "properties": { "errorType": { "const": "response_url" } } },
"additionalProperties": false {
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
}
}
}
]
} }
}, },
"additionalProperties": false, "allOf": [
"$defs": { {
"tag": { "type": "string", "enum": [ "adult", "gaming" ] } "if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
}
}
}
]
},
"then": { "required": ["errorMsg"] }
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"then": { "required": ["errorUrl"] }
}
],
"additionalProperties": false
} }
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
}
} }
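Because jsonschema is already a dev dependency (see the pyproject.toml diff above), the widened errorType can be sanity-checked locally. A sketch, with the schema path and the sample entry both assumed:

```python
import json
from jsonschema import validate

with open("sherlock_project/resources/data.schema.json") as f:  # assumed path
    schema = json.load(f)

# Hypothetical target exercising the new array-valued errorType;
# "message" in the array makes errorMsg required via the allOf rule.
manifest = {
    "ExampleSite": {
        "errorType": ["message", "status_code"],
        "errorMsg": "Page not found",
        "errorCode": 404,
        "url": "https://example.com/{}",
        "urlMain": "https://example.com/",
        "username_claimed": "blue"
    }
}
validate(instance=manifest, schema=schema)  # raises ValidationError if malformed
```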

sherlock_project/sherlock.py (modified)

@ -169,14 +169,12 @@ def multiple_usernames(username):
def sherlock( def sherlock(
username: str, username: str,
site_data: dict, site_data: dict[str, dict[str, str]],
query_notify: QueryNotify, query_notify: QueryNotify,
tor: bool = False,
unique_tor: bool = False,
dump_response: bool = False, dump_response: bool = False,
proxy: Optional[str] = None, proxy: Optional[str] = None,
timeout: int = 60, timeout: int = 60,
): ) -> dict[str, dict[str, str | QueryResult]]:
"""Run Sherlock Analysis. """Run Sherlock Analysis.
Checks for existence of username on various social media sites. Checks for existence of username on various social media sites.
@ -188,8 +186,6 @@ def sherlock(
query_notify -- Object with base type of QueryNotify(). query_notify -- Object with base type of QueryNotify().
This will be used to notify the caller about This will be used to notify the caller about
query results. query results.
tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
proxy -- String indicating the proxy URL proxy -- String indicating the proxy URL
timeout -- Time in seconds to wait before timing out request. timeout -- Time in seconds to wait before timing out request.
Default is 60 seconds. Default is 60 seconds.
@ -210,32 +206,9 @@ def sherlock(
# Notify caller that we are starting the query. # Notify caller that we are starting the query.
query_notify.start(username) query_notify.start(username)
# Create session based on request methodology
if tor or unique_tor:
try:
from torrequest import TorRequest # noqa: E402
except ImportError:
print("Important!")
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.")
print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.")
print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n")
sys.exit(query_notify.finish())
print("Important!") # Normal requests
print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") underlying_session = requests.session()
# Requests using Tor obfuscation
try:
underlying_request = TorRequest()
except OSError:
print("Tor not found in system path. Unable to continue.\n")
sys.exit(query_notify.finish())
underlying_session = underlying_request.session
else:
# Normal requests
underlying_session = requests.session()
underlying_request = requests.Request()
# Limit number of workers to 20. # Limit number of workers to 20.
# This is probably vastly overkill. # This is probably vastly overkill.
@ -359,15 +332,10 @@ def sherlock(
# Store future in data for access later # Store future in data for access later
net_info["request_future"] = future net_info["request_future"] = future
# Reset identify for tor (if needed)
if unique_tor:
underlying_request.reset_identity()
# Add this site's results into final dictionary with all the other results. # Add this site's results into final dictionary with all the other results.
results_total[social_network] = results_site results_total[social_network] = results_site
# Open the file containing account links # Open the file containing account links
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in site_data.items(): for social_network, net_info in site_data.items():
# Retrieve results again # Retrieve results again
results_site = results_total.get(social_network) results_site = results_total.get(social_network)
@ -381,6 +349,8 @@ def sherlock(
# Get the expected error type # Get the expected error type
error_type = net_info["errorType"] error_type = net_info["errorType"]
if isinstance(error_type, str):
error_type: list[str] = [error_type]
# Retrieve future and ensure it has finished # Retrieve future and ensure it has finished
future = net_info["request_future"] future = net_info["request_future"]
@ -425,58 +395,60 @@ def sherlock(
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF query_status = QueryStatus.WAF
elif error_type == "message":
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
elif error_type == "status_code":
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
elif error_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else: else:
# It should be impossible to ever get here... if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
raise ValueError( error_context = f"Unknown error type '{error_type}' for {social_network}"
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'" query_status = QueryStatus.UNKNOWN
) else:
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if dump_response: if dump_response:
print("+++++++++++++++++++++") print("+++++++++++++++++++++")
@ -507,7 +479,7 @@ def sherlock(
print("+++++++++++++++++++++") print("+++++++++++++++++++++")
# Notify caller about results of query. # Notify caller about results of query.
result = QueryResult( result: QueryResult = QueryResult(
username=username, username=username,
site_name=social_network, site_name=social_network,
site_url_user=url, site_url_user=url,
@ -596,22 +568,6 @@ def main():
dest="output", dest="output",
help="If using single username, the output of the result will be saved to this file.", help="If using single username, the output of the result will be saved to this file.",
) )
parser.add_argument(
"--tor",
"-t",
action="store_true",
dest="tor",
default=False,
help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument(
"--unique-tor",
"-u",
action="store_true",
dest="unique_tor",
default=False,
help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.",
)
parser.add_argument( parser.add_argument(
"--csv", "--csv",
action="store_true", action="store_true",
@ -719,12 +675,30 @@ def main():
help="Include checking of NSFW sites from default list.", help="Include checking of NSFW sites from default list.",
) )
# TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed
# in future release
parser.add_argument( parser.add_argument(
"--no-txt", "--no-txt",
action="store_true", action="store_true",
dest="no_txt", dest="no_txt",
default=False, default=False,
help="Disable creation of a txt file", help="Disable creation of a txt file - WILL BE DEPRECATED",
)
parser.add_argument(
"--txt",
action="store_true",
dest="output_txt",
default=False,
help="Enable creation of a txt file",
)
parser.add_argument(
"--ignore-exclusions",
action="store_true",
dest="ignore_exclusions",
default=False,
help="Ignore upstream exclusions (may return more false positives)",
) )
args = parser.parse_args() args = parser.parse_args()
@ -734,7 +708,7 @@ def main():
# Check for newer version of Sherlock. If it exists, let the user know about it # Check for newer version of Sherlock. If it exists, let the user know about it
try: try:
latest_release_raw = requests.get(forge_api_latest_release).text latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text
latest_release_json = json_loads(latest_release_raw) latest_release_json = json_loads(latest_release_raw)
latest_remote_tag = latest_release_json["tag_name"] latest_remote_tag = latest_release_json["tag_name"]
@ -747,22 +721,10 @@ def main():
except Exception as error: except Exception as error:
print(f"A problem occurred while checking for an update: {error}") print(f"A problem occurred while checking for an update: {error}")
# Argument check
# TODO regex check on args.proxy
if args.tor and (args.proxy is not None):
raise Exception("Tor and Proxy cannot be set at the same time.")
# Make prompts # Make prompts
if args.proxy is not None: if args.proxy is not None:
print("Using the proxy: " + args.proxy) print("Using the proxy: " + args.proxy)
if args.tor or args.unique_tor:
print("Using Tor to make requests")
print(
"Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
)
if args.no_color: if args.no_color:
# Disable color output. # Disable color output.
init(strip=True, convert=False) init(strip=True, convert=False)
@ -784,7 +746,8 @@ def main():
try: try:
if args.local: if args.local:
sites = SitesInformation( sites = SitesInformation(
os.path.join(os.path.dirname(__file__), "resources/data.json") os.path.join(os.path.dirname(__file__), "resources/data.json"),
honor_exclusions=False,
) )
else: else:
json_file_location = args.json_file json_file_location = args.json_file
@ -793,7 +756,7 @@ def main():
if args.json_file.isnumeric(): if args.json_file.isnumeric():
pull_number = args.json_file pull_number = args.json_file
pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}"
pull_request_raw = requests.get(pull_url).text pull_request_raw = requests.get(pull_url, timeout=10).text
pull_request_json = json_loads(pull_request_raw) pull_request_json = json_loads(pull_request_raw)
# Check if it's a valid pull request # Check if it's a valid pull request
@ -804,7 +767,11 @@ def main():
head_commit_sha = pull_request_json["head"]["sha"] head_commit_sha = pull_request_json["head"]["sha"]
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json" json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
sites = SitesInformation(json_file_location) sites = SitesInformation(
data_file_path=json_file_location,
honor_exclusions=not args.ignore_exclusions,
do_not_exclude=args.site_list,
)
except Exception as error: except Exception as error:
print(f"ERROR: {error}") print(f"ERROR: {error}")
sys.exit(1) sys.exit(1)
@ -858,8 +825,6 @@ def main():
username, username,
site_data, site_data,
query_notify, query_notify,
tor=args.tor,
unique_tor=args.unique_tor,
dump_response=args.dump_response, dump_response=args.dump_response,
proxy=args.proxy, proxy=args.proxy,
timeout=args.timeout, timeout=args.timeout,
@ -875,7 +840,7 @@ def main():
else: else:
result_file = f"{username}.txt" result_file = f"{username}.txt"
if not args.no_txt: if args.output_txt:
with open(result_file, "w", encoding="utf-8") as file: with open(result_file, "w", encoding="utf-8") as file:
exists_counter = 0 exists_counter = 0
for website_name in results: for website_name in results:
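With errorType normalized to a list, the checks above now compose: the message check runs first, and the status_code and response_url checks only run when no earlier check has already marked the username available. A condensed sketch of that precedence (deliberately simplified; the real code also honors errorCode lists and WAF detection):

```python
def classify(error_types: list[str], status_code: int, body: str, error_msg: str) -> str:
    """Condensed precedence of the combined checks above."""
    status = "claimed"
    if "message" in error_types and error_msg in body:
        status = "available"
    if "status_code" in error_types and status != "available":
        if not (200 <= status_code < 300):
            status = "available"
    if "response_url" in error_types and status != "available":
        if not (200 <= status_code < 300):
            status = "available"
    return status

print(classify(["message", "status_code"], 200, "Whoops, page not found", "Whoops, page not found"))
# -> "available": the message check fires even though the status code is 200
```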

sherlock_project/sites.py (modified)

@ -7,6 +7,10 @@ import json
import requests import requests
import secrets import secrets
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
class SiteInformation: class SiteInformation:
def __init__(self, name, url_home, url_username_format, username_claimed, def __init__(self, name, url_home, url_username_format, username_claimed,
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
@ -67,12 +71,17 @@ class SiteInformation:
Return Value: Return Value:
Nicely formatted string to get information about this object. Nicely formatted string to get information about this object.
""" """
return f"{self.name} ({self.url_home})" return f"{self.name} ({self.url_home})"
class SitesInformation: class SitesInformation:
def __init__(self, data_file_path=None): def __init__(
self,
data_file_path: str|None = None,
honor_exclusions: bool = True,
do_not_exclude: list[str] = [],
):
"""Create Sites Information Object. """Create Sites Information Object.
Contains information about all supported websites. Contains information about all supported websites.
@ -110,7 +119,7 @@ class SitesInformation:
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using # The default data file is the live data.json which is in the GitHub repo. The reason why we are using
# this instead of the local one is so that the user has the most up-to-date data. This prevents # this instead of the local one is so that the user has the most up-to-date data. This prevents
# users from creating issue about false positives which has already been fixed or having outdated data # users from creating issue about false positives which has already been fixed or having outdated data
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json" data_file_path = MANIFEST_URL
# Ensure that specified data file has correct extension. # Ensure that specified data file has correct extension.
if not data_file_path.lower().endswith(".json"): if not data_file_path.lower().endswith(".json"):
@ -120,7 +129,7 @@ class SitesInformation:
if data_file_path.lower().startswith("http"): if data_file_path.lower().startswith("http"):
# Reference is to a URL. # Reference is to a URL.
try: try:
response = requests.get(url=data_file_path) response = requests.get(url=data_file_path, timeout=30)
except Exception as error: except Exception as error:
raise FileNotFoundError( raise FileNotFoundError(
f"Problem while attempting to access data file URL '{data_file_path}': {error}" f"Problem while attempting to access data file URL '{data_file_path}': {error}"
@@ -152,9 +161,31 @@ class SitesInformation:
                 raise FileNotFoundError(f"Problem while attempting to access "
                                         f"data file '{data_file_path}'."
                                         )

         site_data.pop('$schema', None)

+        if honor_exclusions:
+            try:
+                response = requests.get(url=EXCLUSIONS_URL, timeout=10)
+                if response.status_code == 200:
+                    exclusions = response.text.splitlines()
+                    exclusions = [exclusion.strip() for exclusion in exclusions]
+                    for site in do_not_exclude:
+                        if site in exclusions:
+                            exclusions.remove(site)
+                    for exclusion in exclusions:
+                        # pop() with a default never raises KeyError, so no try/except is needed
+                        site_data.pop(exclusion, None)
+            except Exception:
+                # If there was any problem loading the exclusions, just continue without them
+                print("Warning: Could not load exclusions, continuing without them.")
+                honor_exclusions = False
+
         self.sites = {}

         # Add all site information from the json file to internal site list.
@@ -194,7 +225,7 @@ class SitesInformation:
         for site in self.sites:
             if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
                 continue
             sites[site] = self.sites[site]
         self.sites = sites

     def site_name_list(self):
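For downstream callers, the new constructor surface can be exercised as in this minimal sketch (assuming `sherlock_project` is importable and the exclusions branch is reachable; the name passed to `do_not_exclude` is a placeholder):

    from sherlock_project.sites import SitesInformation

    # Default behaviour: load the live manifest, then drop every site
    # named on the auto-generated exclusions branch.
    sites = SitesInformation()

    # Keep a specific site even if it is excluded (placeholder name),
    # or skip the exclusions fetch entirely.
    sites_keep = SitesInformation(do_not_exclude=["SomeNoisySite"])
    sites_all = SitesInformation(honor_exclusions=False)

    print(len(sites.site_name_list()))  # site count after exclusions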

View File

@@ -4,6 +4,11 @@ import urllib

 import pytest
 from sherlock_project.sites import SitesInformation

+def fetch_local_manifest(honor_exclusions: bool = True) -> dict[str, dict[str, str]]:
+    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"), honor_exclusions=honor_exclusions)
+    sites_iterable: dict[str, dict[str, str]] = {site.name: site.information for site in sites_obj}
+    return sites_iterable
+
 @pytest.fixture()
 def sites_obj():
     sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
@@ -11,9 +16,7 @@ def sites_obj():

 @pytest.fixture(scope="session")
 def sites_info():
-    sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json"))
-    sites_iterable = {site.name: site.information for site in sites_obj}
-    yield sites_iterable
+    yield fetch_local_manifest()

 @pytest.fixture(scope="session")
 def remote_schema():
@@ -21,3 +24,28 @@ def remote_schema():
     with urllib.request.urlopen(schema_url) as remoteschema:
         schemadat = json.load(remoteschema)
         yield schemadat
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--chunked-sites",
+        action="store",
+        default=None,
+        help="For tests utilizing chunked sites, include only the (comma-separated) site(s) specified.",
+    )
+
+def pytest_generate_tests(metafunc):
+    if "chunked_sites" in metafunc.fixturenames:
+        sites_info = fetch_local_manifest(honor_exclusions=False)
+
+        # Ingest and apply site selections
+        site_filter: str | None = metafunc.config.getoption("--chunked-sites")
+        if site_filter:
+            selected_sites: list[str] = [site.strip() for site in site_filter.split(",")]
+            sites_info = {
+                site: data for site, data in sites_info.items()
+                if site in selected_sites
+            }
+
+        params = [{name: data} for name, data in sites_info.items()]
+        ids = list(sites_info.keys())
+        metafunc.parametrize("chunked_sites", params, ids=ids)
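With these hooks in place, any test that declares a `chunked_sites` argument runs once per site, and a run can be narrowed from the CLI, e.g. `pytest --chunked-sites "GitHub,GitLab"`. A hypothetical consuming test (not part of this commit), assuming manifest entries carry a `url` field:

    def test_site_entry_is_wellformed(chunked_sites: dict[str, dict[str, str]]):
        # Each parametrized invocation receives a single-site mapping.
        for name, data in chunked_sites.items():
            assert "url" in data, f"{name} has no url field"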

View File

@@ -7,7 +7,7 @@ def test_validate_manifest_against_local_schema():
     """Ensures that the manifest matches the local schema, for situations where the schema is being changed."""
     json_relative: str = '../sherlock_project/resources/data.json'
     schema_relative: str = '../sherlock_project/resources/data.schema.json'

     json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
     schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)

View File

@@ -0,0 +1,99 @@
+import pytest
+import re
+import rstr
+
+from sherlock_project.sherlock import sherlock
+from sherlock_project.notify import QueryNotify
+from sherlock_project.result import QueryResult, QueryStatus
+
+FALSE_POSITIVE_ATTEMPTS: int = 2  # Since the usernames are randomly generated, it's POSSIBLE that a real username can be hit
+FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND: int = 15  # If a pattern uses quantifiers such as `+` `*` or `{n,}`, limit the upper bound (0 to disable)
+FALSE_POSITIVE_DEFAULT_PATTERN: str = r'^[a-zA-Z0-9]{7,20}$'  # Used in absence of a regexCheck entry
+
+def set_pattern_upper_bound(pattern: str, upper_bound: int = FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND) -> str:
+    """Set upper bound for regex patterns that use quantifiers such as `+` `*` or `{n,}`."""
+    def replace_upper_bound(match: re.Match) -> str:
+        lower_bound: int = int(match.group(1)) if match.group(1) else 0
+        # Rebinding `upper_bound` here would shadow the enclosing name and raise
+        # UnboundLocalError (the suppressed F823), so use a fresh local instead.
+        bound: int = upper_bound if lower_bound < upper_bound else lower_bound
+        return f'{{{lower_bound},{bound}}}'
+
+    pattern = re.sub(r'(?<!\\)\{(\d+),\}', replace_upper_bound, pattern)  # {n,}
+    pattern = re.sub(r'(?<!\\)\+', f'{{1,{upper_bound}}}', pattern)  # +
+    pattern = re.sub(r'(?<!\\)\*', f'{{0,{upper_bound}}}', pattern)  # *
+    return pattern
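To make the bounding concrete, a few example rewrites, assuming the default upper bound of 15:

    print(set_pattern_upper_bound(r'^[a-z]+$'))         # ^[a-z]{1,15}$
    print(set_pattern_upper_bound(r'^[a-z0-9_]{3,}$'))  # ^[a-z0-9_]{3,15}$
    print(set_pattern_upper_bound(r'^\w*$'))            # ^\w{0,15}$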
+
+def false_positive_check(sites_info: dict[str, dict[str, str]], site: str, pattern: str) -> QueryStatus:
+    """Check if a site is likely to produce false positives."""
+    status: QueryStatus = QueryStatus.UNKNOWN
+    for _ in range(FALSE_POSITIVE_ATTEMPTS):
+        query_notify: QueryNotify = QueryNotify()
+        username: str = rstr.xeger(pattern)
+        result: QueryResult | str = sherlock(
+            username=username,
+            site_data=sites_info,
+            query_notify=query_notify,
+        )[site]['status']
+        if not hasattr(result, 'status'):
+            raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
+        if type(result.status) is not QueryStatus:  # type: ignore
+            raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore
+        status = result.status  # type: ignore
+        if status in (QueryStatus.AVAILABLE, QueryStatus.WAF):
+            return status
+    return status
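The probe usernames come from `rstr.xeger`, which generates a random string matching a given regular expression, so every attempt is a syntactically valid but almost certainly unclaimed username. A quick illustration (output is random; the value in the comment is made up):

    import rstr

    candidate = rstr.xeger(r'^[a-zA-Z0-9]{7,20}$')
    print(candidate)  # e.g. 'q3ZrT0ab9'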
+
+def false_negative_check(sites_info: dict[str, dict[str, str]], site: str) -> QueryStatus:
+    """Check if a site is likely to produce false negatives."""
+    status: QueryStatus = QueryStatus.UNKNOWN
+    query_notify: QueryNotify = QueryNotify()
+    result: QueryResult | str = sherlock(
+        username=sites_info[site]['username_claimed'],
+        site_data=sites_info,
+        query_notify=query_notify,
+    )[site]['status']
+    if not hasattr(result, 'status'):
+        raise TypeError(f"Result for site {site} does not have 'status' attribute. Actual result: {result}")
+    if type(result.status) is not QueryStatus:  # type: ignore
+        raise TypeError(f"Result status for site {site} is not of type QueryStatus. Actual type: {type(result.status)}")  # type: ignore
+    status = result.status  # type: ignore
+    return status
+
+@pytest.mark.validate_targets
+@pytest.mark.online
+class Test_All_Targets:
+    @pytest.mark.validate_targets_fp
+    def test_false_pos(self, chunked_sites: dict[str, dict[str, str]]):
+        """Iterate through all sites in the manifest to discover possible false-positive inducing targets."""
+        pattern: str
+        for site in chunked_sites:
+            try:
+                pattern = chunked_sites[site]['regexCheck']
+            except KeyError:
+                pattern = FALSE_POSITIVE_DEFAULT_PATTERN
+            if FALSE_POSITIVE_QUANTIFIER_UPPER_BOUND > 0:
+                pattern = set_pattern_upper_bound(pattern)
+            result: QueryStatus = false_positive_check(chunked_sites, site, pattern)
+            assert result is QueryStatus.AVAILABLE, f"{site} produced false positive with pattern {pattern}, result was {result}"
+
+    @pytest.mark.validate_targets_fn
+    def test_false_neg(self, chunked_sites: dict[str, dict[str, str]]):
+        """Iterate through all sites in the manifest to discover possible false-negative inducing targets."""
+        for site in chunked_sites:
+            result: QueryStatus = false_negative_check(chunked_sites, site)
+            assert result is QueryStatus.CLAIMED, f"{site} produced false negative, result was {result}"

View File

@@ -7,8 +7,6 @@ envlist =
     py312
     py311
     py310
-    py39
-    py38

 [testenv]
 description = Attempt to build and install the package
@@ -16,6 +14,7 @@ deps =
     coverage
     jsonschema
     pytest
+    rstr
 allowlist_externals = coverage
 commands =
     coverage run --source=sherlock_project --module pytest -v
@@ -37,7 +36,7 @@ commands =
 [gh-actions]
 python =
+    3.13: py313
     3.12: py312
     3.11: py311
     3.10: py310
-    3.9: py39