diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index de024090..bb244511 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -17,29 +17,40 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 with: + # Check out the base branch with full history so that git merge-base can find the common ancestor with the PR head ref: ${{ github.base_ref }} - fetch-depth: 1 + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.13' + python-version: "3.13" - name: Install Poetry uses: abatilo/actions-poetry@v4 with: - poetry-version: 'latest' + poetry-version: "latest" - name: Install dependencies run: | poetry install --no-interaction --with dev - - name: Drop in place updated manifest from base + - name: Prepare JSON versions for comparison run: | - cp sherlock_project/resources/data.json data.json.base - git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1 - git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json - cp sherlock_project/resources/data.json data.json.head + # Fetch only the PR's branch head (single network call in this step) + git fetch origin pull/${{ github.event.pull_request.number }}/head:pr + + # Find the merge-base commit between the target branch and the PR branch + MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr) + echo "Comparing PR head against merge-base commit: $MERGE_BASE" + + # Safely extract the file from the PR's head and the merge-base commit + git show pr:sherlock_project/resources/data.json > data.json.head + git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base + + # Overwrite the checked-out data.json (base branch version) with the one from the PR. + # This ensures that pytest runs against the new, updated file.
+ cp data.json.head sherlock_project/resources/data.json - name: Discover modified targets id: discover-modified @@ -47,8 +58,16 @@ jobs: CHANGED=$( python - <<'EOF' import json - with open("data.json.base") as f: base = json.load(f) - with open("data.json.head") as f: head = json.load(f) + import sys + try: + with open("data.json.base") as f: base = json.load(f) + with open("data.json.head") as f: head = json.load(f) + except FileNotFoundError as e: + print(f"Error: Could not find {e.filename}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error: Could not decode JSON from a file - {e}", file=sys.stderr) + sys.exit(1) changed = [] for k, v in head.items(): @@ -63,6 +82,8 @@ jobs: echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')" echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT" + # Validation runs only when the discovery step reported at least one changed target + - name: Validate modified targets if: steps.discover-modified.outputs.changed_targets != '' continue-on-error: true diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index a117740c..9d5c65fd 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -259,7 +259,8 @@ "username_claimed": "blue" }, "Blitz Tactics": { - "errorType": "status_code", + "errorMsg": "That page doesn't exist", + "errorType": "message", "url": "https://blitztactics.com/{}", "urlMain": "https://blitztactics.com/", "username_claimed": "Lance5500" @@ -278,14 +279,7 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, - "BoardGameGeek": { - "errorType": "message", - "regexCheck": "^[a-zA-Z0-9_]*$", - "errorMsg": "User not found", - "url": "https://boardgamegeek.com/user/{}", - "urlMain": "https://boardgamegeek.com", - "username_claimed": "blue" - }, + "BongaCams": { "errorType": "status_code", "isNSFW": true, @@ -299,6 +293,14 @@ "urlMain": "https://www.bookcrossing.com/", "username_claimed": "blue" }, + "BoardGameGeek": { +
"errorMsg": "\"isValid\":true", + "errorType": "message", + "url": "https://boardgamegeek.com/user/{}", + "urlMain": "https://boardgamegeek.com/", + "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}", + "username_claimed": "blue" + }, "BraveCommunity": { "errorType": "status_code", "url": "https://community.brave.com/u/{}/", @@ -505,6 +507,15 @@ "urlMain": "https://coderwall.com", "username_claimed": "hacker" }, + "Code Sandbox": { + "errorType": "message", + "errorMsg": "Could not find user with username", + "regexCheck": "^[a-zA-Z0-9_-]{3,30}$", + "url": "https://codesandbox.io/u/{}", + "urlProbe": "https://codesandbox.io/api/v1/users/{}", + "urlMain": "https://codesandbox.io", + "username_claimed": "icyjoseph" + }, "Codewars": { "errorType": "status_code", "url": "https://www.codewars.com/users/{}", @@ -537,6 +548,13 @@ "urlMain": "https://coroflot.com/", "username_claimed": "blue" }, + "Cplusplus": { + "errorType": "message", + "errorMsg": "404 Page Not Found", + "url": "https://cplusplus.com/user/{}", + "urlMain": "https://cplusplus.com", + "username_claimed": "mbozzi" + }, "Cracked": { "errorType": "response_url", "errorUrl": "https://www.cracked.com/", @@ -683,7 +701,6 @@ "Duolingo": { "errorMsg": "{\"users\":[]}", "errorType": "message", - "url": "https://www.duolingo.com/profile/{}", "urlMain": "https://duolingo.com/", "urlProbe": "https://www.duolingo.com/2017-06-30/users?username={}", @@ -1056,6 +1073,13 @@ "urlMain": "https://www.heavy-r.com/", "username_claimed": "kilroy222" }, + "Hive Blog": { + "errorMsg": "User Not Found - Hive", + "errorType": "message", + "url": "https://hive.blog/@{}", + "urlMain": "https://hive.blog/", + "username_claimed": "mango-juice" + }, "Holopin": { "errorMsg": "true", "errorType": "message", @@ -1217,6 +1241,13 @@ "urlMain": "https://discourse.joplinapp.org/", "username_claimed": "laurent" }, + "Jupyter Community Forum": { + "errorMsg": "Oops! 
That page doesn’t exist or is private.", + "errorType": "message", + "url": "https://discourse.jupyter.org/u/{}/summary", + "urlMain": "https://discourse.jupyter.org", + "username_claimed": "choldgraf" + }, "Kaggle": { "errorType": "status_code", "url": "https://www.kaggle.com/{}", @@ -1739,6 +1770,12 @@ "urlMain": "http://promodj.com/", "username_claimed": "blue" }, + "Pronouns.page": { + "errorType": "status_code", + "url": "https://pronouns.page/@{}", + "urlMain": "https://pronouns.page/", + "username_claimed": "andrea" + }, "PyPi": { "errorType": "status_code", "url": "https://pypi.org/user/{}", @@ -1746,6 +1783,13 @@ "urlMain": "https://pypi.org", "username_claimed": "Blue" }, + "Python.org Discussions": { + "errorMsg": "Oops! That page doesn’t exist or is private.", + "errorType": "message", + "url": "https://discuss.python.org/u/{}/summary", + "urlMain": "https://discuss.python.org", + "username_claimed": "pablogsal" + }, "Rajce.net": { "errorType": "status_code", "regexCheck": "^[\\w@-]+?$", @@ -1841,6 +1885,13 @@ "urlMain": "https://royalcams.com", "username_claimed": "asuna-black" }, + "Ruby Forums": { + "errorMsg": "Oops! 
That page doesn’t exist or is private.", + "errorType": "message", + "url": "https://ruby-forum.com/u/{}/summary", + "urlMain": "https://ruby-forum.com", + "username_claimed": "rishard" + }, "RubyGems": { "errorType": "status_code", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]{1,40}", @@ -2041,7 +2092,6 @@ }, "Spotify": { "errorType": "status_code", - "url": "https://open.spotify.com/user/{}", "urlMain": "https://open.spotify.com/", "username_claimed": "blue" @@ -2246,6 +2296,13 @@ "urlMain": "https://untappd.com/", "username_claimed": "untappd" }, + "Valorant Forums": { + "errorMsg": "The page you requested could not be found.", + "errorType": "message", + "url": "https://valorantforums.com/u/{}", + "urlMain": "https://valorantforums.com", + "username_claimed": "Wolves" + }, "VK": { "errorType": "response_url", "errorUrl": "https://www.quora.com/profile/{}", @@ -2320,6 +2377,12 @@ "urlMain": "https://discourse.wicg.io/", "username_claimed": "stefano" }, + "Wakatime": { + "errorType": "status_code", + "url": "https://wakatime.com/@{}", + "urlMain": "https://wakatime.com/", + "username_claimed": "blue" + }, "Warrior Forum": { "errorType": "status_code", "url": "https://www.warriorforum.com/members/{}.html", @@ -2464,7 +2527,6 @@ }, "YouTube": { "errorType": "status_code", - "url": "https://www.youtube.com/@{}", "urlMain": "https://www.youtube.com/", "username_claimed": "youtube"