Merge pull request #2601 from sherlock-project/feat/graceful-skip

feat: gracefully skip sites with invalid errorType
This commit is contained in:
Paul Pfeister 2025-10-04 20:23:07 -04:00 committed by GitHub
commit e44fe49c8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 47 additions and 50 deletions

View File

@@ -429,61 +429,58 @@ def sherlock(
else: else:
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
# It should be impossible to ever get here... error_context = f"Unknown error type '{error_type}' for {social_network}"
raise ValueError( query_status = QueryStatus.UNKNOWN
f"Unknown Error Type '{error_type}' for " else:
f"site '{social_network}'" if "message" in error_type:
) # error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
if "message" in error_type: error_flag = True
# error_flag True denotes no error found in the HTML errors = net_info.get("errorMsg")
# error_flag False denotes error found in the HTML # errors will hold the error message
error_flag = True # it can be string or list
errors = net_info.get("errorMsg") # by isinstance method we can detect that
# errors will hold the error message # and handle the case for strings as normal procedure
# it can be string or list # and if its list we can iterate the errors
# by isinstance method we can detect that if isinstance(errors, str):
# and handle the case for strings as normal procedure # Checks if the error message is in the HTML
# and if its list we can iterate the errors # if error is present we will set flag to False
if isinstance(errors, str): if errors in r.text:
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False error_flag = False
break else:
if error_flag: # If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: # Type consistency, allowing for both singlets and lists in manifest
error_codes = net_info.get("errorCode") if isinstance(error_codes, int):
query_status = QueryStatus.CLAIMED error_codes = [error_codes]
# Type consistency, allowing for both singlets and lists in manifest if error_codes is not None and r.status_code in error_codes:
if isinstance(error_codes, int): query_status = QueryStatus.AVAILABLE
error_codes = [error_codes] elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
if error_codes is not None and r.status_code in error_codes: if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
query_status = QueryStatus.AVAILABLE # For this detection method, we have turned off the redirect.
elif r.status_code >= 300 or r.status_code < 200: # So, there is no need to check the response URL: it will always
query_status = QueryStatus.AVAILABLE # match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: # forward to some odd redirect).
# For this detection method, we have turned off the redirect. if 200 <= r.status_code < 300:
# So, there is no need to check the response URL: it will always query_status = QueryStatus.CLAIMED
# match the request. Instead, we will ensure that the response else:
# code indicates that the request was successful (i.e. no 404, or query_status = QueryStatus.AVAILABLE
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if dump_response: if dump_response:
print("+++++++++++++++++++++") print("+++++++++++++++++++++")