Merge pull request #2601 from sherlock-project/feat/graceful-skip
feat: gracefully skip sites with invalid errorType
This commit is contained in:
commit
e44fe49c8f
|
|
@ -429,61 +429,58 @@ def sherlock(
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
|
||||||
# It should be impossible to ever get here...
|
error_context = f"Unknown error type '{error_type}' for {social_network}"
|
||||||
raise ValueError(
|
query_status = QueryStatus.UNKNOWN
|
||||||
f"Unknown Error Type '{error_type}' for "
|
else:
|
||||||
f"site '{social_network}'"
|
if "message" in error_type:
|
||||||
)
|
# error_flag True denotes no error found in the HTML
|
||||||
|
# error_flag False denotes error found in the HTML
|
||||||
if "message" in error_type:
|
error_flag = True
|
||||||
# error_flag True denotes no error found in the HTML
|
errors = net_info.get("errorMsg")
|
||||||
# error_flag False denotes error found in the HTML
|
# errors will hold the error message
|
||||||
error_flag = True
|
# it can be string or list
|
||||||
errors = net_info.get("errorMsg")
|
# by isinstance method we can detect that
|
||||||
# errors will hold the error message
|
# and handle the case for strings as normal procedure
|
||||||
# it can be string or list
|
# and if it's a list we can iterate the errors
|
||||||
# by isinstance method we can detect that
|
if isinstance(errors, str):
|
||||||
# and handle the case for strings as normal procedure
|
# Checks if the error message is in the HTML
|
||||||
# and if it's a list we can iterate the errors
|
# if error is present we will set flag to False
|
||||||
if isinstance(errors, str):
|
if errors in r.text:
|
||||||
# Checks if the error message is in the HTML
|
|
||||||
# if error is present we will set flag to False
|
|
||||||
if errors in r.text:
|
|
||||||
error_flag = False
|
|
||||||
else:
|
|
||||||
# If it's a list, iterate over all the error messages
|
|
||||||
for error in errors:
|
|
||||||
if error in r.text:
|
|
||||||
error_flag = False
|
error_flag = False
|
||||||
break
|
else:
|
||||||
if error_flag:
|
# If it's a list, iterate over all the error messages
|
||||||
|
for error in errors:
|
||||||
|
if error in r.text:
|
||||||
|
error_flag = False
|
||||||
|
break
|
||||||
|
if error_flag:
|
||||||
|
query_status = QueryStatus.CLAIMED
|
||||||
|
else:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
|
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
|
error_codes = net_info.get("errorCode")
|
||||||
query_status = QueryStatus.CLAIMED
|
query_status = QueryStatus.CLAIMED
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
|
|
||||||
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
|
# Type consistency, allowing for both singlets and lists in manifest
|
||||||
error_codes = net_info.get("errorCode")
|
if isinstance(error_codes, int):
|
||||||
query_status = QueryStatus.CLAIMED
|
error_codes = [error_codes]
|
||||||
|
|
||||||
# Type consistency, allowing for both singlets and lists in manifest
|
if error_codes is not None and r.status_code in error_codes:
|
||||||
if isinstance(error_codes, int):
|
query_status = QueryStatus.AVAILABLE
|
||||||
error_codes = [error_codes]
|
elif r.status_code >= 300 or r.status_code < 200:
|
||||||
|
query_status = QueryStatus.AVAILABLE
|
||||||
|
|
||||||
if error_codes is not None and r.status_code in error_codes:
|
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
||||||
query_status = QueryStatus.AVAILABLE
|
# For this detection method, we have turned off the redirect.
|
||||||
elif r.status_code >= 300 or r.status_code < 200:
|
# So, there is no need to check the response URL: it will always
|
||||||
query_status = QueryStatus.AVAILABLE
|
# match the request. Instead, we will ensure that the response
|
||||||
|
# code indicates that the request was successful (i.e. no 404, or
|
||||||
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
|
# forward to some odd redirect).
|
||||||
# For this detection method, we have turned off the redirect.
|
if 200 <= r.status_code < 300:
|
||||||
# So, there is no need to check the response URL: it will always
|
query_status = QueryStatus.CLAIMED
|
||||||
# match the request. Instead, we will ensure that the response
|
else:
|
||||||
# code indicates that the request was successful (i.e. no 404, or
|
query_status = QueryStatus.AVAILABLE
|
||||||
# forward to some odd redirect).
|
|
||||||
if 200 <= r.status_code < 300:
|
|
||||||
query_status = QueryStatus.CLAIMED
|
|
||||||
else:
|
|
||||||
query_status = QueryStatus.AVAILABLE
|
|
||||||
|
|
||||||
if dump_response:
|
if dump_response:
|
||||||
print("+++++++++++++++++++++")
|
print("+++++++++++++++++++++")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue