feat: support multiple errorTypes

This commit is contained in:
Paul Pfeister 2025-10-04 16:53:30 -04:00
parent a678bed154
commit 0794e02b52
No known key found for this signature in database
GPG Key ID: 70D33A96CBD7A994
2 changed files with 201 additions and 125 deletions

View File

@ -1,80 +1,149 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": [ "url", "urlMain", "errorType", "username_claimed" ],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": [ "GET", "POST", "HEAD", "PUT" ]
},
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Sherlock Target Manifest",
"description": "Social media targets to probe for the existence of known usernames",
"type": "object",
"properties": {
"$schema": { "type": "string" }
},
"patternProperties": {
"^(?!\\$).*?$": {
"type": "object",
"description": "Target name and associated information (key should be human readable name)",
"required": ["url", "urlMain", "errorType", "username_claimed"],
"properties": {
"url": { "type": "string" },
"urlMain": { "type": "string" },
"urlProbe": { "type": "string" },
"username_claimed": { "type": "string" },
"regexCheck": { "type": "string" },
"isNSFW": { "type": "boolean" },
"headers": { "type": "object" },
"request_payload": { "type": "object" },
"__comment__": {
"type": "string",
"description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
},
"tags": {
"oneOf": [
{ "$ref": "#/$defs/tag" },
{ "type": "array", "items": { "$ref": "#/$defs/tag" } }
]
},
"request_method": {
"type": "string",
"enum": ["GET", "POST", "HEAD", "PUT"]
},
"errorType": {
"oneOf": [
{
"type": "string",
"enum": ["message", "response_url", "status_code"]
},
{
"type": "array",
"items": {
"type": "string",
"enum": ["message", "response_url", "status_code"]
}
}
]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"oneOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "string",
"enum": [ "message", "response_url", "status_code" ]
},
"errorMsg": {
"oneOf": [
{ "type": "string" },
{ "type": "array", "items": { "type": "string" } }
]
},
"errorCode": {
"oneOf": [
{ "type": "integer" },
{ "type": "array", "items": { "type": "integer" } }
]
},
"errorUrl": { "type": "string" },
"response_url": { "type": "string" }
},
"dependencies": {
"errorMsg": {
"properties" : { "errorType": { "const": "message" } }
},
"errorUrl": {
"properties": { "errorType": { "const": "response_url" } }
},
"errorCode": {
"properties": { "errorType": { "const": "status_code" } }
"type": "array",
"contains": { "const": "message" }
}
},
"if": { "properties": { "errorType": { "const": "message" } } },
"then": { "required": [ "errorMsg" ] },
"else": {
"if": { "properties": { "errorType": { "const": "response_url" } } },
"then": { "required": [ "errorUrl" ] }
},
"additionalProperties": false
}
}
]
},
"errorUrl": {
"oneOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"errorCode": {
"oneOf": [
{ "properties": { "errorType": { "const": "status_code" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "status_code" }
}
}
}
]
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": [ "adult", "gaming" ] }
},
"allOf": [
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "message" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "message" }
}
}
}
]
},
"then": { "required": ["errorMsg"] }
},
{
"if": {
"anyOf": [
{ "properties": { "errorType": { "const": "response_url" } } },
{
"properties": {
"errorType": {
"type": "array",
"contains": { "const": "response_url" }
}
}
}
]
},
"then": { "required": ["errorUrl"] }
}
],
"additionalProperties": false
}
},
"additionalProperties": false,
"$defs": {
"tag": { "type": "string", "enum": ["adult", "gaming"] }
}
}

View File

@ -381,6 +381,8 @@ def sherlock(
# Get the expected error type
error_type = net_info["errorType"]
if isinstance(error_type, str):
error_type: list[str] = [error_type]
# Retrieve future and ensure it has finished
future = net_info["request_future"]
@ -425,58 +427,63 @@ def sherlock(
elif any(hitMsg in r.text for hitMsg in WAFHitMsgs):
query_status = QueryStatus.WAF
elif error_type == "message":
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors will hold the error message
# it can be string or list
# by isinstance method we can detect that
# and handle the case for strings as normal procedure
# and if its list we can iterate the errors
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's list, it will iterate all the error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
elif error_type == "status_code":
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
elif error_type == "response_url":
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
else:
# It should be impossible to ever get here...
raise ValueError(
f"Unknown Error Type '{error_type}' for " f"site '{social_network}'"
)
if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type):
# It should be impossible to ever get here...
raise ValueError(
f"Unknown Error Type '{error_type}' for "
f"site '{social_network}'"
)
if "message" in error_type:
# error_flag True denotes no error found in the HTML
# error_flag False denotes error found in the HTML
error_flag = True
errors = net_info.get("errorMsg")
# errors holds the configured errorMsg value, which may be either a
# single string or a list of strings. isinstance() tells the two
# apart: a string is checked against the response text directly,
# while a list is iterated entry by entry.
if isinstance(errors, str):
# Checks if the error message is in the HTML
# if error is present we will set flag to False
if errors in r.text:
error_flag = False
else:
# If it's a list, iterate over every error message
for error in errors:
if error in r.text:
error_flag = False
break
if error_flag:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE:
error_codes = net_info.get("errorCode")
query_status = QueryStatus.CLAIMED
# Type consistency, allowing for both singlets and lists in manifest
if isinstance(error_codes, int):
error_codes = [error_codes]
if error_codes is not None and r.status_code in error_codes:
query_status = QueryStatus.AVAILABLE
elif r.status_code >= 300 or r.status_code < 200:
query_status = QueryStatus.AVAILABLE
if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE:
# For this detection method, we have turned off the redirect.
# So, there is no need to check the response URL: it will always
# match the request. Instead, we will ensure that the response
# code indicates that the request was successful (i.e. no 404, or
# forward to some odd redirect).
if 200 <= r.status_code < 300:
query_status = QueryStatus.CLAIMED
else:
query_status = QueryStatus.AVAILABLE
if dump_response:
print("+++++++++++++++++++++")