diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index 216ffb62..c717cb25 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -1,80 +1,149 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Sherlock Target Manifest", - "description": "Social media targets to probe for the existence of known usernames", - "type": "object", - "properties": { - "$schema": { "type": "string" } - }, - "patternProperties": { - "^(?!\\$).*?$": { - "type": "object", - "description": "Target name and associated information (key should be human readable name)", - "required": [ "url", "urlMain", "errorType", "username_claimed" ], - "properties": { - "url": { "type": "string" }, - "urlMain": { "type": "string" }, - "urlProbe": { "type": "string" }, - "username_claimed": { "type": "string" }, - "regexCheck": { "type": "string" }, - "isNSFW": { "type": "boolean" }, - "headers": { "type": "object" }, - "request_payload": { "type": "object" }, - "__comment__": { - "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
- }, - "tags": { - "oneOf": [ - { "$ref": "#/$defs/tag" }, - { "type": "array", "items": { "$ref": "#/$defs/tag" } } - ] - }, - "request_method": { - "type": "string", - "enum": [ "GET", "POST", "HEAD", "PUT" ] - }, + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Target Manifest", + "description": "Social media targets to probe for the existence of known usernames", + "type": "object", + "properties": { + "$schema": { "type": "string" } + }, + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "description": "Target name and associated information (key should be human readable name)", + "required": ["url", "urlMain", "errorType", "username_claimed"], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "__comment__": { + "type": "string", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
+ }, + "tags": { + "oneOf": [ + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } + ] + }, + "request_method": { + "type": "string", + "enum": ["GET", "POST", "HEAD", "PUT"] + }, + "errorType": { + "oneOf": [ + { + "type": "string", + "enum": ["message", "response_url", "status_code"] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": ["message", "response_url", "status_code"] + } + } + ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" } + }, + "dependencies": { + "errorMsg": { + "oneOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { "errorType": { - "type": "string", - "enum": [ "message", "response_url", "status_code" ] - }, - "errorMsg": { - "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } - ] - }, - "errorCode": { - "oneOf": [ - { "type": "integer" }, - { "type": "array", "items": { "type": "integer" } } - ] - }, - "errorUrl": { "type": "string" }, - "response_url": { "type": "string" } - }, - "dependencies": { - "errorMsg": { - "properties" : { "errorType": { "const": "message" } } - }, - "errorUrl": { - "properties": { "errorType": { "const": "response_url" } } - }, - "errorCode": { - "properties": { "errorType": { "const": "status_code" } } + "type": "array", + "contains": { "const": "message" } } - }, - "if": { "properties": { "errorType": { "const": "message" } } }, - "then": { "required": [ "errorMsg" ] }, - "else": { - "if": { "properties": { "errorType": { "const": "response_url" } } }, - "then": { "required": [ "errorUrl" ] } - }, - "additionalProperties": false + } + } + ] + }, + "errorUrl": { + "oneOf": [ + { "properties": { "errorType": { "const": 
"response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "errorCode": { + "oneOf": [ + { "properties": { "errorType": { "const": "status_code" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "status_code" } + } + } + } + ] } - }, - "additionalProperties": false, - "$defs": { - "tag": { "type": "string", "enum": [ "adult", "gaming" ] } + }, + "allOf": [ + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "then": { "required": ["errorMsg"] } + }, + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "then": { "required": ["errorUrl"] } + } + ], + "additionalProperties": false } + }, + "additionalProperties": false, + "$defs": { + "tag": { "type": "string", "enum": ["adult", "gaming"] } + } } diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a5..a776d8c3 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -381,6 +381,8 @@ def sherlock( # Get the expected error type error_type = net_info["errorType"] + if isinstance(error_type, str): + error_type: list[str] = [error_type] # Retrieve future and ensure it has finished future = net_info["request_future"] @@ -425,58 +427,63 @@ def sherlock( elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): query_status = QueryStatus.WAF - elif error_type == "message": - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect 
that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: - error_flag = False - break - if error_flag: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - elif error_type == "status_code": - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED - - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] - - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = QueryStatus.AVAILABLE - elif error_type == "response_url": - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. no 404, or - # forward to some odd redirect). - if 200 <= r.status_code < 300: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE else: - # It should be impossible to ever get here... - raise ValueError( - f"Unknown Error Type '{error_type}' for " f"site '{social_network}'" - ) + if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): + # It should be impossible to ever get here... 
+ raise ValueError( + f"Unknown Error Type '{error_type}' for " + f"site '{social_network}'" + ) + + if "message" in error_type: + # error_flag True denotes no error found in the HTML + # error_flag False denotes error found in the HTML + error_flag = True + errors = net_info.get("errorMsg") + # errors will hold the error message + # it can be a string or a list + # by isinstance method we can detect that + # and handle the case for strings as normal procedure + # and if it's a list we can iterate the errors + if isinstance(errors, str): + # Checks if the error message is in the HTML + # if error is present we will set flag to False + if errors in r.text: + error_flag = False + else: + # If it's a list, iterate over all the error messages + for error in errors: + if error in r.text: + error_flag = False + break + if error_flag: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE + + if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + error_codes = net_info.get("errorCode") + query_status = QueryStatus.CLAIMED + + # Type consistency, allowing for both singlets and lists in manifest + if isinstance(error_codes, int): + error_codes = [error_codes] + + if error_codes is not None and r.status_code in error_codes: + query_status = QueryStatus.AVAILABLE + elif r.status_code >= 300 or r.status_code < 200: + query_status = QueryStatus.AVAILABLE + + if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + # For this detection method, we have turned off the redirect. + # So, there is no need to check the response URL: it will always + # match the request. Instead, we will ensure that the response + # code indicates that the request was successful (i.e. no 404, or + # forward to some odd redirect). + if 200 <= r.status_code < 300: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE if dump_response: print("+++++++++++++++++++++")