feat: honor automatic exclusions list
This commit is contained in:
parent
6d15f1319e
commit
8882310450
|
|
@ -76,5 +76,5 @@ jobs:
|
||||||
|
|
||||||
git add false_positive_exclusions.txt
|
git add false_positive_exclusions.txt
|
||||||
|
|
||||||
git commit -m "auto: Update exclusions list" || echo "No changes to commit"
|
git commit -m "auto: update exclusions list" || echo "No changes to commit"
|
||||||
git push origin exclusions
|
git push origin exclusions
|
||||||
|
|
|
||||||
|
|
@ -727,6 +727,14 @@ def main():
|
||||||
help="Disable creation of a txt file",
|
help="Disable creation of a txt file",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--ignore-exclusions",
|
||||||
|
action="store_true",
|
||||||
|
dest="ignore_exclusions",
|
||||||
|
default=False,
|
||||||
|
help="Ignore upstream exclusions (may return more false positives)",
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# If the user presses CTRL-C, exit gracefully without throwing errors
|
# If the user presses CTRL-C, exit gracefully without throwing errors
|
||||||
|
|
@ -784,7 +792,8 @@ def main():
|
||||||
try:
|
try:
|
||||||
if args.local:
|
if args.local:
|
||||||
sites = SitesInformation(
|
sites = SitesInformation(
|
||||||
os.path.join(os.path.dirname(__file__), "resources/data.json")
|
os.path.join(os.path.dirname(__file__), "resources/data.json"),
|
||||||
|
honor_exclusions=False,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
json_file_location = args.json_file
|
json_file_location = args.json_file
|
||||||
|
|
@ -804,7 +813,11 @@ def main():
|
||||||
head_commit_sha = pull_request_json["head"]["sha"]
|
head_commit_sha = pull_request_json["head"]["sha"]
|
||||||
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
|
json_file_location = f"https://raw.githubusercontent.com/sherlock-project/sherlock/{head_commit_sha}/sherlock_project/resources/data.json"
|
||||||
|
|
||||||
sites = SitesInformation(json_file_location)
|
sites = SitesInformation(
|
||||||
|
data_file_path=json_file_location,
|
||||||
|
honor_exclusions=not args.ignore_exclusions,
|
||||||
|
do_not_exclude=args.site_list,
|
||||||
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
print(f"ERROR: {error}")
|
print(f"ERROR: {error}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,10 @@ import json
|
||||||
import requests
|
import requests
|
||||||
import secrets
|
import secrets
|
||||||
|
|
||||||
|
|
||||||
|
MANIFEST_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
||||||
|
EXCLUSIONS_URL = "https://raw.githubusercontent.com/sherlock-project/sherlock/refs/heads/exclusions/false_positive_exclusions.txt"
|
||||||
|
|
||||||
class SiteInformation:
|
class SiteInformation:
|
||||||
def __init__(self, name, url_home, url_username_format, username_claimed,
|
def __init__(self, name, url_home, url_username_format, username_claimed,
|
||||||
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
|
information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)):
|
||||||
|
|
@ -67,12 +71,17 @@ class SiteInformation:
|
||||||
Return Value:
|
Return Value:
|
||||||
Nicely formatted string to get information about this object.
|
Nicely formatted string to get information about this object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return f"{self.name} ({self.url_home})"
|
return f"{self.name} ({self.url_home})"
|
||||||
|
|
||||||
|
|
||||||
class SitesInformation:
|
class SitesInformation:
|
||||||
def __init__(self, data_file_path=None):
|
def __init__(
|
||||||
|
self,
|
||||||
|
data_file_path: str|None = None,
|
||||||
|
honor_exclusions: bool = True,
|
||||||
|
do_not_exclude: list[str] = [],
|
||||||
|
):
|
||||||
"""Create Sites Information Object.
|
"""Create Sites Information Object.
|
||||||
|
|
||||||
Contains information about all supported websites.
|
Contains information about all supported websites.
|
||||||
|
|
@ -110,7 +119,7 @@ class SitesInformation:
|
||||||
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
# The default data file is the live data.json which is in the GitHub repo. The reason why we are using
|
||||||
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
# this instead of the local one is so that the user has the most up-to-date data. This prevents
|
||||||
# users from creating issues about false positives which have already been fixed or having outdated data
|
# users from creating issues about false positives which have already been fixed or having outdated data
|
||||||
data_file_path = "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.json"
|
data_file_path = MANIFEST_URL
|
||||||
|
|
||||||
# Ensure that specified data file has correct extension.
|
# Ensure that specified data file has correct extension.
|
||||||
if not data_file_path.lower().endswith(".json"):
|
if not data_file_path.lower().endswith(".json"):
|
||||||
|
|
@ -152,9 +161,31 @@ class SitesInformation:
|
||||||
raise FileNotFoundError(f"Problem while attempting to access "
|
raise FileNotFoundError(f"Problem while attempting to access "
|
||||||
f"data file '{data_file_path}'."
|
f"data file '{data_file_path}'."
|
||||||
)
|
)
|
||||||
|
|
||||||
site_data.pop('$schema', None)
|
site_data.pop('$schema', None)
|
||||||
|
|
||||||
|
if honor_exclusions:
|
||||||
|
try:
|
||||||
|
response = requests.get(url=EXCLUSIONS_URL)
|
||||||
|
if response.status_code == 200:
|
||||||
|
exclusions = response.text.splitlines()
|
||||||
|
exclusions = [exclusion.strip() for exclusion in exclusions]
|
||||||
|
|
||||||
|
for site in do_not_exclude:
|
||||||
|
if site in exclusions:
|
||||||
|
exclusions.remove(site)
|
||||||
|
|
||||||
|
for exclusion in exclusions:
|
||||||
|
try:
|
||||||
|
site_data.pop(exclusion, None)
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
# If there was any problem loading the exclusions, just continue without them
|
||||||
|
print("Warning: Could not load exclusions, continuing without them.")
|
||||||
|
honor_exclusions = False
|
||||||
|
|
||||||
self.sites = {}
|
self.sites = {}
|
||||||
|
|
||||||
# Add all site information from the json file to internal site list.
|
# Add all site information from the json file to internal site list.
|
||||||
|
|
@ -194,7 +225,7 @@ class SitesInformation:
|
||||||
for site in self.sites:
|
for site in self.sites:
|
||||||
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
if self.sites[site].is_nsfw and site.casefold() not in do_not_remove:
|
||||||
continue
|
continue
|
||||||
sites[site] = self.sites[site]
|
sites[site] = self.sites[site]
|
||||||
self.sites = sites
|
self.sites = sites
|
||||||
|
|
||||||
def site_name_list(self):
|
def site_name_list(self):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue