diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..709c0480 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Jupyter Notebook +.ipynb_checkpoints +.ipynb diff --git a/data.json b/data.json index c64ebd02..a8d9d55e 100644 --- a/data.json +++ b/data.json @@ -11,8 +11,7 @@ }, "Facebook": { "url": "https://www.facebook.com/{}", - "errorType": "message", - "errorMsg": "not found" + "errorType": "status_code" }, "YouTube": { "url": "https://www.youtube.com/{}", @@ -35,12 +34,11 @@ "Pinterest": { "url": "https://www.pinterest.com/{}", "errorType": "response_url", - "errorMsgInUrl": "?show_error" + "errorUrl": "https://www.pinterest.com/?show_error=true" }, "GitHub": { "url": "https://www.github.com/{}", - "errorType": "message", - "errorMsg": "404 Not Found" + "errorType": "status_code" }, "Steam": { "url": "https://steamcommunity.com/id/{}", @@ -54,18 +52,11 @@ }, "SoundCloud": { "url": "https://soundcloud.com/{}", - "errorType": "message", - "errorMsg": "404 Not Found" - }, - "Tumblr": { - "url": "https://{}.tumblr.com", - "errorType": "message", - "errorMsg": " There's nothing here" + "errorType": "status_code" }, "Disqus": { "url": "https://disqus.com/{}", - "errorType": "message", - "errorMsg": "404 NOT FOUND" + "errorType": "status_code" }, "Medium": { "url": "https://medium.com/@{}", @@ -89,7 +80,8 @@ }, "Flipboard": { "url": "https://flipboard.com/@{}", - "errorType": "status_code" + "errorType": "message", + "errorMsg": "loading" }, "SlideShare": { "url": "https://slideshare.net/{}", @@ -128,13 +120,12 @@ }, "Gravatar": { "url": "http://en.gravatar.com/{}", - "errorType": "message", - "errorMsg": "We’re sorry, we couldn't find that profile" + "errorType": "status_code" }, "iMGSRC.RU": { "url": "https://imgsrc.ru/main/user.php?user={}", - "errorType": "message", - "errorMsg": "Rapidly growing community of over a million users, dedicated to sharing." 
+ "errorType": "response_url", + "errorUrl": "https://imgsrc.ru/" }, "DailyMotion": { "url": "https://www.dailymotion.com/{}", @@ -147,8 +138,7 @@ }, "CashMe": { "url": "https://cash.me/{}", - "errorType": "message", - "errorMsg": "Oh no" + "errorType": "status_code" }, "Behance": { "url": "https://www.behance.net/{}", @@ -157,8 +147,7 @@ }, "GoodReads": { "url": "https://www.goodreads.com/{}", - "errorType": "message", - "errorMsg": "Sorry you lost your way." + "errorType": "status_code" }, "Instructables": { "url": "https://www.instructables.com/member/{}", @@ -167,8 +156,7 @@ }, "Keybase": { "url": "https://keybase.io/{}", - "errorType": "message", - "errorMsg": "Sorry, what you are looking for...it does not exist." + "errorType": "status_code" }, "Kongregate": { "url": "https://www.kongregate.com/accounts/{}", @@ -182,8 +170,7 @@ }, "VSCO": { "url": "https://vsco.co/{}", - "errorType": "message", - "errorMsg": "This page does not exist" + "errorType": "status_code" }, "AngelList": { "url": "https://angel.co/{}", @@ -208,12 +195,11 @@ "Pastebin": { "url": "https://pastebin.com/u/{}", "errorType": "response_url", - "errorMsgInUrl": "index" + "errorUrl": "https://pastebin.com/index" }, "Foursquare": { "url": "https://foursquare.com/{}", - "errorType": "message", - "errorMsg": "We couldn't find the page you're looking for." 
+ "errorType": "status_code" }, "Gumroad": { "url": "https://www.gumroad.com/{}", @@ -222,8 +208,7 @@ }, "Newgrounds": { "url": "https://{}.newgrounds.com", - "errorType": "message", - "errorMsg": "ERROR — No user" + "errorType": "status_code" }, "Wattpad": { "url": "https://www.wattpad.com/user/{}", @@ -273,7 +258,7 @@ "BLIP.fm": { "url": "https://blip.fm/{}", "errorType": "message", - "errorMsg": "404 Page Not Found" + "errorMsg": "Page Not Found" }, "HackerNews": { "url": "https://news.ycombinator.com/user?id={}", @@ -317,8 +302,7 @@ }, "Slack": { "url": "https://{}.slack.com", - "errorType": "message", - "errorMsg": "There’s been a glitch…" + "errorType": "status_code" }, "Trip": { "url": "https://www.trip.skyscanner.com/user/{}", @@ -347,18 +331,16 @@ }, "Flickr": { "url": "https://www.flickr.com/people/{}", - "errorType": "message", - "errorMsg": "This is not the page you’re looking for" + "errorType": "status_code" }, "WordPress": { "url": "https://{}.wordpress.com", - "errorType": "message", - "errorMsg": "Do you want to register" + "errorType": "response_url", + "errorUrl": "wordpress.com/typo/?subdomain=" }, "Unsplash": { "url": "https://unsplash.com/@{}", - "errorType": "message", - "errorMsg": "Hm, the page you were looking for doesn't seem to exist anymore" + "errorType": "status_code" }, "Pexels": { "url": "https://www.pexels.com/@{}", diff --git a/sherlock.py b/sherlock.py index 79d8abec..92fca5fd 100644 --- a/sherlock.py +++ b/sherlock.py @@ -2,6 +2,8 @@ import requests import json import os +# TODO: fix tumblr + def write_to_file(url, fname): with open(fname, "a") as f: f.write(url+"\n") @@ -49,7 +51,7 @@ def main(): if error_type == "message": error = data.get(social_network).get("errorMsg") - + # Checks if the error message is in the HTML if not error in r.text: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) @@ -58,7 +60,7 @@ def main(): 
print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) elif error_type == "status_code": - + # Checks if the status code of the response is 404 if not r.status_code == 404: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname) @@ -67,8 +69,8 @@ def main(): print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network)) elif error_type == "response_url": - error = data.get(social_network).get("errorMsgInUrl") - + error = data.get(social_network).get("errorUrl") + # Checks if the response URL contains the error URL defined in data.json if not error in r.url: print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url) write_to_file(url, fname)