Fixed #2; added a new detection method for response_url and a .gitignore
This commit is contained in:
parent
f0b4365f7c
commit
ad23efc785
|
|
@ -0,0 +1,3 @@
|
|||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
.ipynb
|
||||
64
data.json
64
data.json
|
|
@ -11,8 +11,7 @@
|
|||
},
|
||||
"Facebook": {
|
||||
"url": "https://www.facebook.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "not found"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"YouTube": {
|
||||
"url": "https://www.youtube.com/{}",
|
||||
|
|
@ -35,12 +34,11 @@
|
|||
"Pinterest": {
|
||||
"url": "https://www.pinterest.com/{}",
|
||||
"errorType": "response_url",
|
||||
"errorMsgInUrl": "?show_error"
|
||||
"errorUrl": "https://www.pinterest.com/?show_error=true"
|
||||
},
|
||||
"GitHub": {
|
||||
"url": "https://www.github.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "404 Not Found"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Steam": {
|
||||
"url": "https://steamcommunity.com/id/{}",
|
||||
|
|
@ -54,18 +52,11 @@
|
|||
},
|
||||
"SoundCloud": {
|
||||
"url": "https://soundcloud.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "404 Not Found"
|
||||
},
|
||||
"Tumblr": {
|
||||
"url": "https://{}.tumblr.com",
|
||||
"errorType": "message",
|
||||
"errorMsg": " There's nothing here"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Disqus": {
|
||||
"url": "https://disqus.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "404 NOT FOUND"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Medium": {
|
||||
"url": "https://medium.com/@{}",
|
||||
|
|
@ -89,7 +80,8 @@
|
|||
},
|
||||
"Flipboard": {
|
||||
"url": "https://flipboard.com/@{}",
|
||||
"errorType": "status_code"
|
||||
"errorType": "message",
|
||||
"errorMsg": "loading"
|
||||
},
|
||||
"SlideShare": {
|
||||
"url": "https://slideshare.net/{}",
|
||||
|
|
@ -128,13 +120,12 @@
|
|||
},
|
||||
"Gravatar": {
|
||||
"url": "http://en.gravatar.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "We’re sorry, we couldn't find that profile"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"iMGSRC.RU": {
|
||||
"url": "https://imgsrc.ru/main/user.php?user={}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Rapidly growing community of over a million users, dedicated to sharing."
|
||||
"errorType": "response_url",
|
||||
"errorUrl": "https://imgsrc.ru/"
|
||||
},
|
||||
"DailyMotion": {
|
||||
"url": "https://www.dailymotion.com/{}",
|
||||
|
|
@ -147,8 +138,7 @@
|
|||
},
|
||||
"CashMe": {
|
||||
"url": "https://cash.me/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Oh no"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Behance": {
|
||||
"url": "https://www.behance.net/{}",
|
||||
|
|
@ -157,8 +147,7 @@
|
|||
},
|
||||
"GoodReads": {
|
||||
"url": "https://www.goodreads.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Sorry you lost your way."
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Instructables": {
|
||||
"url": "https://www.instructables.com/member/{}",
|
||||
|
|
@ -167,8 +156,7 @@
|
|||
},
|
||||
"Keybase": {
|
||||
"url": "https://keybase.io/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Sorry, what you are looking for...it does not exist."
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Kongregate": {
|
||||
"url": "https://www.kongregate.com/accounts/{}",
|
||||
|
|
@ -182,8 +170,7 @@
|
|||
},
|
||||
"VSCO": {
|
||||
"url": "https://vsco.co/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "This page does not exist"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"AngelList": {
|
||||
"url": "https://angel.co/{}",
|
||||
|
|
@ -208,12 +195,11 @@
|
|||
"Pastebin": {
|
||||
"url": "https://pastebin.com/u/{}",
|
||||
"errorType": "response_url",
|
||||
"errorMsgInUrl": "index"
|
||||
"errorUrl": "https://pastebin.com/index"
|
||||
},
|
||||
"Foursquare": {
|
||||
"url": "https://foursquare.com/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "We couldn't find the page you're looking for."
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Gumroad": {
|
||||
"url": "https://www.gumroad.com/{}",
|
||||
|
|
@ -222,8 +208,7 @@
|
|||
},
|
||||
"Newgrounds": {
|
||||
"url": "https://{}.newgrounds.com",
|
||||
"errorType": "message",
|
||||
"errorMsg": "ERROR — No user"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Wattpad": {
|
||||
"url": "https://www.wattpad.com/user/{}",
|
||||
|
|
@ -273,7 +258,7 @@
|
|||
"BLIP.fm": {
|
||||
"url": "https://blip.fm/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "404 Page Not Found"
|
||||
"errorMsg": "Page Not Found"
|
||||
},
|
||||
"HackerNews": {
|
||||
"url": "https://news.ycombinator.com/user?id={}",
|
||||
|
|
@ -317,8 +302,7 @@
|
|||
},
|
||||
"Slack": {
|
||||
"url": "https://{}.slack.com",
|
||||
"errorType": "message",
|
||||
"errorMsg": "There’s been a glitch…"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Trip": {
|
||||
"url": "https://www.trip.skyscanner.com/user/{}",
|
||||
|
|
@ -347,18 +331,16 @@
|
|||
},
|
||||
"Flickr": {
|
||||
"url": "https://www.flickr.com/people/{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "This is not the page you’re looking for"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"WordPress": {
|
||||
"url": "https://{}.wordpress.com",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Do you want to register"
|
||||
"errorType": "response_url",
|
||||
"errorUrl": "wordpress.com/typo/?subdomain="
|
||||
},
|
||||
"Unsplash": {
|
||||
"url": "https://unsplash.com/@{}",
|
||||
"errorType": "message",
|
||||
"errorMsg": "Hm, the page you were looking for doesn't seem to exist anymore"
|
||||
"errorType": "status_code"
|
||||
},
|
||||
"Pexels": {
|
||||
"url": "https://www.pexels.com/@{}",
|
||||
|
|
|
|||
10
sherlock.py
10
sherlock.py
|
|
@ -2,6 +2,8 @@ import requests
|
|||
import json
|
||||
import os
|
||||
|
||||
# TODO: fix tumblr
|
||||
|
||||
def write_to_file(url, fname):
|
||||
with open(fname, "a") as f:
|
||||
f.write(url+"\n")
|
||||
|
|
@ -49,7 +51,7 @@ def main():
|
|||
|
||||
if error_type == "message":
|
||||
error = data.get(social_network).get("errorMsg")
|
||||
|
||||
# Checks if the error message is in the HTML
|
||||
if not error in r.text:
|
||||
print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url)
|
||||
write_to_file(url, fname)
|
||||
|
|
@ -58,7 +60,7 @@ def main():
|
|||
print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network))
|
||||
|
||||
elif error_type == "status_code":
|
||||
|
||||
# Checks if the status code of the response is 404
|
||||
if not r.status_code == 404:
|
||||
print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url)
|
||||
write_to_file(url, fname)
|
||||
|
|
@ -67,8 +69,8 @@ def main():
|
|||
print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network))
|
||||
|
||||
elif error_type == "response_url":
|
||||
error = data.get(social_network).get("errorMsgInUrl")
|
||||
|
||||
error = data.get(social_network).get("errorUrl")
|
||||
# Checks if the final (redirected) URL contains the error URL defined in data.json
|
||||
if not error in r.url:
|
||||
print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url)
|
||||
write_to_file(url, fname)
|
||||
|
|
|
|||
Loading…
Reference in New Issue