added requirements.txt and did some refactoring

harismuneer 2019-06-29 17:48:26 +05:00 committed by Haris Muneer
parent 7fe4c959be
commit bc3d06b1ef
4 changed files with 34 additions and 23 deletions


@@ -1,2 +0,0 @@
-email = "<YOUR FB EMAIL>"
-password = "<YOUR FB PASSWORD>"

Code/credentials.txt Normal file

@@ -0,0 +1,2 @@
+email = "<REDACTED>"
+password = "<REDACTED>"
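For reference, main() at the bottom of this diff parses this file by taking the text between the first pair of double quotes on each line. A minimal standalone sketch of that parsing, assuming the two-line key = "value" layout shown above (the helper name read_credentials is illustrative, not part of this commit):

    # Sketch of the credentials.txt parsing that main() performs below.
    # Assumes two lines in the form: email = "..." and password = "...".
    def read_credentials(path="credentials.txt"):
        with open(path) as f:
            email = f.readline().split('"')[1]      # text between the first pair of quotes
            password = f.readline().split('"')[1]
        if email == "" or password == "":
            raise SystemExit("Your email or password is missing. Kindly write them in credentials.txt")
        return email, password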


@@ -4,10 +4,6 @@ import platform
import sys
import urllib.request
import time
-try:
-import config
-except ImportError:
-raise RuntimeError("Please create config.py based on config-sample.py")
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
@@ -25,14 +21,14 @@ from selenium.webdriver.support.ui import WebDriverWait
driver = None
# whether to download photos or not
-download_uploaded_photos = True
-download_friends_photos = True
+download_uploaded_photos = True
+download_friends_photos = True
# whether to download the full image or its thumbnail (small size)
# if small size is True then it will be very quick else if its false then it will open each photo to download it
# and it will take much more time
-friends_small_size = True
-photos_small_size = True
+friends_small_size = True
+photos_small_size = True
total_scrolls = 5000
current_scrolls = 0
@@ -496,7 +492,8 @@ def scrap_profile(ids):
try:
target_dir = os.path.join(folder, id.split('/')[-1])
while os.path.exists(target_dir):
input("A folder with the same profile name already exists. Kindly remove that folder first and press Return.")
input(
"A folder with the same profile name already exists. Kindly remove that folder first and press Return.")
os.mkdir(target_dir)
os.chdir(target_dir)
except:
@@ -508,26 +505,27 @@ def scrap_profile(ids):
print("Friends..")
# setting parameters for scrap_data() to scrap friends
scan_list = ["All", "Mutual Friends", "Following", "Followers", "Work", "College", "Current City", "Hometown"]
-section = ["/friends", "friends_mutual", "/following", "/followers", "/friends_work", "/friends_college", "/friends_current_city",
+section = ["/friends", "friends_mutual", "/following", "/followers", "/friends_work", "/friends_college",
+"/friends_current_city",
"/friends_hometown"]
elements_path = ["//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@class,'_3i9')][1]/div/div/ul/li[1]/div[2]/div/div/div/div/div[2]/ul/li/div/a",
"//*[contains(@class,'fbProfileBrowserListItem')]/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
"//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a"]
-file_names = ["All Friends.txt", "Mutual.txt", "Following.txt", "Followers.txt", "Work Friends.txt", "College Friends.txt",
+file_names = ["All Friends.txt", "Mutual.txt", "Following.txt", "Followers.txt", "Work Friends.txt",
+"College Friends.txt",
"Current City Friends.txt", "Hometown Friends.txt"]
save_status = 0
scrap_data(id, scan_list, section, elements_path, save_status, file_names)
print("Friends Done")
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
print("----------------------------------------")
print("Photos..")
print("Scraping Links..")
@@ -586,7 +584,7 @@ def scrap_profile(ids):
print("Posts(Statuses) Done")
print("----------------------------------------")
# ----------------------------------------------------------------------------
print("\nProcess Completed.")
return
@@ -601,6 +599,7 @@ def safe_find_element_by_id(driver, elem_id):
except NoSuchElementException:
return None
def login(email, password):
""" Logging into our own profile """
@@ -623,9 +622,10 @@ def login(email, password):
driver = webdriver.Chrome(executable_path="./chromedriver.exe", options=options)
except:
print("Kindly replace the Chrome Web Driver with the latest one from "
"http://chromedriver.chromium.org/downloads"
"http://chromedriver.chromium.org/downloads "
"and also make sure you have the latest Chrome Browser version."
"\nYour OS: {}".format(platform_)
-)
+)
exit()
driver.get("https://en-gb.facebook.com")
@@ -638,11 +638,13 @@ def login(email, password):
# clicking on login button
driver.find_element_by_id('loginbutton').click()
-# multi factor authentication
+# if your account uses multi factor authentication
mfa_code_input = safe_find_element_by_id(driver, 'approvals_code')
if mfa_code_input is None:
return
mfa_code_input.send_keys(input("MFA code: "))
mfa_code_input.send_keys(input("Enter MFA code: "))
driver.find_element_by_id('checkpointSubmitButton').click()
# there are so many screens asking you to verify things. Just skip them all
@@ -663,16 +665,24 @@ def login(email, password):
# -----------------------------------------------------------------------------
def main():
+with open('credentials.txt') as f:
+email = f.readline().split('"')[1]
+password = f.readline().split('"')[1]
+if email == "" or password == "":
+print("Your email or password is missing. Kindly write them in credentials.txt")
+exit()
ids = ["https://en-gb.facebook.com/" + line.split("/")[-1] for line in open("input.txt", newline='\n')]
if len(ids) > 0:
print("\nStarting Scraping...")
-login(config.email, config.password)
+login(email, password)
scrap_profile(ids)
driver.close()
else:
print("Input file is empty..")
print("Input file is empty.")
# -------------------------------------------------------------
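The driver setup above hardcodes ./chromedriver.exe, which only exists on the Windows branch, while the platform_ value formatted into the error message suggests the surrounding code branches per OS. A hedged sketch of per-OS driver selection with the same failure message (make_driver and the non-Windows binary path are assumptions, not shown in this commit):

    # Hypothetical per-OS chromedriver selection (not part of this commit).
    # Uses the Selenium 3 executable_path API pinned by requirements.txt below.
    import platform
    from selenium import webdriver

    def make_driver(options):
        system = platform.system()  # "Windows", "Linux", or "Darwin"
        binary = "./chromedriver.exe" if system == "Windows" else "./chromedriver"
        try:
            return webdriver.Chrome(executable_path=binary, options=options)
        except Exception:
            raise SystemExit(
                "Kindly replace the Chrome Web Driver with the latest one from "
                "http://chromedriver.chromium.org/downloads "
                "and also make sure you have the latest Chrome Browser version."
                "\nYour OS: {}".format(system))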

requirements.txt Normal file

@@ -0,0 +1 @@
+selenium==3.141.0
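With the dependency pinned, installation is a single pip install -r requirements.txt. The exact pin matters here: selenium 3.141.0 is the last Selenium 3 release, and the code above relies on Selenium 3 APIs such as executable_path= and driver.find_element_by_id(...), both of which were later removed in Selenium 4, so upgrading the package would break this script.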