Added support for automatic chromedriver management. Removed chromedriver binaries.

This commit is contained in:
Amit Yaffe 2020-04-06 11:01:59 +03:00
parent 4ed08fe3d2
commit a0f9c17c65
5 changed files with 26 additions and 39 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -14,6 +14,7 @@ from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
def get_facebook_images_url(img_links):
@ -118,13 +119,13 @@ def extract_and_write_posts(elements, filename):
status = utils.get_status(x, selectors)
if (
title.text
== driver.find_element_by_id(selectors.get("title_text")).text
title.text
== driver.find_element_by_id(selectors.get("title_text")).text
):
if status == "":
temp = utils.get_div_links(x, "img", selectors)
if (
temp == ""
temp == ""
): # no image tag, which means it is not a life event
link = utils.get_div_links(x, "a", selectors).get_attribute(
"href"
@ -156,13 +157,13 @@ def extract_and_write_posts(elements, filename):
status = utils.get_div_links(x, "a", selectors).text
elif (
title.text.find(" added ") != -1 and title.text.find("photo") != -1
title.text.find(" added ") != -1 and title.text.find("photo") != -1
):
type = "added photo"
link = utils.get_div_links(x, "a", selectors).get_attribute("href")
elif (
title.text.find(" added ") != -1 and title.text.find("video") != -1
title.text.find(" added ") != -1 and title.text.find("video") != -1
):
type = "added video"
link = utils.get_div_links(x, "a", selectors).get_attribute("href")
@ -177,16 +178,16 @@ def extract_and_write_posts(elements, filename):
title = title.replace("\n", " ")
line = (
str(time)
+ " || "
+ str(type)
+ " || "
+ str(title)
+ " || "
+ str(status)
+ " || "
+ str(link)
+ "\n"
str(time)
+ " || "
+ str(type)
+ " || "
+ str(title)
+ " || "
+ str(status)
+ " || "
+ str(link)
+ "\n"
)
try:
@ -404,7 +405,6 @@ def save_to_file(name, elements, status, current_section):
def scrape_data(user_id, scan_list, section, elements_path, save_status, file_names):
"""Given some parameters, this function can scrape friends/photos/videos/about/posts(statuses) of a profile"""
page = []
@ -418,7 +418,7 @@ def scrape_data(user_id, scan_list, section, elements_path, save_status, file_na
driver.get(page[i])
if (
(save_status == 0) or (save_status == 1) or (save_status == 2)
(save_status == 0) or (save_status == 1) or (save_status == 2)
): # Only run this for friends, photos and videos
# the bar which contains all the sections
@ -453,7 +453,7 @@ def scrape_data(user_id, scan_list, section, elements_path, save_status, file_na
def create_original_link(url):
if url.find(".php") != -1:
original_link = (
facebook_https_prefix + facebook_link_body + ((url.split("="))[1])
facebook_https_prefix + facebook_link_body + ((url.split("="))[1])
)
if original_link.find("&") != -1:
@ -461,15 +461,15 @@ def create_original_link(url):
elif url.find("fnr_t") != -1:
original_link = (
facebook_https_prefix
+ facebook_link_body
+ ((url.split("/"))[-1].split("?")[0])
facebook_https_prefix
+ facebook_link_body
+ ((url.split("/"))[-1].split("?")[0])
)
elif url.find("_tab") != -1:
original_link = (
facebook_https_prefix
+ facebook_link_body
+ (url.split("?")[0]).split("/")[-1]
facebook_https_prefix
+ facebook_link_body
+ (url.split("?")[0]).split("/")[-1]
)
else:
original_link = url
@ -555,20 +555,8 @@ def login(email, password):
try:
platform_ = platform.system().lower()
chromedriver_versions = {
"linux": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_linux64",
),
"darwin": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_mac64",
),
"windows": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_win32.exe",
),
}
driver = webdriver.Chrome(
executable_path=chromedriver_versions[platform_], options=options
executable_path=ChromeDriverManager().install(), options=options
)
except Exception:
print(
@ -649,7 +637,6 @@ def scraper(**kwargs):
# -------------------------------------------------------------
if __name__ == "__main__":
ap = argparse.ArgumentParser()
# PLS CHECK IF HELP CAN BE BETTER / LESS AMBIGUOUS
ap.add_argument(

View File

@ -27,7 +27,7 @@ setuptools.setup(
],
python_requires=">=3.7",
extras_require={"dev": ["black", "twine", "wheel"],},
install_requires=["selenium==3.141.0", "pyyaml"],
install_requires=["selenium==3.141.0", "pyyaml", "webdriver_manager"],
entry_points={
"console_scripts": ["ultimate-facebook-scraper=scraper.__main__:scraper",],
},