Added support for automatic chromedriver management. Removed the bundled chromedriver binaries.
parent 4ed08fe3d2
commit a0f9c17c65
3 binary files not shown (the removed chromedriver binaries).
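The shape of the change in one place: instead of shipping per-platform chromedriver binaries and picking one by platform at run time, the scraper now lets webdriver_manager download and cache a compatible chromedriver. A minimal sketch of that pattern, assuming Selenium 3's executable_path keyword (the project pins selenium==3.141.0); the headless flag and the URL are illustrative only, not taken from the scraper:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from webdriver_manager.chrome import ChromeDriverManager

    options = Options()
    options.add_argument("--headless")  # illustrative flag, not the scraper's own setup

    # install() downloads a chromedriver for the local Chrome, or reuses a cached
    # copy, and returns the filesystem path of the binary.
    driver = webdriver.Chrome(
        executable_path=ChromeDriverManager().install(), options=options
    )
    driver.get("https://example.com")
    driver.quit()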
@@ -14,6 +14,7 @@ from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
+from webdriver_manager.chrome import ChromeDriverManager


def get_facebook_images_url(img_links):
@@ -118,13 +119,13 @@ def extract_and_write_posts(elements, filename):

                status = utils.get_status(x, selectors)
                if (
                    title.text
                    == driver.find_element_by_id(selectors.get("title_text")).text
                ):
                    if status == "":
                        temp = utils.get_div_links(x, "img", selectors)
                        if (
                            temp == ""
                        ):  # no image tag which means . it is not a life event
                            link = utils.get_div_links(x, "a", selectors).get_attribute(
                                "href"
@@ -156,13 +157,13 @@ def extract_and_write_posts(elements, filename):
                            status = utils.get_div_links(x, "a", selectors).text

                elif (
                    title.text.find(" added ") != -1 and title.text.find("photo") != -1
                ):
                    type = "added photo"
                    link = utils.get_div_links(x, "a", selectors).get_attribute("href")

                elif (
                    title.text.find(" added ") != -1 and title.text.find("video") != -1
                ):
                    type = "added video"
                    link = utils.get_div_links(x, "a", selectors).get_attribute("href")
@@ -177,16 +178,16 @@ def extract_and_write_posts(elements, filename):
                title = title.replace("\n", " ")

                line = (
                    str(time)
                    + " || "
                    + str(type)
                    + " || "
                    + str(title)
                    + " || "
                    + str(status)
                    + " || "
                    + str(link)
                    + "\n"
                )

                try:
@@ -404,7 +405,6 @@ def save_to_file(name, elements, status, current_section):


def scrape_data(user_id, scan_list, section, elements_path, save_status, file_names):

    """Given some parameters, this function can scrap friends/photos/videos/about/posts(statuses) of a profile"""
    page = []

@@ -418,7 +418,7 @@ def scrape_data(user_id, scan_list, section, elements_path, save_status, file_na
        driver.get(page[i])

        if (
            (save_status == 0) or (save_status == 1) or (save_status == 2)
        ):  # Only run this for friends, photos and videos

            # the bar which contains all the sections
@@ -453,7 +453,7 @@ def scrape_data(user_id, scan_list, section, elements_path, save_status, file_na
def create_original_link(url):
    if url.find(".php") != -1:
        original_link = (
            facebook_https_prefix + facebook_link_body + ((url.split("="))[1])
        )

        if original_link.find("&") != -1:
@@ -461,15 +461,15 @@ def create_original_link(url):

    elif url.find("fnr_t") != -1:
        original_link = (
            facebook_https_prefix
            + facebook_link_body
            + ((url.split("/"))[-1].split("?")[0])
        )
    elif url.find("_tab") != -1:
        original_link = (
            facebook_https_prefix
            + facebook_link_body
            + (url.split("?")[0]).split("/")[-1]
        )
    else:
        original_link = url
@@ -555,20 +555,8 @@ def login(email, password):

    try:
-        platform_ = platform.system().lower()
-        chromedriver_versions = {
-            "linux": os.path.join(
-                os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_linux64",
-            ),
-            "darwin": os.path.join(
-                os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_mac64",
-            ),
-            "windows": os.path.join(
-                os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_win32.exe",
-            ),
-        }

        driver = webdriver.Chrome(
-            executable_path=chromedriver_versions[platform_], options=options
+            executable_path=ChromeDriverManager().install(), options=options
        )
    except Exception:
        print(
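For context on the swapped executable_path line above: ChromeDriverManager().install() downloads a chromedriver (reusing a locally cached copy when one exists) and returns the filesystem path of the binary, which is why the per-platform lookup table is no longer needed. The resolved path can be checked on its own with a small sketch like:

    from webdriver_manager.chrome import ChromeDriverManager

    # Prints wherever webdriver_manager downloaded (or found) the chromedriver binary.
    print(ChromeDriverManager().install())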
@@ -649,7 +637,6 @@ def scraper(**kwargs):
# -------------------------------------------------------------

if __name__ == "__main__":

    ap = argparse.ArgumentParser()
    # PLS CHECK IF HELP CAN BE BETTER / LESS AMBIGUOUS
    ap.add_argument(
setup.py (2 changed lines)
@@ -27,7 +27,7 @@ setuptools.setup(
    ],
    python_requires=">=3.7",
    extras_require={"dev": ["black", "twine", "wheel"],},
-    install_requires=["selenium==3.141.0", "pyyaml"],
+    install_requires=["selenium==3.141.0", "pyyaml", "webdriver_manager"],
    entry_points={
        "console_scripts": ["ultimate-facebook-scraper=scraper.__main__:scraper",],
    },
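Because the driver is now fetched at run time, webdriver_manager is a hard runtime dependency, hence the new install_requires entry rather than an extras_require one. A quick, hedged way to confirm the dependency resolves after installing the package in a fresh environment:

    import importlib

    # Raises ModuleNotFoundError if the new install_requires entry was not installed.
    importlib.import_module("webdriver_manager.chrome")
    print("webdriver_manager is importable")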