Parallelized updating alexa ranking
parent 1442f333c2
commit 3db3f4558b
site_list.py | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
@@ -7,6 +7,7 @@ import requests
 import threading
 from bs4 import BeautifulSoup as bs
 from datetime import datetime
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
 
 pool = list()
 
@@ -22,7 +23,15 @@ def get_rank(domain_to_query, dest):
                     if strong.has_attr("class"):
                         if "metrics-data" in strong["class"]:
                             result = int(strong.text.strip().replace(',', ''))
-    dest = result
+    dest['rank'] = result
 
+parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
+                        )
+parser.add_argument("--rank","-r",
+                    action="store_true", dest="rank", default=False,
+                    help="Update all website ranks (not recommended)."
+                    )
+args = parser.parse_args()
+
 with open("data.json", "r", encoding="utf-8") as data_file:
     data = json.load(data_file)
@@ -33,19 +42,26 @@ with open("sites.md", "w") as site_file:
 
     for social_network in data:
         url_main = data.get(social_network).get("urlMain")
-        th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)["rank"]))
-        pool.append((url_main, th))
-        th.start()
+        data.get(social_network)["rank"] = 0
+        if args.rank:
+            th = threading.Thread(target=get_rank, args=(url_main, data.get(social_network)))
+        else:
+            th = None
+        pool.append((social_network, url_main, th))
+        if args.rank:
+            th.start()
 
     index = 1
-    for social_network, th in pool:
-        th.join()
+    for social_network, url_main, th in pool:
+        if args.rank:
+            th.join()
         site_file.write(f'{index}. [{social_network}]({url_main})\n')
         sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
         sys.stdout.flush()
         index = index + 1
 
-    site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
+    if args.rank:
+        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
 
 sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
 
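The key fix inside get_rank() is that it now mutates the dictionary it was handed (dest['rank'] = result) instead of rebinding its parameter (dest = result), which only changed a local name and was never visible to the caller. Mutating the shared dict is what lets each worker thread hand its rank back to the main thread. A minimal standalone illustration of that Python behaviour (not code from this repository):

import threading

def rebinding_worker(dest):
    dest = 42              # rebinds a local name only; the caller never sees it

def mutating_worker(dest):
    dest["rank"] = 42      # mutates the caller's dict; the caller sees the value

entry = {"rank": 0}
th = threading.Thread(target=mutating_worker, args=(entry,))
th.start()
th.join()
print(entry["rank"])       # prints 42; rebinding_worker would have left it at 0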
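Taken together, the loop now fans out one optional worker per site, keeps the site name, URL, and thread together in pool, and joins each worker only when args.rank is set, right before that site's line is written. A standalone sketch of the same fan-out/join pattern with hypothetical site data (names and values below are illustrative, not taken from data.json):

import threading

def get_rank(url, entry):
    entry["rank"] = len(url)   # stand-in for the real Alexa lookup

sites = {"ExampleSite": {"urlMain": "https://example.com", "rank": 0}}
update_ranks = True            # plays the role of args.rank
pool = []

for name, entry in sites.items():
    entry["rank"] = 0
    th = threading.Thread(target=get_rank, args=(entry["urlMain"], entry)) if update_ranks else None
    pool.append((name, entry["urlMain"], th))
    if th is not None:
        th.start()

for index, (name, url, th) in enumerate(pool, start=1):
    if th is not None:
        th.join()              # after join, entry["rank"] is populated
    print(f"{index}. [{name}]({url})")

With the new flag in place, the generator would typically be invoked as python3 site_list.py --rank to refresh ranks, or without the flag to rebuild sites.md quickly without querying alexa.com.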