import logging from typing import Iterable from typing import NamedTuple from typing import Set import requests from requests.adapters import HTTPAdapter from requests.adapters import Retry logging.basicConfig(level=logging.DEBUG) URL = "https://rosettacode.org/w/api.php" CM_QUERY = { "action": "query", "generator": "categorymembers", "format": "json", "formatversion": "2", "prop": "info", "inprop": "url|touched", "gcmlimit": 500, } class PageInfo(NamedTuple): pageid: int ns: int title: str contentmodel: str pagelanguage: str pagelanguagehtmlcode: str pagelanguagedir: str touched: str lastrevid: int length: int fullurl: str editurl: str canonicalurl: str def get_session() -> requests.Session: """Setup a requests.Session with retries.""" retry_strategy = Retry( total=5, status_forcelist=[429, 500, 502, 503, 504], allowed_methods=["HEAD", "GET", "OPTIONS"], ) adapter = HTTPAdapter(max_retries=retry_strategy) session = requests.Session() session.mount("https://", adapter) session.mount("http://", adapter) return session def category_members(category: str, url: str = URL) -> Iterable[PageInfo]: params = {**CM_QUERY, "gcmtitle": category} session = get_session() response = session.get(url, params=params) response.raise_for_status() data = response.json() if not data.get("query"): # Empty category return for page in data["query"]["pages"]: yield PageInfo(**{k: v for k, v in page.items() if k in PageInfo._fields}) _continue = data.get("continue", {}).get("gcmcontinue") while _continue: params["continue"] = data["continue"]["continue"] params["gcmcontinue"] = _continue response = session.get(url, params=params) response.raise_for_status() data = response.json() for page in data["query"]["pages"]: yield PageInfo(**{k: v for k, v in page.items() if k in PageInfo._fields}) _continue = data.get("continue", {}).get("gcmcontinue") def lang_tasks(language: str) -> Set[PageInfo]: return set(category_members(f"Category:{language}")) def omitted_tasks(language: str) -> Set[PageInfo]: return set(category_members(f"Category:{language}/Omit")) def unimplemented_tasks( lang_tasks: Set[PageInfo], omitted: Set[PageInfo] ) -> Set[PageInfo]: tasks = set(category_members("Category:Programming Tasks")) return tasks.difference(lang_tasks).difference(omitted) def unimplemented_draft_tasks( lang_tasks: Set[PageInfo], omitted: Set[PageInfo] ) -> Set[PageInfo]: tasks = set(category_members("Category:Draft Programming Tasks")) return tasks.difference(lang_tasks).difference(omitted) def display(title: str, pages: Iterable[PageInfo]) -> None: print(title) for page in sorted(pages, key=lambda p: p.title): print(" ", page.title, page.canonicalurl) print("") if __name__ == "__main__": import sys if len(sys.argv) > 1: language = sys.argv[1] else: language = "Python" tasks = lang_tasks(language) omitted = omitted_tasks(language) display("Programming Tasks", unimplemented_tasks(tasks, omitted)) display("Draft Programming Tasks", unimplemented_draft_tasks(tasks, omitted)) display("Omitted Tasks", omitted)