-- (notonline)
--
-- demo\rosetta\Count_examples.exw
-- ===============================
--
-- (This uses a few '&' instead of/as well as 'a', fairly obviously for everyone's sanity..)
-- Counts no of "{{he&der|" (nb not "=={{he&der|") via web api (but gets tasks via scraping).
-- Since downloading all the pages can be very slow, this uses a cache.
-- Limiting (notdone) by "Phix" fairly obviously speeds it up tenfold :-)
--
without js -- (fairly obviously this will never ever run in a browser!)

-- Settings:
--  include_drafts: also process the "Draft_Programming_Tasks" category
--  sort_by_count:  collect per-task counts and print them all at the end,
--                  highest count first, instead of printing a sample as we go
--  notlang:        when non-empty, tasks already listed in that language's
--                  category are skipped
constant include_drafts = true,
         sort_by_count = false,
--       notlang = "Phix" -- or "" (ie a zero length string) for all
         notlang = ""

-- NOTE(review): open_category, open_download, dewiki, html_clean, progress and
-- curl_cleanup are not defined in this file; presumably they all come from this
-- include - confirm.
include rosettacode_cache.e

--
-- Counts the he&der templates on every task page and returns the grand total.
--
-- Ensures the "rc_cache" directory exists (crashing if it cannot be created),
-- builds the task list (optionally with drafts, optionally minus the tasks
-- already present in the notlang category), then fetches the raw text of each
-- task and counts its he&der templates, ignoring immediate repeats of the same
-- template. Per-task results are reported via progress(); pressing escape
-- abandons the main loop early, in which case the returned total only covers
-- the tasks processed so far.
--
function count_tasks()
    if get_file_type("rc_cache")!=FILETYPE_DIRECTORY then
        if not create_directory("rc_cache") then
            crash("cannot create rc_cache directory")
        end if
    end if
    -- note this lot use web scraping (as cribbed from a similar task) ...
    sequence tasks = dewiki(open_category("Programming_Tasks"))
    if include_drafts then
        tasks &= dewiki(open_category("Draft_Programming_Tasks"))
        tasks = sort(tasks)
    end if
    if length(notlang) then
        -- filter already done in specified language
        string langurl = "http://rosettacode.org/wiki/Category:"&notlang
        sequence done = dewiki(open_download(notlang&".htm",langurl))
        integer k = 0   -- number of tasks kept so far (compacted in situ)
        for i=1 to length(tasks) do
            string ti = tasks[i]
            integer d = find(ti,done)
            if not d then
                k += 1
                tasks[k] = ti
            else
                -- each done entry can only match once, so shrink the search list
                done[d..d] = {}
            end if
        end for
        tasks = tasks[1..k]
        done = {}
    end if
    progress("%d tasks found\n",{length(tasks)})
    -- ... whereas the individual tasks use the web api instead (3x smaller/faster)
    -- (the marker is deliberately built from two pieces, so that this source
    --  does not itself contain one)
    string hdr = `{{hea`&`der|`
    integer total_count = 0
    sequence task_counts = repeat(0,length(tasks))
    for i=1 to length(tasks) do
        string ti = tasks[i],
               url = sprintf("http://rosettacode.org/mw/index.php?title=%s&action=raw",{ti}),
               contents = open_download(ti&".raw",url),
               prev = "", curr
        integer count = 0, start = 1
        while true do
            start = match(hdr,contents,start)
            if start=0 then exit end if
            --
            -- skip duplicates/we also have to cope with eg
            --  =={{he&der|Python}}==                                    \
            --  ==={{he&der|Python}} Original===                          } count
            --  ==={{he&der|Python}} Succinct===                          }  once
            --  ==={{he&der|Python}} Recursive ===                       /
            --  =={{he&der|Mathematica}} / {{he&der|Wolfram Language}}== \
            --  =={{he&der|Icon}} and {{he&der|Unicon}}==                 } count
            --  == {{he&der|Icon}} and {{he&der|Unicon}} ==              /   both
            --  == {{he&der|Java}}==
            -- etc. Note however that this /does/ count eg
            --  ==={{he&der|Applesoft BASIC}}===                         \
            --  ==={{he&der|BASIC256}}===                                 } count
            --  ==={{he&der|Commodore BASIC}}===                          }  'em
            --  ==={{he&der|IS-BASIC}}===                                 }  all
            --  ==={{he&der|Sinclair ZX81 BASIC}}===                     /
            --
            integer closing = match(`}}`,contents,start+1)
            -- An unterminated template (malformed or truncated page) used to
            -- yield contents[start..0], an invalid slice. Nothing after this
            -- point can be terminated either, so just stop scanning the page.
            if closing=0 then exit end if
            curr = contents[start..closing]
            if curr!=prev then
                count += 1
            end if
            prev = curr
            start += length(hdr)
        end while
        if sort_by_count then
            task_counts[i] = count
        elsif length(notlang) or i<=2 or i>=length(tasks)-1 or mod(i,200)=0 then
            -- when unfiltered, only show the first two, last two and every 200th
            progress("%s: %d\n",{html_clean(ti),count})
        end if
        total_count += count
        if get_key()=#1B then progress("escape keyed\n") exit end if
    end for
    curl_cleanup()
    if sort_by_count then
        -- tags are task indexes ordered by ascending count; walk them backwards
        -- to list the highest counts first
        sequence tags = custom_sort(task_counts,tagset(length(tasks)))
        for i=length(tags) to 1 by -1 do
            integer ti = tags[i]
            progress("%s: %d\n",{html_clean(tasks[ti]),task_counts[ti]})
        end for
    end if
    return total_count
end function

progress("Total: %d\n",{count_tasks()})