107 lines
21 KiB
Plaintext
107 lines
21 KiB
Plaintext
(notonline)-->
|
|
<span style="color: #000080;font-style:italic;">--
|
|
-- demo\rosetta\Count_examples.exw
|
|
-- ===============================
|
|
--
|
|
-- (This uses a few '&' instead of/as well as 'a', fairly obviously for everyone's sanity..)
|
|
-- Counts no of "<nowiki>{{</nowiki>he&der|" (nb not "==<nowiki>{{</nowiki>he&der|") via web api (but gets tasks via scraping).
|
|
-- Since downloading all the pages can be very slow, this uses a cache.
|
|
-- Limiting (notdone) by "Phix" fairly obviously speeds it up tenfold :-)
|
|
--</span>
|
|
<span style="color: #008080;">without</span> <span style="color: #008080;">js</span> <span style="color: #000080;font-style:italic;">-- (fairly obviously this will never ever run in a browser!)</span>
|
|
<span style="color: #008080;">constant</span> <span style="color: #000000;">include_drafts</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span><span style="color: #0000FF;">,</span>
|
|
<span style="color: #000000;">sort_by_count</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span><span style="color: #0000FF;">,</span>
|
|
<span style="color: #000080;font-style:italic;">-- notlang = "Phix" -- or "" (ie a zero length string) for all</span>
|
|
<span style="color: #000000;">notlang</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
|
|
|
|
<span style="color: #008080;">include</span> <span style="color: #000000;">rosettacode_cache</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
|
|
|
|
<span style="color: #008080;">function</span> <span style="color: #000000;">count_tasks</span><span style="color: #0000FF;">()</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_file_type</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)!=</span><span style="color: #004600;">FILETYPE_DIRECTORY</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">create_directory</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rc_cache"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #7060A8;">crash</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"cannot create rc_cache directory"</span><span style="color: #0000FF;">)</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #000080;font-style:italic;">-- note this lot use web scraping (as cribbed from a similar task) ...</span>
|
|
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Programming_Tasks"</span><span style="color: #0000FF;">))</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #000000;">include_drafts</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_category</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Draft_Programming_Tasks"</span><span style="color: #0000FF;">))</span>
|
|
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">notlang</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #000080;font-style:italic;">-- filter already done in specified language</span>
|
|
<span style="color: #004080;">string</span> <span style="color: #000000;">langurl</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"http://rosettacode.org/wiki/Category:"</span><span style="color: #0000FF;">&</span><span style="color: #000000;">notlang</span>
|
|
<span style="color: #004080;">sequence</span> <span style="color: #000000;">done</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">dewiki</span><span style="color: #0000FF;">(</span><span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">notlang</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".htm"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">langurl</span><span style="color: #0000FF;">))</span>
|
|
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
|
|
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
|
|
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
|
|
<span style="color: #004080;">integer</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">find</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">,</span><span style="color: #000000;">done</span><span style="color: #0000FF;">)</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">d</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
|
|
<span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ti</span>
|
|
<span style="color: #008080;">else</span>
|
|
<span style="color: #000000;">done</span><span style="color: #0000FF;">[</span><span style="color: #000000;">d</span><span style="color: #0000FF;">..</span><span style="color: #000000;">d</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
|
<span style="color: #000000;">tasks</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span>
|
|
<span style="color: #000000;">done</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%d tasks found\n"</span><span style="color: #0000FF;">,{</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)})</span>
|
|
<span style="color: #000080;font-style:italic;">-- ... whereas the individual tasks use the web api instead (3x smaller/faster)</span>
|
|
<span style="color: #004080;">integer</span> <span style="color: #000000;">total_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
|
|
<span style="color: #004080;">sequence</span> <span style="color: #000000;">task_counts</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">))</span>
|
|
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
|
|
<span style="color: #004080;">string</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span>
|
|
<span style="color: #000000;">url</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"http://rosettacode.org/mw/index.php?title=%s&action=raw"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">}),</span>
|
|
<span style="color: #000000;">contents</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">open_download</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">&</span><span style="color: #008000;">".raw"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">url</span><span style="color: #0000FF;">),</span>
|
|
<span style="color: #000000;">prev</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">curr</span>
|
|
<span style="color: #004080;">integer</span> <span style="color: #000000;">count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
|
|
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
|
|
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>{{</nowiki>hea`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`der|`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">)</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #000000;">start</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #000080;font-style:italic;">--
|
|
-- skip duplicates/we also have to cope with eg
|
|
-- ==<nowiki>{{</nowiki>he&der|Python<nowiki>}}</nowiki>== \
|
|
-- ===<nowiki>{{</nowiki>he&der|Python<nowiki>}}</nowiki> Original=== } count
|
|
-- ===<nowiki>{{</nowiki>he&der|Python<nowiki>}}</nowiki> Succinct=== } once
|
|
-- ===<nowiki>{{</nowiki>he&der|Python<nowiki>}}</nowiki> Recursive === /
|
|
-- ==<nowiki>{{</nowiki>he&der|Mathematica<nowiki>}}</nowiki> / <nowiki>{{</nowiki>he&der|Wolfram Language<nowiki>}}</nowiki>== \
|
|
-- ==<nowiki>{{</nowiki>he&der|Icon<nowiki>}}</nowiki> and <nowiki>{{</nowiki>he&der|Unicon<nowiki>}}</nowiki>== } count
|
|
-- == <nowiki>{{</nowiki>he&der|Icon<nowiki>}}</nowiki> and <nowiki>{{</nowiki>he&der|Unicon<nowiki>}}</nowiki> == / both
|
|
-- == <nowiki>{{</nowiki>he&der|Java<nowiki>}}</nowiki>==
|
|
-- etc. Note however that this /does/ count eg
|
|
-- ===<nowiki>{{</nowiki>he&der|Applesoft BASIC<nowiki>}}</nowiki>=== \
|
|
-- ===<nowiki>{{</nowiki>he&der|BASIC256<nowiki>}}</nowiki>=== } count
|
|
-- ===<nowiki>{{</nowiki>he&der|Commodore BASIC<nowiki>}}</nowiki>=== } 'em
|
|
-- ===<nowiki>{{</nowiki>he&der|IS-BASIC<nowiki>}}</nowiki>=== } all
|
|
-- ===<nowiki>{{</nowiki>he&der|Sinclair ZX81 BASIC<nowiki>}}</nowiki>=== /
|
|
--</span>
|
|
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">contents</span><span style="color: #0000FF;">[</span><span style="color: #000000;">start</span><span style="color: #0000FF;">..</span><span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>}}</nowiki>`</span><span style="color: #0000FF;">,</span><span style="color: #000000;">contents</span><span style="color: #0000FF;">,</span><span style="color: #000000;">start</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)]</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #000000;">curr</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">prev</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #000000;">count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #000000;">prev</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">curr</span>
|
|
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #008000;">`<nowiki>{{</nowiki>hea`</span><span style="color: #0000FF;">&</span><span style="color: #008000;">`der|`</span><span style="color: #0000FF;">)</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_count</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">count</span>
|
|
<span style="color: #008080;">elsif</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">notlang</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">or</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #000000;">2</span> <span style="color: #008080;">or</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">>=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span> <span style="color: #008080;">or</span> <span style="color: #7060A8;">mod</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">200</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">),</span><span style="color: #000000;">count</span><span style="color: #0000FF;">})</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #000000;">total_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">count</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #7060A8;">get_key</span><span style="color: #0000FF;">()=</span><span style="color: #000000;">#1B</span> <span style="color: #008080;">then</span> <span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"escape keyed\n"</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
|
<span style="color: #000000;">curl_cleanup</span><span style="color: #0000FF;">()</span>
|
|
<span style="color: #008080;">if</span> <span style="color: #000000;">sort_by_count</span> <span style="color: #008080;">then</span>
|
|
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">)))</span>
|
|
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
|
|
<span style="color: #004080;">integer</span> <span style="color: #000000;">ti</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
|
|
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%s: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">html_clean</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tasks</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]),</span><span style="color: #000000;">task_counts</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ti</span><span style="color: #0000FF;">]})</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
|
<span style="color: #008080;">return</span> <span style="color: #000000;">total_count</span>
|
|
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
|
|
|
|
<span style="color: #7060A8;">progress</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"Total: %d\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">count_tasks</span><span style="color: #0000FF;">()})</span>
|
|
<!--
|