--
-- demo\rosetta\rosettacode_cache.e
-- ================================
--
-- Common routines for handling rc_cache etc.
--
without js -- (libcurl, file i/o, peek, progress..)
include builtins\timedate.e
constant day = timedelta(days:=1)
integer refresh_cache = 21*day -- 0 to always refetch [NB refresh_cache += timedelta(days:=1) below]
function days(atom delta)
integer d = ceil(delta/day)
return sprintf("%d day%s",{d,iff(d=1?"":"s")})
end function
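-- e.g. days(day) ==> "1 day", days(45*day) ==> "45 days"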
constant {hex,ascii} = columnize({{"%22",`"`},
{"%27","'"},
{"%2A","*"},
{"%2B","+"},
{"%3A",":"},
{"%5E",`^`},
{"%E2%80%93","-"},
{"%E2%80%99","'"},
{"%C3%A8","e"},
{"%C3%A9","e"},
{"%C3%B6","o"},
{"%C5%91","o"},
{""",`"`},
{"'","'"},
{"_"," "}})
global function html_clean(string s)
return substitute_all(s,hex,ascii)
end function
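-- e.g. html_clean("Knuth%27s_algorithm") ==> "Knuth's algorithm" (illustrative;
-- only the escapes/entities tabled above are mapped)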
include builtins\libcurl.e
atom curl = NULL, pErrorBuffer
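-- libcurl write callback: peek size*nmemb bytes from pData and append them to
-- the open file (fn arrives via CURLOPT_WRITEDATA, set in open_download below)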
function write_callback(atom pData, integer size, nmemb, fn)
integer bytes_written = size * nmemb
puts(fn,peek({pData,bytes_written}))
return bytes_written
end function
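-- ('+' makes it a cdecl callback, the calling convention libcurl expects)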
constant write_cb = call_back({'+', write_callback})
global string wastitle = "" -- don't clobber "NEED EDITING"/Downloading messages
global integer show_title = progress
global function open_download(string filename, url, integer i=0, n=0)
object text
bool refetch = false
string why = "not found"
filename = join_path({"rc_cache",filename})
if file_exists(filename) then
-- use existing file if <= refresh_cache days old
sequence last_mod = get_file_date(filename)
atom delta = timedate_diff(last_mod,date())
refetch = true
if delta>refresh_cache
and not match(".hist",filename) then
why = days(delta) & " > " & days(refresh_cache)
elsif get_file_size(filename)=0 then
why = "filesize of 0"
else
text = trim(get_text(filename))
if not sequence(text) then
why = "no text"
elsif length(text)<10 then
why = "<10 bytes"
else
refetch = false
end if
end if
else
refetch = true
string directory = get_file_path(filename)
if get_file_type(directory)!=FILETYPE_DIRECTORY then
if not create_directory(directory,make_parent:=true) then
crash("cannot create %s directory",{directory})
end if
end if
end if
if refetch then
wastitle = "x" -- don't clobber
string nofn = iff(n?sprintf("(%d/%d, %.1f%%) ",{i,n,i/n*100}):""),
title = sprintf("Downloading %s%s (%s)...",{nofn,html_clean(filename),why})
show_title(title)
if curl=NULL then
curl_global_init()
curl = curl_easy_init()
pErrorBuffer = allocate(CURL_ERROR_SIZE)
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
end if
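-- undo the filename-safe escapes applied by dewiki() below; the cached
-- filename keeps them (":" and "*" are invalid in Windows filenames)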
url = substitute(url,"%3A",":")
url = substitute(url,"%2A","*")
curl_easy_setopt(curl, CURLOPT_URL, url)
integer fn = open(filename,"wb")
assert(fn!=-1,"cannot open "&filename)
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
while true do
CURLcode res = curl_easy_perform(curl)
if res=CURLE_OK then exit end if
string error = sprintf("%d",res)
if res=CURLE_COULDNT_RESOLVE_HOST then
error &= " [CURLE_COULDNT_RESOLVE_HOST]"
end if
progress("Error %s downloading file, retry?(Y/N):",{error})
if lower(wait_key())!='y' then abort(0) end if
printf(1,"Y\n")
end while
close(fn)
refresh_cache += timedelta(days:=1) -- did I mention it is slow?
text = get_text(filename)
end if
return text
end function
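-- e.g. open_download("100_doors.htm","http://rosettacode.org/wiki/100_doors")
--      (illustrative) returns rc_cache\100_doors.htm if recent enough,
--      otherwise (re-)fetches it first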
global function open_category(string filename, integer i=0, n=0)
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename,i,n)
end function
global function dewiki(string s, sequence exclude={})
-- extract tasks from eg `<li><a href="/wiki/100_doors"`, skipping any in exclude
sequence tasks = {}
integer start = 1, finish = match(`<div class="printfooter">`,s)
s = s[1..finish-1]
while true do
start = match(`<li><a href="/wiki/`,s,start)
if start=0 then exit end if
start += length(`<li><a href="/wiki/`)
finish = find('"',s,start)
string task = s[start..finish-1]
if not find(task,exclude) then
task = substitute_all(task,{"*",":"},{"%2A","%3A"})
tasks = append(tasks,task)
end if
start = finish+1
end while
return tasks
end function
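-- e.g. dewiki(open_category("Programming_Tasks")) -- (illustrative) yields
--      a list of task names such as {"100_doors",...}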
global procedure curl_cleanup()
if curl!=NULL then
curl_easy_cleanup(curl)
free(pErrorBuffer)
curl = NULL
pErrorBuffer = NULL
end if
end procedure
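--
-- Typical driver, an illustrative sketch only (the category name and the
-- per-page processing are placeholders, not part of this file):
--
--  sequence tasks = dewiki(open_category("Programming_Tasks"))
--  for i=1 to length(tasks) do
--      string page = open_download(tasks[i]&".htm",
--                    "http://rosettacode.org/wiki/"&tasks[i],i,length(tasks))
--      -- ... process page ...
--  end for
--  curl_cleanup()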