--
-- demo\rosetta\rosettacode_cache.e
-- ================================
--
-- Common routines for handling rc_cache etc.
--
without js -- (libcurl, file i/o, peek, progress..)
include builtins\timedate.e
constant day = timedelta(days:=1)
integer refresh_cache = 21*day -- 0 to always refetch [NB refresh_cache += timedelta(days:=1) below]
function days(atom delta)
integer d = ceil(delta/day)
return sprintf("%d day%s",{d,iff(d=1?"":"s")})
end function
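-- e.g. days(day) ==> "1 day", days(45*day) ==> "45 days"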
constant {hex,ascii} = columnize({{"%22",`"`},
{"%27","'"},
{"%2A","*"},
{"%2B","+"},
{"%3A",":"},
{"%5E",`^`},
{"%E2%80%93","-"},
{"%E2%80%99","'"},
{"%C3%A8","e"},
{"%C3%A9","e"},
{"%C3%B6","o"},
{"%C5%91","o"},
{""",`"`},
{"'","'"},
{"_"," "}})
global function html_clean(string s)
return substitute_all(s,hex,ascii)
end function
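-- e.g. html_clean("Knuth%27s_algorithm") ==> "Knuth's algorithm" (illustrative;
-- only the escapes/entities tabled above are mapped)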
include builtins\libcurl.e
atom curl = NULL, pErrorBuffer
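-- libcurl write callback: peek size*nmemb bytes from pData and append them to
-- the open file (fn arrives via CURLOPT_WRITEDATA, set in open_download below)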
function write_callback(atom pData, integer size, nmemb, fn)
integer bytes_written = size * nmemb
puts(fn,peek({pData,bytes_written}))
return bytes_written
end function
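-- ('+' makes it a cdecl callback, the calling convention libcurl expects)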
constant write_cb = call_back({'+', write_callback})
global string wastitle = "" -- don't clobber "NEED EDITING"/Downloading messages
global integer show_title = progress
global function open_download(string filename, url, integer i=0, n=0)
object text
bool refetch = false
string why = "not found"
filename = join_path({"rc_cache",filename})
if file_exists(filename) then
-- use existing file if <= refresh_cache days old
sequence last_mod = get_file_date(filename)
atom delta = timedate_diff(last_mod,date())
refetch = true
if delta>refresh_cache
and not match(".hist",filename) then
why = days(delta) & " > " & days(refresh_cache)
elsif get_file_size(filename)=0 then
why = "filesize of 0"
else
text = trim(get_text(filename))
if not sequence(text) then
why = "no text"
elsif length(text)<10 then
why = "<10 bytes"
else
refetch = false
end if
end if
else
refetch = true
string directory = get_file_path(filename)
if get_file_type(directory)!=FILETYPE_DIRECTORY then
if not create_directory(directory,make_parent:=true) then
crash("cannot create %s directory",{directory})
end if
end if
end if
if refetch then
wastitle = "x" -- don't clobber
string nofn = iff(n?sprintf("(%d/%d, %.1f%%) ",{i,n,i/n*100}):""),
title = sprintf("Downloading %s%s (%s)...",{nofn,html_clean(filename),why})
show_title(title)
if curl=NULL then
curl_global_init()
curl = curl_easy_init()
pErrorBuffer = allocate(CURL_ERROR_SIZE)
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, pErrorBuffer)
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb)
end if
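-- undo the filename-safe escapes applied by dewiki() below; the cached
-- filename keeps them (":" and "*" are invalid in Windows filenames)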
url = substitute(url,"%3A",":")
url = substitute(url,"%2A","*")
curl_easy_setopt(curl, CURLOPT_URL, url)
integer fn = open(filename,"wb")
assert(fn!=-1,"cannot open "&filename)
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fn)
while true do
CURLcode res = curl_easy_perform(curl)
if res=CURLE_OK then exit end if
string error = sprintf("%d",res)
if res=CURLE_COULDNT_RESOLVE_HOST then
error &= " [CURLE_COULDNT_RESOLVE_HOST]"
end if
progress("Error %s downloading file, retry?(Y/N):",{error})
if lower(wait_key())!='y' then abort(0) end if
printf(1,"Y\n")
end while
close(fn)
refresh_cache += timedelta(days:=1) -- did I mention it is slow?
text = get_text(filename)
end if
return text
end function
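-- e.g. open_download("100_doors.htm","http://rosettacode.org/wiki/100_doors")
--      (illustrative) returns rc_cache\100_doors.htm if recent enough,
--      otherwise (re-)fetches it first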
global function open_category(string filename, integer i=0, n=0)
return open_download(filename&".htm","http://rosettacode.org/wiki/Category:"&filename,i,n)
end function
global function dewiki(string s, sequence exclude={})
-- extract tasks from eg `<li><a href="/wiki/100_doors"`, skipping any in exclude
sequence tasks = {}
integer start = 1, finish = match(`<div class="printfooter">`,s)
s = s[1..finish-1]
while true do
start = match(`<li><a href="/wiki/`,s,start)
if start=0 then exit end if
start += length(`<li><a href="/wiki/`)
finish = find('"',s,start)
string task = s[start..finish-1]
if not find(task,exclude) then
task = substitute_all(task,{"*",":"},{"%2A","%3A"})
tasks = append(tasks,task)
end if
start = finish+1
end while
return tasks
end function
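-- e.g. dewiki(open_category("Programming_Tasks")) -- (illustrative) yields
--      a list of task names such as {"100_doors",...}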
global procedure curl_cleanup()
if curl!=NULL then
curl_easy_cleanup(curl)
free(pErrorBuffer)
curl = NULL
pErrorBuffer = NULL
end if
end procedure
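--
-- Typical driver, an illustrative sketch only (the category name and the
-- per-page processing are placeholders, not part of this file):
--
--  sequence tasks = dewiki(open_category("Programming_Tasks"))
--  for i=1 to length(tasks) do
--      string page = open_download(tasks[i]&".htm",
--                    "http://rosettacode.org/wiki/"&tasks[i],i,length(tasks))
--      -- ... process page ...
--  end for
--  curl_cleanup()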