151 lines
40 KiB
Plaintext
151 lines
40 KiB
Plaintext
-->
|
||
<span style="color: #000080;font-style:italic;">--
|
||
-- demo/rosetta/Natural_sorting2.exw
|
||
--</span>
|
||
function utf32ch(sequence s)
    -- Given a sequence of UTF-8 encoded strings, return a sequence of the
    -- same length holding, for each string, the first code point of its
    -- UTF-32 decoding.
    sequence codepoints = repeat(0,length(s))
    for idx=1 to length(s) do
        -- only the leading code point of each entry is kept
        codepoints[idx] = utf8_to_utf32(s[idx])[1]
    end for
    return codepoints
end function
|
||
|
||
-- common: words that normalise() drops from a sort key altogether.
constant common = {"the","it","to","a","of","is"},
         -- al holds each accented letter / ligature as a UTF-8 string;
         -- ac_replacements holds, at the same index, the plain text that
         -- stands in for it (a string for multi-letter replacements, a
         -- single character otherwise).
         {al,ac_replacements} = columnize({
            {"Æ","AE"},{"æ","ae"},{"Þ","TH"},{"þ","th"},
            -- the fi/fl ligatures, long s and ezh below had been reduced to
            -- mis-encoded placeholders, which made them unmatchable
            {"Ð","TH"},{"ð","th"},{"ß","ss"},{"ﬁ","fi"},
            {"ﬂ","fl"},{"ſ",'s'},{"ʒ",'z'},
            {"À",'A'},{"Á",'A'},{"Â",'A'},{"Ã",'A'},
            {"Ä",'A'},{"Å",'A'},{"à",'a'},{"á",'a'},
            {"â",'a'},{"ã",'a'},{"ä",'a'},{"å",'a'},
            {"Ç",'C'},{"ç",'c'},{"È",'E'},{"É",'E'},
            {"Ê",'E'},{"Ë",'E'},{"è",'e'},{"é",'e'},
            {"ê",'e'},{"ë",'e'},{"Ì",'I'},{"Í",'I'},
            {"Î",'I'},{"Ï",'I'},{"ì",'i'},{"í",'i'},
            {"î",'i'},{"ï",'i'},{"Ò",'O'},{"Ó",'O'},
            {"Ô",'O'},{"Õ",'O'},{"Ö",'O'},{"Ø",'O'},
            {"ò",'o'},{"ó",'o'},{"ô",'o'},{"õ",'o'},
            {"ö",'o'},{"ø",'o'},{"Ñ",'N'},{"ñ",'n'},
            {"Ù",'U'},{"Ú",'U'},{"Û",'U'},{"Ü",'U'},
            {"ù",'u'},{"ú",'u'},{"û",'u'},{"ü",'u'},
            {"Ý",'Y'},{"ÿ",'y'},{"ý",'y'}}),
         -- the same keys as single code points, so that normalise() can
         -- find() a decoded character directly
         accents_and_ligatures = utf32ch(al)
|
||
|
||
function normalise(string s)
    -- Build the natural-sort key for the UTF-8 string s.
    --
    -- The key is a sequence of tokens, in input order:
    --   * each run of whitespace (space, \t, \r, \n, \x0b, \x0c) that follows
    --     some other token becomes a single -1 (leading whitespace is dropped);
    --   * each run of decimal digits becomes one number holding its value;
    --   * every other run of characters becomes one lower-cased string, with
    --     accented letters and ligatures swapped for their plain replacements.
    -- Words listed in common are then removed, and a trailing separator is
    -- stripped. Keys are meant to be ordered with compare().
    sequence utf32 = utf8_to_utf32(s)
    sequence res = {}
    -- prev records the class of the previous character: ' ' for whitespace,
    -- '0' for a digit, otherwise the character itself
    integer ch, prev = 0
    for i=1 to length(utf32) do
        ch = utf32[i]
        if find(ch," \t\r\n\x0b\x0c") then
            -- collapse a whitespace run into one separator, never a leading one
            if length(res)>0 and prev!=' ' then
                res &= -1
            end if
            prev = ' '
        elsif find(ch,"0123456789") then
            if length(res)=0 or prev!='0' then
                -- first digit of a run starts a new numeric token
                res &= ch-'0'
            else
                -- later digits extend the numeric token in place
                res[$] = res[$]*10+ch-'0'
            end if
            prev = '0'
        else
            object rep = find(ch,accents_and_ligatures)
            if rep then
                rep = lower(ac_replacements[rep])
            else
                rep = lower(ch)
            end if
            if length(res) and sequence(res[$]) then
                -- still inside a text run: extend the current string token
                res[$] &= rep
            else
                res = append(res,""&rep)
            end if
            prev = ch
        end if
    end for
    for i=1 to length(common) do
        while 1 do
            integer k = find(common[i],res)
            if k=0 then exit end if
            res[k..k] = {}
            -- Take one adjacent separator out together with the word, so that
            -- no doubled, leading or trailing -1 is left behind: prefer the
            -- separator that followed the word, and fall back to the one
            -- before it only when the word was the last token.
            if k<=length(res) and res[k]=-1 then
                res[k..k] = {}
            elsif k>length(res) and k>1 and res[k-1]=-1 then
                res[k-1..k-1] = {}
            end if
        end while
    end for
    -- Strip a trailing separator by looking at the key itself; digit runs are
    -- never negative, so -1 can only be a separator.
    if length(res) and res[$]=-1 then
        res = res[1..$-1]
    end if
    return res
end function
|
||
|
||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tests</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">" leading spaces: 4"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">" leading spaces: 3"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"leading spaces: 2"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">" leading spaces: 1"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"adjacent spaces: 3"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"adjacent spaces: 4"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"adjacent spaces: 1"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"adjacent spaces: 2"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"white space: 3-2"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"white\r space: 3-3"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"white\x0cspace: 3-1"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"white\x0bspace: 3+0"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"white\n space: 3+1"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"white\t space: 3+2"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"caSE independent: 3-1"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"cASE independent: 3-2"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"casE independent: 3+0"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"case independent: 3+1"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"foo1000bar99baz9.txt"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo100bar99baz0.txt"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo100bar10baz0.txt"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo1000bar99baz10.txt"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"foo1bar"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo100bar"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo bar"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"foo1000bar"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"The Wind in the Willows"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"The 40th step more"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"The 39 steps"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"Wanda"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"ignore ý accents: 2-2"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"ignore Ý accents: 2-1"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"ignore y accents: 2+0"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"ignore Y accents: 2+1"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">{</span><span style="color: #008000;">"Ball"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Card"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"above"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"aether"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"apple"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"autumn"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"außen"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"bald"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"car"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"e-mail"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"evoke"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"nina"</span><span style="color: #0000FF;">,</span>
|
||
<span style="color: #008000;">"niño"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Æon"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Évian"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"æon"</span><span style="color: #0000FF;">},</span>
|
||
<span style="color: #0000FF;">}</span>
|
||
|
||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tags</span>
|
||
|
||
function natural(integer i, integer j)
    -- Comparator handed to custom_sort() by routine_id: orders two tag
    -- indexes by comparing their entries in the file-level sequence t.
    object left = t[i],
           right = t[j]
    integer order = compare(left,right)
    return order
end function
|
||
|
||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tests</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">-- print one three-column report per entry of tests</span>
|
||
<span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tests</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
|
||
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- plain sort() of the same strings, shown in the middle column for comparison</span>
|
||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">))</span>
|
||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
|
||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">normalise</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">])</span> <span style="color: #000080;font-style:italic;">-- t[j] is the value natural() compares in place of s[j]</span>
|
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
||
<span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">routine_id</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"natural"</span><span style="color: #0000FF;">),</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)))</span> <span style="color: #000080;font-style:italic;">-- sort the indexes 1..length(s) with natural(); s itself is left in its original order</span>
|
||
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">3</span> <span style="color: #008080;">then</span> <span style="color: #000080;font-style:italic;">-- clean up the whitespace mess</span>
|
||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">-- done after sorting: swap control characters for their visible escape text in both s and n</span>
|
||
<span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"\r"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\x0c"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\x0b"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\t"</span><span style="color: #0000FF;">},{</span><span style="color: #008000;">"\\r"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\x0c"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\x0b"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\n"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\t"</span><span style="color: #0000FF;">})</span>
|
||
<span style="color: #000000;">n</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">substitute_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">n</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],{</span><span style="color: #008000;">"\r"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\x0c"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\x0b"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\t"</span><span style="color: #0000FF;">},{</span><span style="color: #008000;">"\\r"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\x0c"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\x0b"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\n"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\\t"</span><span style="color: #0000FF;">})</span>
|
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
|
||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%-30s %-30s %-30s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"original"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"normal"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"natural"</span><span style="color: #0000FF;">})</span>
|
||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%-30s %-30s %-30s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"========"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"======"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"======="</span><span style="color: #0000FF;">})</span>
|
||
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
|
||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%-30s|%-30s|%-30s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">n</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">],</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]]})</span> <span style="color: #000080;font-style:italic;">-- columns: input order, plain sort() order, natural order (s indexed through tags)</span>
|
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
||
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
|
||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
|
||
<!--
|