RosettaCodeData/Task/Natural-sorting/Pascal/natural-sorting.pas

245 lines
13 KiB
ObjectPascal

Program Natural; Uses DOS, crt; {Simple selection.}
{Demonstrates a "natural" order of sorting text with nameish parts.}
Const null=#0; BS=#8; HT=#9; LF=#10{0A}; VT=#11{0B}; FF=#12{0C}; CR=#13{0D};
Procedure Croak(gasp: string);
Begin
WriteLn(Gasp);
HALT;
End;
Function Space(n: integer): string; {Can't use n*" " either.}
var text: string; {A scratchpad.}
var i: integer; {A stepper.}
Begin
if n > 255 then n:=255 {A value parameter,}
else if n < 0 then n:=0; {So this just messes with my copy.}
for i:=1 to n do text[i]:=' '; {Place some spaces.}
text[0]:=char(n); {Place the length thereof.}
Space:=text; {Take that.}
End; {of Space.}
Function DeFang(x: string): string; {Certain character codes cause action.}
var text: string; {A scratchpad, as using DeFang directly might imply recursion.}
var i: integer; {A stepper.}
var c: char; {Reduce repetition.}
Begin {I hope that appending is recognised by the compiler...}
text:=''; {Scrub the scratchpad.}
for i:=1 to Length(x) do {Step through the source text.}
begin {Inspecting each character.}
c:=char(x[i]); {Grab it.}
if c > CR then text:=text + c {Deemed not troublesome.}
else if c < BS then text:=text + c {Lacks an agreed alternative, and may not cause trouble.}
else text:=text + '!' + copy('btnvfr',ord(c) - ord(BS) + 1,1); {The alternative codes.}
end; {On to the next.}
DeFang:=text; {Alas, the "escape" convention lengthens the text.}
End; {of DeFang.} {But that only mars the layout, rather than ruining it.}
Const mEntry = 66; {Sufficient for demonstrations.}
Type EntryList = array[0..mEntry] of integer; {Identifies texts by their index.}
var EntryText: array[1..mEntry] of string; {Inbto this array.}
var nEntry: integer; {The current number.}
Function AddEntry(x: string): integer; {Add another text to the collection.}
Begin {Could extend to checking for duplicates via a sorted list...}
if nEntry >= mEntry then Croak('Too many entries!'); {Perhaps not!}
inc(nEntry); {So, another.}
EntryText[nEntry]:=x; {Placed.}
AddEntry:=nEntry; {The caller will want to know where.}
End; {of AddEntry.}
Function TextOrder(i,j: integer): boolean; {This is easy.}
Begin {But despite being only one statement, and simple at that,}
TextOrder:=EntryText[i] <= EntryText[j]; {Begin...End is insisted upon.}
End; {of TextOrder.}
Function NaturalOrder(e1,e2: integer): boolean;{Not so easy.}
const Article: array[1..3] of string[4] = ('A ','AN ','THE '); {Each with its trailing space.}
Function Crush(var c: char): char; {Suppresses divergence.}
Begin {To simplify comparisons.}
if c <= ' ' then Crush:=' ' {Crush the fancy control characters.}
else Crush:=UpCase(c); {Also crush a < A or a > A or a = A questions.}
End; {of Crush.}
var Wot: array[1..2] of integer; {Which text is being fingered.}
var Tail: array[1..2] of integer; {Which article has been found at the start.}
var l,lst: array[1..2] of integer; {Finger to the current point, and last character.}
Procedure Librarian; {Initial inspection of the texts.}
var Blocked: boolean; {Further progress may be obstructed.}
var a,is,i: integer; {Odds and ends.}
label Hic; {For escaping the search when a match is complete.}
Begin {There are two texts to inspect.}
for is:=1 to 2 do {Treat them alike.}
begin {This is the first encounter.}
l[is]:=1; {So start the scan with the first character.}
Tail[is]:=0; {No articles found.}
while (l[is] <= lst[is]) and (EntryText[wot[is]][l[is]] <= ' ') do inc(l[is]); {Leading spaceish.}
for a:=1 to 3 do {Try to match an article at the start of the text.}
begin {Each article's text has a trailing space to be matched also.}
i:=0; {Start a for-loop, but with early escape in mind.}
Repeat {Compare successive characters, for i:=0 to a...}
if l[is] + i > lst[is] then Blocked:=true {Probed past the end of text?}
else Blocked:=Crush(EntryText[wot[is]][l[is] + i]) <> Article[a][i + 1]; {No. Compare capitals.}
inc(i); {Stepping on to the next character.}
Until Blocked or (i > a); {Conveniently, Length(Article[a]) = a.}
if not Blocked then {Was a mismatch found?}
begin {No!}
Tail[is]:=a; {So, identify the discovery.}
l[is]:=l[is] + i; {And advance the scan to whatever follows.}
goto Hic; {Escape so as to consider the other text.}
end; {Since two texts are being considered separately.}
end; {Sigh. no "Next a" or similar syntax.}
Hic:dec(l[is]); {Backstep one, ready to advance later.}
end; {Likewise, no "for is:=1 to 2 do ... Next is" syntax.}
End; {of Librarian.}
var c: array[1..2] of string[1]; {Selected by Advance for comparison.}
var d: integer; {Their difference.}
type moody = (Done,Bored,Grist,Numeric); {Might as well have some mnemonics.}
var Mood: array[1..2] of moody; {As the scan proceeds, moods vary.}
var depth: array[1..2] of integer; {Digit depth.}
Procedure Another; {Choose a pair of characters to compare.}
{Digit sequences are special! But periods are ignored, also signs, avoiding confusion over "+6" and " 6".}
var is: integer; {Selects from one text or the other.}
var ll: integer; {Looks past the text into any Article.}
var d: char; {Possibly a digit.}
Begin
for is:=1 to 2 do {Same treatment for both texts.}
begin {Find the next character, and taste it.}
repeat {If already bored, slog through any following spaces.}
inc(l[is]); {So, advance one character onwards.}
ll:=l[is] - lst[is]; {Compare to the end of the normal text.}
if ll <= 0 then c[is]:=Crush(EntryText[wot[is]][l[is]]) {Still in the normal text.}
else if Tail[is] <= 0 then c[is]:='' {Perhaps there is no tail.}
else if ll <= 2 then c[is]:=copy(', ',ll,1) {If there is, this is the junction.}
else if ll <= 2 + Tail[is] then c[is]:=copy(Article[Tail[is]],ll - 2,1) {And this the tail.}
else c[is]:=''; {Actually, the copy would do this.}
until not ((c[is] = ' ') and (Mood[is] = Bored)); {Thus pass multiple enclosed spaces, but not the first.}
if length(c[is]) <= 0 then Mood[is]:=Done {Perhaps we ran off the end, even of the tail.}
else if c[is] = ' ' then Mood[is]:=Bored {The first taste of a space induces boredom.}
else if ('0' <= c[is]) and (c[is] <= '9') then Mood[is]:=Numeric {Paired, evokes special attention.}
else Mood[is]:=Grist; {All else is grist for my comparisons.}
end; {Switch to the next text.}
{Comparing digit sequences is to be done as if numbers. "007" vs "70" is to become vs. "070" by length matching.}
if (Mood[1] = Numeric) and (Mood[2] = Numeric) then {Are both texts yielding a digit?}
begin {Yes. Special treatment impends.}
if (Depth[1] = 0) and (Depth[2] = 0) then {Do I already know how many digits impend?}
for is:=1 to 2 do {No. So for each text,}
repeat {Keep looking until I stop seeing digits.}
inc(Depth[is]); {I am seeing a digit, so there will be one to count.}
ll:=l[is] + Depth[is]; {Finger the next position.}
if ll > lst[is] then d:=null {And if not off the end,}
else d:=EntryText[wot[is]][ll]; {Grab a potential digit.}
until (d < '0') or (d > '9'); {If it is one, probe again.}
if Depth[1] < Depth[2] then {Righto, if the first sequence has fewer digits,}
begin {Supply a free zero.}
dec(Depth[2]); {The second's digit will be consumed.}
dec(l[1]); {The first's will be re-encountered.}
c[1]:='0'; {Here is the zero}
end {For the comparison.}
else if Depth[2] < Depth[1] then {But if the second has fewer digits to come,}
begin {Don't dig into them yet.}
dec(Depth[1]); {The first's digit will be used.}
dec(l[2]); {But the second's seen again.}
c[2]:='0'; {After this has been used}
end {In the comparison.}
else {But if both have the same number of digits remaining,}
begin {Then the comparison is aligned.}
dec(Depth[1]); {So this digit will be used.}
dec(Depth[2]); {As will this.}
end; {In the comparison.}
end; {Thus, arbitrary-size numbers are allowed, as they're never numbers.}
End; {of Another.} {Possibly, the two characters will be the same, and another pair will be requested.}
Begin {of NaturalOrder.}
Wot[1]:=e1; Wot[2]:=e2; {Make the two texts accessible via indexing.}
lst[1]:=Length(EntryText[e1]); {The last character of the first text.}
lst[2]:=Length(EntryText[e2]); {And of the second. Saves on repetition.}
Mood[1]:=Bored; Mood[2]:=Bored; {Behave as if we have already seen a space.}
depth[1]:=0; depth[2]:=0; {And, no digits in concert have been seen.}
Librarian; {Start the inspection.}
repeat {Chug along, until a difference is found.}
Another; {To do so, choose another pair of characters to compare.}
d:=Length(c[2]) - Length(c[1]); {If one text has run out, favour the shorter.}
if (d = 0) and (Length(c[1]) > 0) then d:=ord(c[2][1]) - ord(c[1][1]); {Otherwise, their difference.}
until (d <> 0) or ((Mood[1] = Done) and (Mood[2] = Done)); {Well? Are we there yet?}
NaturalOrder:=d >= 0; {And so, does e1's text precede e2's?}
End; {of NatualOrder.}
var TextSort: boolean; {Because I can't pass a function as a parameter,}
Function InOrder(i,j: integer): boolean; {I can only use one function.}
Begin {Which messes with a selector.}
if TextSort then InOrder:=TextOrder(i,j) {So then,}
else InOrder:=NaturalOrder(i,j); {Which is it to be?}
End; {of InOrder.}
Procedure OrderEntry(var List: EntryList); {Passing a ordinary array is not Pascalish, damnit.}
{Crank up a Comb sort of the entries fingered by List. Working backwards, just for fun.}
{Caution: the H*10/13 means that H ought not be INTEGER*2. Otherwise, use H/1.3.}
var t: integer; {Same type as the elements of List.}
var N,i,h: integer; {Odds and ends.}
var happy: boolean; {To be attained.}
Begin
N:=List[0]; {Extract the count.}
h:=N - 1; {"Last" - "First", and not +1.}
if h <= 0 then exit; {Ha ha.}
Repeat {Start the pounding.}
h:=LongInt(h)*10 div 13; {Beware overflow, or, use /1.3.}
if h <= 0 then h:=1; {No "max" function, damnit.}
if (h = 9) or (h = 10) then h:=11; {A fiddle.}
happy:=true; {No disorder seen.}
for i:=N - h downto 1 do {So, go looking. If h = 1, this is a Bubblesort.}
if not InOrder(List[i],List[i + h]) then {How about this pair?}
begin {Alas.}
t:=List[i]; List[i]:=List[i + h]; List[i + h]:=t;{No Swap(a,b), damnit.}
happy:=false; {Disorder has been discovered.}
end; {On to the next comparison.}
Until happy and (h = 1); {No suspicion remains?}
End; {of OrderEntry.}
var Item,Fancy: EntryList; {Two lists of entry indices.}
var i: integer; {A stepper.}
var t1: string; {A scratchpad.}
BEGIN
nEntry:=0; {No entries are stored.}
i:=0; {Start a stepper.}
inc(i);Item[i]:=AddEntry('ignore leading spaces: 2-2');
inc(i);Item[i]:=AddEntry(' ignore leading spaces: 2-1');
inc(i);Item[i]:=AddEntry(' ignore leading spaces: 2+0');
inc(i);Item[i]:=AddEntry(' ignore leading spaces: 2+1');
inc(i);Item[i]:=AddEntry('ignore m.a.s spaces: 2-2');
inc(i);Item[i]:=AddEntry('ignore m.a.s spaces: 2-1');
inc(i);Item[i]:=AddEntry('ignore m.a.s spaces: 2+0');
inc(i);Item[i]:=AddEntry('ignore m.a.s spaces: 2+1');
inc(i);Item[i]:=AddEntry('Equiv.'+' '+'spaces: 3-3');
inc(i);Item[i]:=AddEntry('Equiv.'+CR+'spaces: 3-2'); {CR can't appear as itself.}
inc(i);Item[i]:=AddEntry('Equiv.'+FF+'spaces: 3-1'); {As it is used to mark line endings.}
inc(i);Item[i]:=AddEntry('Equiv.'+VT+'spaces: 3+0'); {And if typed in an editor,}
inc(i);Item[i]:=AddEntry('Equiv.'+LF+'spaces: 3+1'); {It is acted upon there and then.}
inc(i);Item[i]:=AddEntry('Equiv.'+HT+'spaces: 3+2'); {So, name instead of value.}
inc(i);Item[i]:=AddEntry('cASE INDEPENDENT: 3-2');
inc(i);Item[i]:=AddEntry('caSE INDEPENDENT: 3-1');
inc(i);Item[i]:=AddEntry('casE INDEPENDENT: 3+0');
inc(i);Item[i]:=AddEntry('case INDEPENDENT: 3+1');
inc(i);Item[i]:=AddEntry('foo100bar99baz0.txt');
inc(i);Item[i]:=AddEntry('foo100bar10baz0.txt');
inc(i);Item[i]:=AddEntry('foo1000bar99baz10.txt');
inc(i);Item[i]:=AddEntry('foo1000bar99baz9.txt');
inc(i);Item[i]:=AddEntry('The Wind in the Willows');
inc(i);Item[i]:=AddEntry('The 40th step more');
inc(i);Item[i]:=AddEntry('The 39 steps');
inc(i);Item[i]:=AddEntry('Wanda');
{inc(i);Item[i]:=AddEntry('The Worth of Wirth''s Way');}
Item[0]:=nEntry; {Complete the EntryList protocol.}
for i:=0 to nEntry do Fancy[i]:=Item[i]; {Sigh. Fancy:=Item.}
TextSort:=true; OrderEntry(Item); {Plain text ordering.}
TextSort:=false; OrderEntry(Fancy); {Natural order.}
WriteLn(' Text order Natural order');
for i:=1 to nEntry do
begin
t1:=DeFang(EntryText[Item[i]]);
WriteLn(Item[i]:3,'|',t1,Space(30 - length(t1)),' ',
Fancy[i]:3,'|',DeFang(EntryText[Fancy[i]]));
end;
END.