RosettaCodeData/Task/Multisplit/ALGOL-68/multisplit.alg

67 lines
2.9 KiB
Plaintext

# split a string based on a number of separators #
# SPLITINFO describes one token produced by the SPLIT operator #
MODE SPLITINFO =
     STRUCT( STRING text,      # the characters of the token, possibly none             #
             INT    position,  # index at which the token starts in the source string   #
             STRING delimiter  # the delimiter that ended the token                     #
           );
# yields the number of characters in s, worked out from its bounds #
OP LENGTH = ( STRING s )INT:
   BEGIN
       INT lower = LWB s;
       INT upper = UPB s;
       ( upper + 1 ) - lower
   END;
# prefix test: yields TRUE when the leading characters of s are equal to p, #
# FALSE otherwise; p longer than s always yields FALSE                      #
PRIO STARTSWITH = 5;
OP   STARTSWITH = ( STRING s, p )BOOL:
     IF   INT prefix length = LENGTH p;
          prefix length <= LENGTH s
     THEN # compare just the first prefix length characters of s with p #
          s[ LWB s : ( LWB s + prefix length ) - 1 ] = p
     ELSE FALSE
     FI;
# splits str at every occurrence of one of the delimiters and yields one SPLITINFO  #
# per token: the token text, the index in str where it starts and the delimiter     #
# that ended it                                                                     #
# at each position the delimiters are tried in array order and the first one that   #
# matches wins; zero-length delimiters are ignored                                  #
# adjacent delimiters produce empty tokens; text left over after the last delimiter #
# becomes a final token with an empty delimiter; when str ends with a delimiter no  #
# extra empty token is added                                                        #
PRIO SPLIT = 5;
OP   SPLIT = ( STRING str, []STRING delimiters )[]SPLITINFO:
     BEGIN
         INT str first = LWB str;
         INT str last  = UPB str;
         # room for two more tokens than there are characters in str, which is #
         # enough even for a string that consists of delimiters only           #
         [ 1 : ( str last + 3 ) - str first ]SPLITINFO pieces;
         INT piece count := 0;
         INT token start := str first; # where the token being collected began #
         INT scan        := str first; # the position currently being examined #
         WHILE scan <= str last DO
             # find the first non-empty delimiter that occurs at scan, if any #
             STRING matched := "";
             FOR d FROM LWB delimiters TO UPB delimiters WHILE matched = "" DO
                 STRING candidate = delimiters[ d ];
                 IF LENGTH candidate > 0 THEN
                     IF str[ scan : ] STARTSWITH candidate THEN
                         matched := candidate
                     FI
                 FI
             OD;
             IF matched = "" THEN
                 # no delimiter here: the character belongs to the current token #
                 scan +:= 1
             ELSE
                 # close the current token and resume just after the delimiter #
                 piece count +:= 1;
                 pieces[ piece count ] := ( str[ token start : scan - 1 ], token start, matched );
                 scan +:= LENGTH matched;
                 token start := scan
             FI
         OD;
         IF token start <= str last THEN
             # characters remain after the final delimiter: they are the last token #
             piece count +:= 1;
             pieces[ piece count ] := ( str[ token start : ], token start, "" )
         FI;
         # yield only the entries that were filled in #
         pieces[ 1 : piece count ]
     END # SPLIT # ;
# demonstrate SPLIT: split a sample string and print each token on its own line #
[]SPLITINFO test tokens = "a!===b=!=c" SPLIT []STRING( "==", "!=", "=" );
FOR index FROM LWB test tokens TO UPB test tokens DO
    STRING token text  = text      OF test tokens[ index ];
    INT    token start = position  OF test tokens[ index ];
    STRING token delim = delimiter OF test tokens[ index ];
    print( ( "token: [", token text, "]" ) );
    print( ( " at: ", whole( token start, 0 ) ) );
    print( ( " delimiter: (", token delim, ")", newline ) )
OD