RosettaCodeData/Task/Tokenize-a-string-with-esca.../SNOBOL4/tokenize-a-string-with-esca...


* Program: tokenize_with_escape.sbl
* To run: sbl tokenize_with_escape.sbl
* Description: Tokenize a string with escaping
* Comment: Tested using the Spitbol for Linux version of SNOBOL4
        lf = substr(&alphabet,11,1)     ;* New line or line feed
* Function tokenize breaks a string s into an array of parts separated
* by the character c, which defaults to a comma. Parameter kp=1 (the
* default) keeps null parts; kp=0 discards them.
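* An illustrative call sketch (example values, not part of the task):
*   parts = tokenize("a,b,,c")           default sep "," and kp=1 -> 4 parts
*   parts = tokenize("a,b,,c", ",", 0)   kp=0 drops the null part -> 3 parts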
        define('tokenize(s,c,kp)tokenizepat,part,t,i,j')
        :(tokenize_end)
tokenize
        c = (ident(c) ',', substr(c,1,1))       :f(freturn)
        kp = (ident(kp) 1, eq(kp,0) 0, 1)       :f(freturn)
        t = table()
        tokenizepat = breakx(c) . part c | (len(1) rem) . part
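* When null parts are kept and s already ends with the separator, append
* one more separator so the trailing null field is not lost by the loop.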
        s ? eq(kp,1) rtab(1) c = s c
tokenize1
        s ? tokenizepat = ""    :f(tokenize2)
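* At most one of the next two statements stores the part: the first when
* kp=0 and the part is non-null, the second whenever kp=1.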
        t[i = eq(kp,0) differ(part) i + 1] = part
        t[i = eq(kp,1) i + 1] = part
        :(tokenize1)
tokenize2
        tokenize = array(i)     :f(errr)
        j = 0
tokenize3       tokenize[j = lt(j,i) j + 1] = t[j]      :s(tokenize3)
        :(return)
tokenize_end

* Function tokcan normalizes string ts by applying separator and escape
* rules. Parameter sep is the separator and esc is the escape character.
* Parameter tesc is the new separator character substituted for sep; it
* defaults to a comma, ",".
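* For example (illustrative values, not part of the task): with sep "|" and
* esc "^", tokcan("a^|b|c|") yields "a|b,c,". The "^|" becomes a literal "|",
* each unescaped "|" becomes the default tesc ",", and "^^" would become "^".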
        define('tokcan(ts,sep,esc,tesc)tpat,part1,part2,notany')       :(tokcan_end)
tokcan
        tesc = (ident(tesc) ',', substr(tesc,1,1))
        tpat = (breakx(sep esc) . part1
+               (sep | esc sep | esc esc | (esc len(1) . notany)) . part2
+               )
+               | (len(1) rem) . part1
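* tpat captures the text up to the next separator or escape in part1 and
* the separator/escape sequence itself in part2; the final alternative
* sweeps up whatever remains once no further such sequence can be matched.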
tokcan1
        ts ? tpat =     :f(tokcan2)
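* Translate the matched sequence: a bare separator becomes tesc, an escaped
* separator becomes the separator itself, an escaped escape becomes the
* escape character, and an escape before any other character yields that
* character alone.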
        part2 = (leq(part2,sep) tesc
+               ,leq(part2,esc sep) sep
+               ,leq(part2,esc esc) esc
+               ,differ(notany) leq(part2,esc notany) notany
+               )
        tokcan = (ident(tokcan) "", tokcan) part1 part2
        :(tokcan1)
tokcan2
        :(return)
tokcan_end

        test_string = "one^|uno||three^^^^|four^^^|^cuatro|"
        sep = "|"
        esc = "^"
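* In test_string, "^|" escapes a separator, "^^" escapes the escape
* character, and the trailing "|" produces a final null field.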
        hline = tokcan(test_string,sep,esc)     :f(err)
        output = "  Input: " test_string lf
        output = "Output1: " hline lf
        output = "Output2: "
        tokenized = tokenize(hline,",")
p1      output = "'" tokenized[z = z + 1] "'"   :s(p1)
END