76 lines
2.0 KiB
Plaintext
76 lines
2.0 KiB
Plaintext
* Program: tokenize_with_escape.sbl
|
|
* To run: sbl tokenize_with_escape.sbl
|
|
* Description: Tokenize a string with escaping
|
|
* Comment: Tested using the Spitbol for Linux version of SNOBOL4
|
|
|
|
* lf is set to the 11th character of &alphabet; the driver code appends
* it to output lines as a line feed.
lf = substr(&alphabet,11,1) ;* New line or line feed
|
|
|
|
|
|
* Function tokenize breaks string s into the parts that are separated
* by c and returns them as an array, one part per element.
*   s   the string to split.
*   c   the separator; only its first character is used. Defaults to
*       a comma when omitted.
*   kp  1 (the default) keeps null parts, 0 discards them. Any other
*       numeric value is treated as 1.
* The function fails (freturn) when no parts are produced, for example
* when s is the null string, because an array cannot be created with
* zero elements.
        define('tokenize(s,c,kp)tokenizepat,part,t,i,j')  :(tokenize_end)
tokenize
        c = (ident(c) ',', substr(c,1,1))                 :f(freturn)
        kp = (ident(kp) 1, eq(kp,0) 0, 1)                 :f(freturn)
        t = table()
* Either "text up to a separator, then the separator" or, when no
* separator is left, all of the remaining (non-null) text.
        tokenizepat = breakx(c) . part c | (len(1) rem) . part
* When null parts are kept and s ends with a separator, append one more
* separator so the trailing null part is produced by the loop below.
        s ? eq(kp,1) rtab(1) c = s c
tokenize1
        s ? tokenizepat = ""                              :f(tokenize2)
* Exactly one of the next two statements stores the part: the first
* when null parts are discarded (and part is not null), the second
* when null parts are kept.
        t[i = eq(kp,0) differ(part) i + 1] = part
        t[i = eq(kp,1) i + 1] = part                      :(tokenize1)
tokenize2
* i is still null when nothing was stored; fail instead of asking
* array() for a zero-length array.
        tokenize = gt(i,0) array(i)                       :f(freturn)
        j = 0
tokenize3 tokenize[j = lt(j,i) j + 1] = t[j]              :s(tokenize3)
        :(return)
tokenize_end
|
|
|
|
|
|
* Function tokcan normalizes string ts by applying separator and escape
* rules and returns the rewritten string.
*   ts    the string to normalize.
*   sep   the separator.
*   esc   the escape character.
*   tesc  the new separator character substituted for each unescaped
*         sep; only its first character is used. Defaults to a
*         comma, ",".
* Rewriting rules, applied left to right:
*   sep        becomes tesc
*   esc sep    becomes sep
*   esc esc    becomes esc
*   esc X      becomes X (any other single character X)
* Text containing no sep or esc is copied unchanged.
        define('tokcan(ts,sep,esc,tesc)tpat,part1,part2,notany')  :(tokcan_end)
tokcan
        tesc = (ident(tesc) ',', substr(tesc,1,1))
        tpat = (breakx(sep esc) . part1
+               (sep | esc sep | esc esc | (esc len(1) . notany)) . part2
+              )
+              | (len(1) rem) . part1
* part2 and notany are set by conditional assignment only when the
* alternative containing them takes part in the match. Clear them
* before every match so a value left over from an earlier iteration is
* never appended again (previously "a|b" produced "a,b,").
tokcan1 part2 =
        notany =
        ts ? tpat =                                       :f(tokcan2)
* A null part2 means only trailing plain text was consumed, so there is
* nothing to translate.
        ident(part2)                                      :s(tokcan3)
        part2 = (leq(part2,sep) tesc
+               ,leq(part2,esc sep) sep
+               ,leq(part2,esc esc) esc
+               ,differ(notany) leq(part2,esc notany) notany
+               )
tokcan3 tokcan = tokcan part1 part2                       :(tokcan1)
tokcan2                                                   :(return)
tokcan_end
|
|
|
|
|
|
* Demonstration driver: normalize the sample text with tokcan, split the
* result on commas with tokenize, and print each part in single quotes.
        sep = "|"
        esc = "^"
        test_string = "one^|uno||three^^^^|four^^^|^cuatro|"
        hline = tokcan(test_string,sep,esc)               :f(err)
        output = " Input: " test_string lf
        output = "Output1: " hline lf
        output = "Output2: "
        tokenized = tokenize(hline,",")
* The array reference fails once z moves past the last element, which
* ends the printing loop.
p1      z = z + 1
        output = "'" tokenized[z] "'"                     :s(p1)
END
|