RosettaCodeData/Task/UTF-8-encode-and-decode/Forth/utf-8-encode-and-decode-2.fth

22 lines
685 B
Forth

-77 Constant UTF-8-err
$80 Constant max-single-byte
: u8@+ ( u8addr -- u8addr' u )
count dup max-single-byte u< ?EXIT \ special case ASCII
dup $C2 u< IF UTF-8-err throw THEN \ malformed character
$7F and $40 >r
BEGIN dup r@ and WHILE r@ xor
6 lshift r> 5 lshift >r >r count
dup $C0 and $80 <> IF UTF-8-err throw THEN
$3F and r> or
REPEAT rdrop ;
: u8!+ ( u u8addr -- u8addr' )
over max-single-byte u< IF tuck c! 1+ EXIT THEN \ special case ASCII
>r 0 swap $3F
BEGIN 2dup u> WHILE
2/ >r dup $3F and $80 or swap 6 rshift r>
REPEAT $7F xor 2* or r>
BEGIN over $80 u>= WHILE tuck c! 1+ REPEAT nip ;