20 lines
1.1 KiB
Common Lisp
20 lines
1.1 KiB
Common Lisp
(defun utf-8-to-unicode (byte-list)
|
|
"Take a list of one to four utf-8 encoded bytes (octets), return a code point."
|
|
(let ((b1 (car byte-list)))
|
|
(cond ((ascii-byte-p b1) b1) ; if a single byte, just return it.
|
|
((multi-byte-p b1)
|
|
(if (lead-byte-p b1)
|
|
(let ((n (n-trail-bytes b1))
|
|
;; Content bits we want to extract from each lead byte.
|
|
(lead-templates (list #b01111111 #b00011111 #b00001111 #b00000111))
|
|
;; Content bits we want to extract from each trail byte.
|
|
(trail-template #b00111111))
|
|
(if (= n (1- (list-length byte-list)))
|
|
;; add lead byte
|
|
(+ (ash (logand (nth 0 byte-list) (nth n lead-templates)) (* 6 n))
|
|
;; and the trail bytes
|
|
(loop for i from 1 to n sum
|
|
(ash (logand (nth i byte-list) trail-template) (* 6 (- n i)))))
|
|
(error "calculated number of bytes doesnt match the length of the byte list")))
|
|
(error "first byte in the list isnt a lead byte"))))))
|