44 lines
1.8 KiB
Scala
44 lines
1.8 KiB
Scala
import java.lang.Character._; import scala.annotation.tailrec
|
|
|
|
/** Demo program that splits a sample string into Unicode code points and prints a
  * table with, for each one: its index, the character itself, its UTF-16 code units
  * written as backslash-u escapes, the code point in hex and decimal, two
  * combining-mark flags, and the Unicode character name.
  */
object CharacterCode extends App {

  /** Converts a Unicode code point to its UTF-16 form (one or two `Char`s).
    *
    * @param n the code point
    * @return the UTF-16 code units produced by `java.lang.Character.toChars`
    */
  def intToChars(n: Int): Array[Char] = java.lang.Character.toChars(n)

  /** Splits a string into one string per Unicode code point.
    *
    * A valid surrogate pair becomes a single two-`Char` element. An unpaired
    * surrogate (anywhere, including at the very end) is kept as its own
    * one-`Char` element instead of being dropped or glued to its neighbour.
    *
    * @param UTFstring the text to split
    * @return the code points of `UTFstring`, in order, each as a `String`
    */
  def UnicodeToList(UTFstring: String): List[String] = {
    // Walk by index and prepend (O(1)); a single reverse at the end keeps this linear.
    @tailrec
    def inner(index: Int, acc: List[String]): List[String] =
      if (index >= UTFstring.length) acc.reverse
      else {
        // codePointAt only combines a high surrogate with a directly following
        // low surrogate, so charCount is 2 exactly for well-formed pairs.
        val next = index + Character.charCount(UTFstring.codePointAt(index))
        inner(next, UTFstring.substring(index, next) :: acc)
      }

    inner(0, Nil)
  }

  /** Returns the code point that starts the given string.
    *
    * If the first two `Char`s form a surrogate pair the supplementary code point
    * is returned; otherwise the numeric value of the first `Char`.
    *
    * @param utf a non-empty string
    * @return the first code point of `utf`
    * @throws java.lang.StringIndexOutOfBoundsException if `utf` is empty
    */
  def UnicodeToInt(utf: String): Int = utf.codePointAt(0)

  /** Renders the UTF-16 code units of a string as a quoted source-code literal in
    * which every unit is written as a backslash-u escape with four upper-case hex
    * digits. An empty input yields an empty quoted literal.
    *
    * @param utf the text to render
    * @return the quoted, escaped literal
    */
  def UTFtoHexString(utf: String): String =
    if (utf.isEmpty) "\"\"" // avoid emitting a dangling escape prefix with no digits
    else utf.map(ch => f"${ch.toInt}%04X").mkString("\"\\u", "\\u", "\"")

  /** Tests Unicode character properties of a one-code-point string.
    *
    * @param ch the string to test
    * @return two letters, each "Y" or "N": whether `ch` matches the regex class
    *         `\p{M}` (any mark) and whether it matches `\p{Mn}` (non-spacing mark)
    */
  def flags(ch: String): String = {
    val anyMark = if (ch.matches("\\p{M}")) "Y" else "N"
    val nonSpacingMark = if (ch.matches("\\p{Mn}")) "Y" else "N"
    anyMark + nonSpacingMark
  }

  // Sample text: a byte-order mark (U+FEFF), a combining acute accent followed by
  // "a", assorted BMP symbols, and two supplementary-plane characters.
  val str: String = "\uFEFF" + "\u0301a" +
    "$áabcde¢£¤¥©ÇßIJijŁłʒλπक्तु•₠₡₢₣₤₥₦₧₨₩₪₫€₭₮₯₰₱₲₳₴₵℃←→⇒∙⌘☃☹☺☻ア字文𠀀" + intToChars(173733).mkString

  println(s"Example string: $str")
  println(
    """ | Chr C/C++/Java source Code Point Hex Dec Mn Name
      !----+ --- ------------------------- ------- -------- -- """.stripMargin('!') + "-" * 27)

  UnicodeToList(str).zipWithIndex.foreach {
    case (coll, nr) =>
      val codePoint = UnicodeToInt(coll)
      val decimal = "(" + codePoint.toString
      // Look the name up by the full code point (not the first UTF-16 unit), and
      // guard against the null that getName returns for unassigned code points.
      val name = Option(Character.getName(codePoint)).getOrElse("<unassigned>")
      println(
        f"$nr%4d: $coll\t${UTFtoHexString(coll)}%27s U+$codePoint%05X" +
          f"$decimal%8s) ${flags(coll)} $name ")
  }
}
|