RosettaCodeData/Task/Character-codes/Scala/character-codes-2.scala

44 lines
1.8 KiB
Scala

import java.lang.Character._; import scala.annotation.tailrec
/**
 * Character-codes demo: splits a sample string into Unicode characters and prints one
 * table row per character (index, glyph, UTF-16 escape, code point in hex and decimal,
 * mark flags and character name).
 */
object CharacterCode extends App {

  /** Returns the UTF-16 code unit(s) that `java.lang.Character.toChars` yields for code point `n`. */
  def intToChars(n: Int): Array[Char] = java.lang.Character.toChars(n)

  /**
   * Splits a string into a list holding one string per Unicode code point.
   *
   * A well-formed surrogate pair becomes a single two-unit element. An unpaired
   * surrogate (including one at the very end of the input) is kept as its own
   * one-unit element instead of being dropped or merged with its neighbour.
   *
   * @param UTFstring the text to split; may be empty
   * @return the characters in input order, `Nil` for an empty string
   */
  def UnicodeToList(UTFstring: String): List[String] = {
    @tailrec
    def inner(idx: Int, acc: List[String]): List[String] =
      if (idx >= UTFstring.length) acc.reverse // built by prepending, so restore input order once
      else {
        // charCount is 2 for a supplementary code point and 1 otherwise,
        // so `next` always lands on the start of the following character.
        val next = idx + charCount(UTFstring.codePointAt(idx))
        inner(next, UTFstring.substring(idx, next) :: acc)
      }
    inner(0, Nil)
  }

  /**
   * Returns the code point of the first character of `utf`.
   *
   * If the string starts with a surrogate pair the combined supplementary code point
   * is returned; otherwise the numeric value of the first UTF-16 unit.
   *
   * @param utf a non-empty string; an empty string raises an index-out-of-bounds exception
   */
  def UnicodeToInt(utf: String): Int = utf.codePointAt(0)

  /**
   * Renders every UTF-16 unit of `utf` as a `\uXXXX` escape and wraps the result in
   * double quotes, i.e. the way the text would be written as a C/C++/Java source literal.
   */
  def UTFtoHexString(utf: String): String =
    utf.map(unit => f"${unit.toInt}%04X").mkString("\"\\u", "\\u", "\"")

  /**
   * Tests Unicode character properties of `ch` and returns two letters, each "Y" or "N":
   * the first tells whether `ch` matches `\p{M}` (mark), the second whether it matches
   * `\p{Mn}` (non-spacing mark).
   */
  def flags(ch: String): String = {
    def yesNo(hit: Boolean): String = if (hit) "Y" else "N"
    yesNo(ch matches "\\p{M}") + yesNo(ch matches "\\p{Mn}")
  }

  /**
   * Returns the Unicode name of the first character of `utf`.
   *
   * The lookup uses the full code point, so a character stored as a surrogate pair is
   * named after the character itself rather than after its high surrogate.
   * `getName` may return `null`; that case is reported as `<unassigned>`.
   *
   * @param utf a non-empty string
   */
  def codePointName(utf: String): String =
    Option(getName(UnicodeToInt(utf))).getOrElse("<unassigned>")

  // Sample text: a U+FEFF byte order mark, a combining accent, assorted BMP symbols and
  // two supplementary-plane characters (one literal, one built from its code point).
  val str = "\uFEFF" + "\u0301a" +
    "$áabcde¢£¤¥©ÇßIJijŁłʒλπक्तु•₠₡₢₣₤₥₦₧₨₩₪₫€₭₮₯₰₱₲₳₴₵℃←→⇒∙⌘☃☹☺☻ア字文𠀀" + intToChars(173733).mkString

  println(s"Example string: $str")
  // stripMargin('!') removes the '!' (and any whitespace before it) from the second line.
  println(""" | Chr C/C++/Java source Code Point Hex Dec Mn Name
    !----+ --- ------------------------- ------- -------- -- """.stripMargin('!') + "-" * 27)

  UnicodeToList(str).zipWithIndex.foreach {
    case (coll, nr) =>
      val codePoint = UnicodeToInt(coll)
      val decimal = s"($codePoint" // opening parenthesis is part of the right-aligned field
      println(
        f"$nr%4d: $coll\t${UTFtoHexString(coll)}%27s U+$codePoint%05X" +
          f"$decimal%8s) ${flags(coll)} ${codePointName(coll)} ")
  }
}