RosettaCodeData/Task/Natural-sorting/Kotlin/natural-sorting.kts

182 lines
5.1 KiB
Kotlin
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// version 1.1.4-3
/** Matches a run of two or more consecutive space characters. */
val r2: Regex = """[ ]{2,}""".toRegex()

/** Matches any single whitespace character (`\s`). */
val r3: Regex = """\s""".toRegex()

/** Matches a run of one or more decimal digits. */
val r5: Regex = """\d+""".toRegex()
/**
 * Accented letters grouped by the unaccented letter they stand for.
 * Only covers ISO-8859-1 accented characters plus (for consistency) Ÿ.
 *
 * The group at index `i` of [ucAccented] / [lcAccented] corresponds to the
 * character at index `i` of [ucNormal] / [lcNormal].
 */
val ucAccented: Array<String> = arrayOf(
    "ÀÁÂÃÄÅ",
    "Ç",
    "ÈÉÊË",
    "ÌÍÎÏ",
    "Ñ",
    "ÒÓÔÕÖØ",
    "ÙÚÛÜ",
    "ÝŸ"
)
val lcAccented: Array<String> = arrayOf(
    "àáâãäå",
    "ç",
    "èéêë",
    "ìíîï",
    "ñ",
    "òóôõöø",
    "ùúûü",
    "ýÿ"
)
val ucNormal: String = "ACEINOUY"
val lcNormal: String = "aceinouy"
/**
 * Only the commoner ligatures.
 *
 * The character at index `i` of [ucLigatures] / [lcLigatures] is replaced by the
 * string at index `i` of [ucSeparated] / [lcSeparated], so each ligature string
 * must contain exactly one character per replacement entry.
 *
 * The ligatures are spelled as Unicode escapes on purpose: text tooling that
 * expands ligatures would otherwise turn the single IJ code point (U+0132 / U+0133)
 * into the two ASCII letters "IJ" / "ij", leaving four characters for three
 * replacements and breaking the index pairing.
 */
val ucLigatures = "\u00C6\u0132\u0152" // AE ligature, IJ ligature, OE ligature (upper case)
val lcLigatures = "\u00E6\u0133\u0153" // ae ligature, ij ligature, oe ligature (lower case)
val ucSeparated = arrayOf("AE", "IJ", "OE")
val lcSeparated = arrayOf("ae", "ij", "oe")
/**
 * Miscellaneous replacements.
 *
 * The character at index `i` of [miscLetters] is replaced by the string at
 * index `i` of [miscReplacements].
 */
val miscLetters: String = "ßſʒ"
val miscReplacements: Array<String> = arrayOf("ss", "s", "s")
/**
 * Renders the receiver with tab, line feed, vertical tab, form feed and carriage
 * return shown as their literal escape sequences, so whitespace is visible when printed.
 *
 * @return a copy of the string in which each of those five characters is replaced
 *   by the corresponding backslash escape text; all other characters are unchanged.
 */
fun String.toDisplayString(): String {
    // Each pair is (actual whitespace character, text shown in its place).
    val escapes = listOf(
        "\t" to "\\t",
        "\n" to "\\n",
        "\u000b" to "\\u000b",
        "\u000c" to "\\u000c",
        "\r" to "\\r"
    )
    return escapes.fold(this) { shown, (raw, visible) -> shown.replace(raw, visible) }
}
/**
 * Sort key that ignores leading space(s).
 *
 * Only the space character is stripped; other leading whitespace is kept.
 */
fun selector1(s: String) = s.dropWhile { it == ' ' }
/**
 * Sort key that ignores multiple adjacent spaces: every run matched by [r2]
 * is condensed to a single space.
 */
fun selector2(s: String) = r2.replace(s, " ")
/**
 * Sort key that treats all whitespace characters as equivalent: every match of
 * [r3] is replaced by a space.
 */
fun selector3(s: String) = r3.replace(s, " ")
/**
 * Case independent sort key.
 *
 * Uses the locale-invariant [String.lowercase] so the key does not depend on the
 * JVM default locale (under a Turkish locale, a default-locale conversion maps
 * 'I' to a dotless i and would change the sort order).
 *
 * @param s the string to derive a key from
 * @return [s] converted to lower case
 */
fun selector4(s: String): String = s.lowercase()
/**
 * Sort key that compares numeric fields as numerics.
 *
 * Every digit run matched by [r5] is left-padded with '0' to a width of 20, so
 * plain string comparison orders the runs numerically (deals with up to 20 digits).
 */
fun selector5(s: String) = s.replace(r5) { digits -> digits.value.padStart(20, '0') }
/**
 * Title sort key: drops a leading "the ", "an " or "a " (matched case-insensitively,
 * tried in that order) and returns the rest; other strings are returned unchanged.
 */
fun selector6(s: String): String {
    val article = listOf("the ", "an ", "a ").firstOrNull { s.startsWith(it, ignoreCase = true) }
    return if (article == null) s else s.drop(article.length)
}
/**
 * Sort key that treats accented characters (and case) as equivalent.
 *
 * Each character found in a group of [ucAccented] or [lcAccented] is replaced by
 * the character at the same index of [ucNormal] or [lcNormal]; any other character
 * is kept. The result is then lower-cased with the locale-invariant
 * [String.lowercase], so the key does not vary with the JVM default locale.
 *
 * @param s the string to derive a key from
 * @return the unaccented, lower-cased form of [s]
 */
fun selector7(s: String): String {
    val unaccented = s.map { c ->
        // Upper-case groups are consulted first, then lower-case groups.
        val ucIndex = ucAccented.indexOfFirst { group -> c in group }
        val lcIndex = if (ucIndex >= 0) -1 else lcAccented.indexOfFirst { group -> c in group }
        when {
            ucIndex >= 0 -> ucNormal[ucIndex]
            lcIndex >= 0 -> lcNormal[lcIndex]
            else -> c
        }
    }
    return unaccented.joinToString("").lowercase()
}
/**
 * Sort key with ligatures separated.
 *
 * Replaces the character at each index of [ucLigatures] with the entry at the same
 * index of [ucSeparated], then does the same for [lcLigatures] and [lcSeparated].
 */
fun selector8(s: String): String {
    val upperExpanded = ucLigatures.foldIndexed(s) { i, text, ligature ->
        text.replace(ligature.toString(), ucSeparated[i])
    }
    return lcLigatures.foldIndexed(upperExpanded) { i, text, ligature ->
        text.replace(ligature.toString(), lcSeparated[i])
    }
}
/**
 * Sort key with character replacements applied: the character at each index of
 * [miscLetters] is replaced by the entry at the same index of [miscReplacements].
 */
fun selector9(s: String): String =
    miscLetters.foldIndexed(s) { i, text, letter ->
        text.replace(letter.toString(), miscReplacements[i])
    }
/**
 * Demonstrates the nine selectors: for each one, a small array of sample strings
 * is sorted in place by that selector and printed one string per line, wrapped in
 * single quotes, with a blank line between lists.
 *
 * @param args command-line arguments (not used)
 */
fun main(args: Array<String>) {
    /**
     * Sorts [items] in place by [selector], then prints each one quoted on its own
     * line. [render] is applied to the already-quoted text before printing.
     */
    fun sortAndPrint(
        items: Array<String>,
        selector: (String) -> String,
        render: (String) -> String = { it }
    ) {
        items.sortBy(selector)
        println(items.joinToString("\n") { render("'$it'") })
    }

    println("The 9 string lists, sorted 'naturally':\n")

    // Entries differ in their number of leading spaces (0 to 3) so that
    // ignoring leading spaces actually changes the order.
    val s1 = arrayOf(
        "ignore leading spaces: 2-2",
        " ignore leading spaces: 2-1",
        "  ignore leading spaces: 2+0",
        "   ignore leading spaces: 2+1"
    )
    sortAndPrint(s1, ::selector1)

    // Entries differ in the length of the space run before "spaces" (1 to 4)
    // so that condensing adjacent spaces actually changes the order.
    val s2 = arrayOf(
        "ignore m.a.s spaces: 2-2",
        "ignore m.a.s  spaces: 2-1",
        "ignore m.a.s   spaces: 2+0",
        "ignore m.a.s    spaces: 2+1"
    )
    println()
    sortAndPrint(s2, ::selector2)

    val s3 = arrayOf(
        "Equiv. spaces: 3-3",
        "Equiv.\rspaces: 3-2",
        "Equiv.\u000cspaces: 3-1",
        "Equiv.\u000bspaces: 3+0",
        "Equiv.\nspaces: 3+1",
        "Equiv.\tspaces: 3+2"
    )
    println()
    // Control characters are shown as escape text so the printed list stays readable.
    sortAndPrint(s3, ::selector3) { it.toDisplayString() }

    val s4 = arrayOf(
        "cASE INDEPENENT: 3-2",
        "caSE INDEPENENT: 3-1",
        "casE INDEPENENT: 3+0",
        "case INDEPENENT: 3+1"
    )
    println()
    sortAndPrint(s4, ::selector4)

    val s5 = arrayOf(
        "foo100bar99baz0.txt",
        "foo100bar10baz0.txt",
        "foo1000bar99baz10.txt",
        "foo1000bar99baz9.txt"
    )
    println()
    sortAndPrint(s5, ::selector5)

    val s6 = arrayOf(
        "The Wind in the Willows",
        "The 40th step more",
        "The 39 steps",
        "Wanda"
    )
    println()
    sortAndPrint(s6, ::selector6)

    val s7 = arrayOf(
        "Equiv. ý accents: 2-2",
        "Equiv. Ý accents: 2-1",
        "Equiv. y accents: 2+0",
        "Equiv. Y accents: 2+1"
    )
    println()
    sortAndPrint(s7, ::selector7)

    // The first entry uses the IJ / ij ligature code points (U+0132 / U+0133),
    // written as escapes so they cannot be flattened to plain ASCII letters.
    val s8 = arrayOf(
        "\u0132 ligatured \u0133",
        "no ligature"
    )
    println()
    sortAndPrint(s8, ::selector8)

    val s9 = arrayOf(
        "Start with an ʒ: 2-2",
        "Start with an ſ: 2-1",
        "Start with an ß: 2+0",
        "Start with an s: 2+1"
    )
    println()
    sortAndPrint(s9, ::selector9)
}