// Origin: RosettaCodeData/Task/Text-processing-1/Scala/text-processing-1-1.scala (Scala; 59 lines, 2.4 KiB as originally published)

/** Summarises instrument-reading files made of lines of the form
  * `date (value flag) x 24`, where a reading counts as valid only when its flag is positive.
  *
  * For every well-formed line a per-line summary is printed; after all files are read,
  * the overall total, count and average are printed together with the longest run of
  * consecutive rejected readings and the date of the line on which that run began.
  */
object DataMunging {
  import scala.io.Source

  /** A run of equal validity flags: `((isValid, runLength), dateOfLineWhereRunBegan)`. */
  type Flag = ((Boolean, Int), String)

  /** Runs ordered most recent first. */
  type Flags = List[Flag]

  /** One element per input line; `None` marks a line that did not match [[pattern]]. */
  type LineIterator = Iterator[Option[(Double, Int, Flags)]]

  /** Number of (value, flag) pairs expected on every line. */
  private val ReadingsPerLine = 24

  /** Matches a whole line: a date followed by [[ReadingsPerLine]] `value flag` pairs. */
  val pattern: scala.util.matching.Regex =
    ("""^(\d+-\d+-\d+)""" + """\s+(\d+\.\d+)\s+(-?\d+)""" * ReadingsPerLine + "$").r

  /** Run-length encodes `list`.
    *
    * @param list the elements to encode; may be empty
    * @return `(element, runLength)` pairs in REVERSE order of appearance, i.e. the last run of
    *         `list` is the head of the result; `Nil` for an empty input
    */
  def spans[A](list: List[A]): List[(A, Int)] =
    list.foldLeft(List.empty[(A, Int)]) {
      case ((a, n) :: tail, b) if a == b => (a, n + 1) :: tail
      case (acc, b) => (b, 1) :: acc
    }

  /** Mean of `count` readings summing to `sum`; `0.0` when there are no readings,
    * so that an all-rejected line (or an empty input) does not print `NaN`.
    */
  def average(sum: Double, count: Int): Double =
    if (count > 0) sum / count else 0.0

  /** Parses one input line and prints its per-line summary as a side effect.
    *
    * @param line a raw line of the input file
    * @return `Some((sumOfValidValues, numberOfValidValues, runsOfFlags))` when the line matches
    *         [[pattern]], with the runs ordered last-run-first and tagged with the line's date;
    *         `None` (and no output) when the line is malformed
    */
  def parseLine(line: String): Option[(Double, Int, Flags)] =
    pattern.findFirstMatchIn(line).map { m =>
      val date = m.group(1)
      // The remaining groups alternate value, flag; a reading is valid when its flag is > 0.
      val readings = m.subgroups.tail
        .grouped(2)
        .collect { case List(value, flag) => (value.toDouble, flag.toDouble > 0) }
        .toList
      val valid = readings.collect { case (value, true) => value }
      val validSize = valid.length
      val validSum = valid.sum
      val flags = spans(readings.map(_._2)).map(run => (run, date))
      println("Line: %11s Reject: %2d Accept: %2d Line_tot: %10.3f Line_avg: %10.3f".format(
        date, ReadingsPerLine - validSize, validSize, validSum, average(validSum, validSize)))
      (validSum, validSize, flags)
    }

  /** Reads `file` and returns a lazily parsed iterator over its lines.
    *
    * The file is read completely and closed before this method returns, so no file handle
    * outlives the call; parsing (and the per-line printing done by [[parseLine]]) still
    * happens lazily as the returned iterator is consumed.
    */
  def linesIterator(file: java.io.File): LineIterator = {
    val source = Source.fromFile(file)
    val rawLines = try source.getLines().toList finally source.close()
    rawLines.iterator.map(parseLine)
  }

  /** Folds the per-line results into overall totals.
    *
    * Runs of flags are concatenated most recent first. When the first run of a new line has
    * the same flag as the latest accumulated run, the two are merged and the merged run keeps
    * the date of the line on which it began.
    *
    * @param fileIterator per-line results; `None` entries (malformed lines) are skipped
    * @return `(totalOfValidValues, numberOfValidValues, allRuns)`
    */
  def totalizeLines(fileIterator: LineIterator): (Double, Int, Flags) =
    fileIterator.foldLeft((0.0, 0, List.empty[Flag])) {
      case (totals, None) => totals
      case ((totalSum, totalSize, runs), Some((validSum, validSize, flags))) =>
        // `flags` is last-run-first, so its last element is the first run of the new line,
        // while the head of `runs` is the final run of everything seen so far.
        val merged = (flags.lastOption, runs) match {
          case (Some(((firstFlag, firstSize), _)), ((flag, size), date) :: tail) if firstFlag == flag =>
            flags.init ::: ((flag, size + firstSize), date) :: tail
          case _ => flags ::: runs
        }
        (totalSum + validSum, totalSize + validSize, merged)
    }

  /** The longest run of rejected readings, if any.
    *
    * Ties on length are broken by the greatest date string, which is the tuple ordering
    * the original `.max` call relied on.
    */
  def longestInvalidRun(flags: Flags): Option[Flag] = {
    val invalidRuns = flags.filter { case ((isValid, _), _) => !isValid }
    if (invalidRuns.isEmpty) None else Some(invalidRuns.max)
  }

  /** Entry point: every argument that names a readable regular file is processed in order;
    * other arguments are silently ignored.
    */
  def main(args: Array[String]): Unit = {
    val files = args.map(new java.io.File(_)).filter(file => file.isFile && file.canRead)
    val (totalSum, totalSize, flags) = totalizeLines(files.iterator.flatMap(linesIterator))
    // Built separately so that an input without any rejected reading does not crash the report.
    val runLine = longestInvalidRun(flags) match {
      case Some(((_, invalidCount), startDate)) =>
        "Maximum run(s) of %d consecutive false readings began at %s".format(invalidCount, startDate)
      case None =>
        "No runs of consecutive false readings were found"
    }
    val report = """|
                    |File(s) = %s
                    |Total = %10.3f
                    |Readings = %6d
                    |Average = %10.3f
                    |
                    |%s""".stripMargin
    println(report.format(files.mkString(" "), totalSum, totalSize, average(totalSum, totalSize), runLine))
  }
}