40 lines
1.6 KiB
Scala
40 lines
1.6 KiB
Scala
object DataMunging2 {
|
|
import scala.io.Source
|
|
import scala.collection.immutable.{TreeMap => Map}
|
|
|
|
val pattern = """^(\d+-\d+-\d+)""" + """\s+(\d+\.\d+)\s+(-?\d+)""" * 24 + "$" r;
|
|
|
|
def main(args: Array[String]) {
|
|
val files = args map (new java.io.File(_)) filter (file => file.isFile && file.canRead)
|
|
val (numFormatErrors, numValidRecords, dateMap) =
|
|
files.iterator.flatMap(file => Source fromFile file getLines ()).
|
|
foldLeft((0, 0, new Map[String, Int] withDefaultValue 0)) {
|
|
case ((nFE, nVR, dM), line) => pattern findFirstMatchIn line map (_.subgroups) match {
|
|
case Some(List(date, rawData @ _*)) =>
|
|
val allValid = (rawData map (_ toDouble) iterator) grouped 2 forall (_.last > 0)
|
|
(nFE, nVR + (if (allValid) 1 else 0), dM(date) += 1)
|
|
case None => (nFE + 1, nVR, dM)
|
|
}
|
|
}
|
|
|
|
dateMap foreach {
|
|
case (date, repetitions) if repetitions > 1 => println(date+": "+repetitions+" repetitions")
|
|
case _ =>
|
|
}
|
|
|
|
println("""|
|
|
|Valid records: %d
|
|
|Duplicated dates: %d
|
|
|Duplicated records: %d
|
|
|Data format errors: %d
|
|
|Invalid data records: %d
|
|
|Total records: %d""".stripMargin format (
|
|
numValidRecords,
|
|
dateMap filter { case (_, repetitions) => repetitions > 1 } size,
|
|
dateMap.valuesIterable filter (_ > 1) map (_ - 1) sum,
|
|
numFormatErrors,
|
|
dateMap.valuesIterable.sum - numValidRecords,
|
|
dateMap.valuesIterable.sum))
|
|
}
|
|
}
|