64 lines
2.1 KiB
Clojure
64 lines
2.1 KiB
Clojure
(ns rosettacode.textprocessing1
|
|
(:require [clojure.string :as str]))
|
|
|
|
(defn parse-line
  "Parses one whitespace-separated record line into a map.

  The first token is taken as the date; the remaining tokens are consumed
  pairwise as (value, flag). Returns
    {:date <string>
     :hour-vals <lazy seq of {:val <Double> :flag <Long>}>}

  Surrounding whitespace is trimmed before splitting: a line with leading
  whitespace would otherwise yield an empty first token, which shifts every
  column by one and feeds the date to the number parser.

  A trailing token without a partner is dropped by `partition`. Because
  :hour-vals is lazy, a NumberFormatException for a malformed value or flag
  is thrown when the sequence is realized, not when this function returns."
  [s]
  (let [[date & data-toks] (str/split (str/trim s) #"\s+")]
    {:date date
     :hour-vals (for [[v flag] (partition 2 data-toks)]
                  ;; Static parse methods replace the boxing constructors
                  ;; (Double. / Long.), which are deprecated in recent JDKs.
                  {:val (Double/parseDouble v)
                   :flag (Long/parseLong flag)})}))
|
|
|
|
(defn analyze-line
  "Summarizes one parsed record `m` (a map with :date and :hour-vals).

  A reading counts as valid when its :flag is positive. Returns a map with:
    :date   - the record's date, passed through
    :n-vals - number of valid readings
    :sum    - sum of the valid readings, as a double
    :avg    - mean of the valid readings, or 0.0 when there are none
    :gaps   - one {:gap? <bool> :date <date>} entry per reading, in order,
              where :gap? is true for readings that are not valid"
  [m]
  (let [day       (:date m)
        readings  (:hour-vals m)
        good?     #(pos? (:flag %))
        good-vals (map :val (filter good? readings))
        n         (count good-vals)
        total     (reduce + good-vals)
        mean      (if (zero? n) 0.0 (/ total n))
        ->gap     (fn [reading] {:gap? (not (good? reading)) :date day})]
    {:date   day
     :n-vals n
     ;; (reduce + ()) is the long 0, so coerce to keep :sum a double.
     :sum    (double total)
     :avg    mean
     :gaps   (map ->gap readings)}))
|
|
|
|
(defn print-line
  "Prints a one-line summary of an analyzed record `m` to *out*: its date,
  the count of valid readings, their sum and their mean. Reads the :date,
  :n-vals, :sum and :avg keys of `m`. Returns nil."
  [m]
  (let [fields ((juxt :date :n-vals :sum :avg) m)
        text   (apply format "%s: %d valid, sum: %7.3f, mean: %6.3f" fields)]
    (println text)))
|
|
|
|
(defn process-line
  "Parses the raw record line `s`, analyzes it, prints its one-line summary,
  and returns the analysis map produced by `analyze-line`."
  [s]
  ;; doto calls print-line for its side effect and hands back the analysis.
  (doto (analyze-line (parse-line s))
    print-line))
|
|
|
|
(defn update-file-stats
  "Folds one line's analysis `line-m` into the running file summary `file-m`.

  Adds the line's :sum and :n-vals to the file's totals and appends the
  line's :gaps entries, in order, to the file's :gap-recs collection.
  Any other keys of `file-m` are carried over unchanged. Returns the
  updated summary map."
  [file-m line-m]
  (let [total (+ (:sum file-m) (:sum line-m))
        n     (+ (:n-vals file-m) (:n-vals line-m))
        gaps  (into (:gap-recs file-m) (:gaps line-m))]
    (assoc file-m
           :sum total
           :n-vals n
           :gap-recs gaps)))
|
|
|
|
(defn process-file
  "Processes the readings file at `path`.

  Every non-blank line is run through `process-line` (which prints a
  per-line summary) and folded into a file-wide summary with
  `update-file-stats`. Afterwards prints the overall sum, the number of
  valid values and their average, followed by the longest run of
  consecutive gap readings and the date on which that run started.
  Returns nil.

  Edge cases handled explicitly:
    - an empty file, or one with no valid readings, reports an average of
      0.0 (the same convention `analyze-line` uses per line) instead of
      throwing a divide-by-zero error or printing NaN;
    - a file with no gap readings reports that no gaps were found instead
      of printing a zero-length gap with a null start date."
  [path]
  (let [file-lines (->> (slurp path)
                        str/split-lines
                        ;; Blank lines carry no record; skip them so they do
                        ;; not produce an empty-dated summary line.
                        (remove str/blank?))
        summary (reduce (fn [res line]
                          (update-file-stats res (process-line line)))
                        ;; Seed :sum with a double: the %f conversion below
                        ;; rejects a long, which an empty file would leave.
                        {:sum 0.0
                         :n-vals 0
                         :gap-recs []}
                        file-lines)
        total (double (:sum summary))
        n (:n-vals summary)
        avg (if (zero? n) 0.0 (/ total n))
        ;; Group consecutive readings by gap status, keep only the gap runs,
        ;; and take the longest; the stable sort keeps the earliest on ties.
        max-gap (->> (partition-by :gap? (:gap-recs summary))
                     (filter #(:gap? (first %)))
                     (sort-by count >)
                     first)]
    (println (format "Sum: %f\n# Values: %d\nAvg: %f" total n avg))
    (if max-gap
      (println (format "Max gap of %d recs started on %s"
                       (count max-gap)
                       (:date (first max-gap))))
      (println "No gaps found"))))
|