RosettaCodeData/Task/Text-processing-1/Clojure/text-processing-1.clj

64 lines
2.1 KiB
Clojure

(ns rosettacode.textprocessing1
(:require [clojure.string :as str]))
(defn parse-line
  "Parses one whitespace-separated record line into a map.

  The first token is kept verbatim as :date. The remaining tokens are read
  in pairs as (value, flag); each pair becomes {:val <double> :flag <long>}
  under :hour-vals. A trailing unpaired token is ignored (partition drops it).

  Throws NumberFormatException when a value or flag token is not numeric.
  Because :hour-vals is a lazy sequence, that exception surfaces when the
  sequence is realized, not necessarily inside this call."
  [s]
  ;; Trim first: a leading blank would otherwise make split return an empty
  ;; first token, turning the date into \"\" and shifting every value/flag pair.
  (let [[date & data-toks] (str/split (str/trim s) #"\s+")]
    {:date date
     :hour-vals (for [[v flag] (partition 2 data-toks)]
                  ;; Static parsers avoid the deprecated boxed constructors
                  ;; and the reflective constructor lookup on an untyped arg.
                  {:val (Double/parseDouble v)
                   :flag (Long/parseLong flag)})}))
(defn analyze-line
  "Summarizes one parsed record map `m` (keys :date and :hour-vals).

  A reading counts as valid when its :flag is positive. Returns a map with:
    :date   - the record's :date, passed through
    :n-vals - number of valid readings
    :sum    - sum of the valid :val entries, as a double
    :avg    - :sum divided by :n-vals as a double, or 0.0 when there are none
    :gaps   - one {:gap? <bool> :date <date>} entry per reading, in order,
              where :gap? is true for readings that are NOT valid"
  [m]
  (let [valid? (fn [rec] (pos? (:flag rec)))
        data (->> (:hour-vals m)
                  (filter valid?)
                  (map :val))
        n-vals (count data)
        ;; Coerce once so :sum and :avg are both doubles; dividing the raw
        ;; sum could produce a Ratio for integer inputs, which \"%f\" rejects.
        sum (double (reduce + data))]
    {:date (:date m)
     :n-vals n-vals
     :sum sum
     :avg (if (zero? n-vals) 0.0 (/ sum n-vals))
     :gaps (for [hr (:hour-vals m)]
             {:gap? (not (valid? hr)) :date (:date m)})}))
(defn print-line
  "Prints a one-line report for a per-line summary map `m`, reading its
  :date, :n-vals, :sum and :avg entries. Returns nil."
  [m]
  (let [fields ((juxt :date :n-vals :sum :avg) m)
        report (apply format "%s: %d valid, sum: %7.3f, mean: %6.3f" fields)]
    (println report)))
(defn process-line
  "Parses the raw line `s`, analyzes it, prints the per-line report, and
  returns the analysis map produced by analyze-line."
  [s]
  ;; doto evaluates print-line for its side effect and yields the analysis.
  (doto (-> s parse-line analyze-line)
    print-line))
(defn update-file-stats
  "Folds one line summary `line-m` into the running file summary `file-m`.

  Adds line-m's :sum and :n-vals to the matching totals in file-m and
  appends line-m's :gaps entries, in order, to file-m's :gap-recs.
  Returns the updated summary map."
  [file-m line-m]
  (let [line-sum (:sum line-m)
        line-count (:n-vals line-m)
        line-gaps (:gaps line-m)]
    (-> file-m
        (update-in [:gap-recs] into line-gaps)
        (update-in [:n-vals] + line-count)
        (update-in [:sum] + line-sum))))
(defn process-file
  "Reads the file at `path`, processes every non-blank line with
  process-line (which prints a per-line report), then prints file-wide
  totals and the longest run of consecutive gap readings.

  The whole file is loaded with slurp. Blank lines are skipped. When no
  valid readings exist the overall average is reported as 0.0. When
  several gap runs share the maximum length, the earliest one is reported
  (sort-by is stable). Returns nil."
  [path]
  (let [file-lines (->> (slurp path)
                        str/split-lines
                        ;; A blank line is not a record; processing it would
                        ;; print a report with an empty date.
                        (remove str/blank?))
        summary (reduce (fn [res line]
                          (update-file-stats res (process-line line)))
                        ;; Seed :sum with a double so \"%f\" below still works
                        ;; when no line contributes to the total.
                        {:sum 0.0
                         :n-vals 0
                         :gap-recs []}
                        file-lines)
        total (double (:sum summary))
        n-vals (:n-vals summary)
        ;; Guard the division: with no valid readings the original either
        ;; threw (integer divide by zero) or printed NaN.
        avg (if (zero? n-vals) 0.0 (/ total n-vals))
        max-gap (->> (partition-by :gap? (:gap-recs summary))
                     (filter #(:gap? (first %)))
                     (sort-by count >)
                     first)]
    (println (format "Sum: %f\n# Values: %d\nAvg: %f"
                     total
                     n-vals
                     avg))
    (println (format "Max gap of %d recs started on %s"
                     (count max-gap)
                     (:date (first max-gap))))))