55 lines
2.0 KiB
Plaintext
55 lines
2.0 KiB
Plaintext
using DataFrames
using Dates
|
|
|
|
"""
    mungdata(filename)

Read the whitespace-separated readings file `filename` and summarise it.

Each non-blank line is split on whitespace. The first field is parsed as a
`y-m-d` date; the remaining fields are consumed as (value, flag) pairs. A pair
whose integer flag is `>= 0` is counted as valid and its value is stored;
any other pair extends the current run of invalid readings on that line.

# Returns
A tuple `(table, overallmean)`:
- `table::DataFrame` with one row per input line and the columns `Date`,
  `Mean` (mean of the valid values on the line, `NaN` if there are none),
  `ValidValues` (count of valid pairs), `MaximumGap` (length of the longest run
  of consecutive invalid pairs on the line), `GapPosition` (1-based pair index
  at which that run starts, `0` if there is no invalid pair), followed by 24
  columns named `0:00` … `23:00` holding the valid values (`NaN` elsewhere).
- `overallmean`: sum of all valid values divided by their count (`NaN` when the
  file contains no valid value).

# Throws
- `BoundsError` if a line carries more than 24 (value, flag) pairs.
- `ArgumentError` if a date, value or flag cannot be parsed.
"""
function mungdata(filename)
    nhours = 24                       # columns "0:00" … "23:00"
    datefmt = dateformat"y-m-d"       # parse the format once, not once per line

    # Blank lines carry no record; skipping them avoids indexing an empty split.
    lines = filter(line -> !all(isspace, line), readlines(filename))
    numlines = length(lines)

    dates = Vector{DateTime}(undef, numlines)   # every slot is assigned in the loop below
    means = zeros(Float64, numlines)
    numvalid = zeros(Int, numlines)
    invalidlength = zeros(Int, numlines)
    invalidpos = zeros(Int, numlines)
    datamatrix = fill(NaN, numlines, nhours)    # NaN marks "no valid reading"
    totalsum = 0.0
    totalgood = 0

    for (linenum, line) in enumerate(lines)
        data = split(line)
        validcount = badlength = 0
        validsum = 0.0
        # data[1] is the date; data[i] / data[i+1] are a value and its flag.
        for i in 2:2:length(data)-1
            slot = i ÷ 2              # 1-based index of this pair on the line
            # NOTE(review): a flag of 0 is treated as valid here — confirm this
            # matches the data format's definition of a good reading.
            if parse(Int, data[i+1]) >= 0
                value = parse(Float64, data[i])
                datamatrix[linenum, slot] = value
                validsum += value
                validcount += 1
                badlength = 0
            else
                badlength += 1
                if badlength > invalidlength[linenum]
                    invalidlength[linenum] = badlength
                    # Start of the current (longest so far) run of bad pairs.
                    invalidpos[linenum] = slot - badlength + 1
                end
            end
        end
        dates[linenum] = DateTime(data[1], datefmt)
        means[linenum] = validsum / validcount   # 0.0 / 0 yields NaN, no error
        numvalid[linenum] = validcount
        totalsum += validsum
        totalgood += validcount
    end

    dt = DataFrame(Date = dates, Mean = means, ValidValues = numvalid,
                   MaximumGap = invalidlength, GapPosition = invalidpos)
    for i in axes(datamatrix, 2)
        dt[!, Symbol("$(i-1):00")] = datamatrix[:, i]
    end
    return dt, totalsum / totalgood
end

# This is taken from the example listed on the task, since the actual text
# file is not available.
datafilename = "data.txt"
df, dmean = mungdata(datafilename)
println(df)
println("The overall mean is $dmean")

if isempty(df.MaximumGap)
    # argmax throws on an empty collection; there is no gap to report anyway.
    println("No records found in $(datafilename).")
else
    maxbadline = argmax(df.MaximumGap)
    maxbadval = df.MaximumGap[maxbadline]
    maxbadtime = df.GapPosition[maxbadline] - 1   # 1-based pair index -> hour of day
    maxbaddate = "$(Date(df.Date[maxbadline]))"   # date part only, e.g. 2004-12-31
    println("The largest run of bad values is $(maxbadval), on $(maxbaddate) beginning at $(maxbadtime):00 hours.")
end
|