# RosettaCodeData/Task/Text-processing-2/Ruby/text-processing-2.rb

require 'set'

# Validates a stream of tab-separated readings and prints a summary report.
#
# Each line is split on tabs into a leading datestamp followed by value/flag
# pairs.  A line is well formed when the datestamp starts with YYYY-MM-DD and
# every pair starts with a decimal number followed by an integer flag.  A
# line is complete when it holds exactly 24 pairs, and its readings are good
# when no flag is below 1.
#
# readings - any object whose #each yields one line String at a time
#            (an open File, a StringIO, an Array of lines, ...).
# debug    - when true, each offending line is also printed as it is found.
#
# Prints the duplicated datestamps, the datestamps of malformed lines, the
# datestamps of lines without 24 pairs, the count and percentage of lines
# whose readings are all good, and the total line count.  Returns nil.
def munge2(readings, debug=false)
   date_pat  = /\A\d{4}-\d{2}-\d{2}/
   value_pat = /\A[-+]?\d+\.\d+/
   flag_pat  = /\A-?\d+/

   total_lines = 0
   bad_reading_lines = 0
   dup_dates, bad_format, bad_length = Set[], Set[], Set[]
   seen_dates = Set[]

   readings.each do |line|
      total_lines += 1
      fields = line.split(/\t/)
      # An empty line splits into no fields at all; fall back to '' so the
      # date can still be stored, sorted and printed like any other.
      date = fields.shift.to_s
      # each_slice replaces enum_slice, which no longer exists since Ruby 1.9.
      pairs = fields.each_slice(2).to_a

      well_formed = date =~ date_pat &&
         pairs.all? { |value, flag| value =~ value_pat && flag =~ flag_pat }
      unless well_formed
         puts 'Bad formatting ' + line if debug
         bad_format << date
      end

      wrong_length = pairs.length != 24
      # A pair without a flag (odd field count) has flag nil, and nil.to_i is
      # 0, so it is treated as a bad reading as well.
      bad_flag = pairs.any? { |_value, flag| flag.to_i < 1 }
      puts 'Missing values ' + line if debug && (wrong_length || bad_flag)
      bad_length << date if wrong_length
      bad_reading_lines += 1 if bad_flag

      # Set#add? returns nil when the date was already recorded.
      unless seen_dates.add?(date)
         puts 'Duplicate datestamp ' + line if debug
         dup_dates << date
      end
   end

   good_lines = total_lines - bad_reading_lines
   puts 'Duplicate dates:', dup_dates.sort.map { |d| '  ' + d }
   puts 'Bad format:', bad_format.sort.map { |d| '  ' + d }
   puts 'Bad number of fields:', bad_length.sort.map { |d| '  ' + d }
   puts 'Records with good readings: %i = %5.2f%%' % [
      good_lines, good_lines / total_lines.to_f * 100 ]
   puts
   puts 'Total records:  %d' % total_lines
end

# Script entry point: run the report over readings.txt from the current
# directory.  The block form closes the file once munge2 has finished.
File.open('readings.txt', 'r') { |io| munge2(io) }