42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
import re
|
|
import zipfile
|
|
import StringIO
|
|
|
|
def munge2(readings):
|
|
|
|
datePat = re.compile(r'\d{4}-\d{2}-\d{2}')
|
|
valuPat = re.compile(r'[-+]?\d+\.\d+')
|
|
statPat = re.compile(r'-?\d+')
|
|
allOk, totalLines = 0, 0
|
|
datestamps = set([])
|
|
for line in readings:
|
|
totalLines += 1
|
|
fields = line.split('\t')
|
|
date = fields[0]
|
|
pairs = [(fields[i],fields[i+1]) for i in range(1,len(fields),2)]
|
|
|
|
lineFormatOk = datePat.match(date) and \
|
|
all( valuPat.match(p[0]) for p in pairs ) and \
|
|
all( statPat.match(p[1]) for p in pairs )
|
|
if not lineFormatOk:
|
|
print 'Bad formatting', line
|
|
continue
|
|
|
|
if len(pairs)!=24 or any( int(p[1]) < 1 for p in pairs ):
|
|
print 'Missing values', line
|
|
continue
|
|
|
|
if date in datestamps:
|
|
print 'Duplicate datestamp', line
|
|
continue
|
|
datestamps.add(date)
|
|
allOk += 1
|
|
|
|
print 'Lines with all readings: ', allOk
|
|
print 'Total records: ', totalLines
|
|
|
|
#zfs = zipfile.ZipFile('readings.zip','r')
|
|
#readings = StringIO.StringIO(zfs.read('readings.txt'))
|
|
readings = open('readings.txt','r')
|
|
munge2(readings)
|