RosettaCodeData/Task/Text-processing-2/Python/text-processing-2-1.py

42 lines
1.1 KiB
Python

import re
import zipfile
import StringIO
def munge2(readings):
datePat = re.compile(r'\d{4}-\d{2}-\d{2}')
valuPat = re.compile(r'[-+]?\d+\.\d+')
statPat = re.compile(r'-?\d+')
allOk, totalLines = 0, 0
datestamps = set([])
for line in readings:
totalLines += 1
fields = line.split('\t')
date = fields[0]
pairs = [(fields[i],fields[i+1]) for i in range(1,len(fields),2)]
lineFormatOk = datePat.match(date) and \
all( valuPat.match(p[0]) for p in pairs ) and \
all( statPat.match(p[1]) for p in pairs )
if not lineFormatOk:
print 'Bad formatting', line
continue
if len(pairs)!=24 or any( int(p[1]) < 1 for p in pairs ):
print 'Missing values', line
continue
if date in datestamps:
print 'Duplicate datestamp', line
continue
datestamps.add(date)
allOk += 1
print 'Lines with all readings: ', allOk
print 'Total records: ', totalLines
#zfs = zipfile.ZipFile('readings.zip','r')
#readings = StringIO.StringIO(zfs.read('readings.txt'))
readings = open('readings.txt','r')
munge2(readings)