Advertisement
nullzero

For P'Fai 2

Jun 24th, 2017
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.74 KB | None | 0 0
  1. import os
  2. import xml.etree.ElementTree as ET
  3. import re
  4. import csv
  5.  
  6. def format_time(secs):
  7.     mins = secs / 60
  8.     hrs = int(mins / 60)
  9.     mins = int(mins - hrs * 60)
  10.     secs -= (hrs * 3600 + mins * 60)
  11.     return "{:02d}:{:02d}:{:05.2f}".format(hrs, mins, secs)
  12.  
  13. fieldnames = ["Start time", "End time", "Duration (sec.)", "Row name", "Instance number", "Num. of Labels", "Instance ID", "Instance note", "No Group"]
  14.  
  15. for f in os.listdir('.'):
  16.     if f.endswith('.xml'):
  17.         print "Processing {}".format(f)
  18.         with open(f) as fp:
  19.             content = fp.read().replace('\x00', '').replace('\r', '\n')[2:]
  20.         content = re.sub("< */?.*?>", lambda x: x.group().lower(), content)
  21.         e = ET.fromstring(content)
  22.         with open(f + '.csv', 'w') as csvfile:
  23.             writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
  24.             writer.writeheader()
  25.             for instance in e.iter('instance'):
  26.                 idv = int(instance.find('id').text)
  27.                 start = float(instance.find('start').text)
  28.                 end = float(instance.find('end').text)
  29.                 code = instance.find('code').text
  30.                 labels = [l.text for l in instance.iter('text')]
  31.                 writer.writerow({
  32.                     "Start time": format_time(start),
  33.                     "End time": format_time(end),
  34.                     "Duration (sec.)": "{:.2f}".format(end - start),
  35.                     "Row name": code,
  36.                     "Instance number": 0, # TODO: edit here
  37.                     "Num. of Labels": len(labels),
  38.                     "Instance ID": idv,
  39.                     "Instance note": "",
  40.                     "No Group": '\n'.join(labels)
  41.                 })
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement