# Text to XML Parser

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re
import datetime
import codecs
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup


start = datetime.datetime.now()
file = codecs.open('150326.tjd','r','Windows-1250')
source = file.read()

print "Starting parser...."

# Escape the XML special characters before any tags are inserted, so that a
# literal '&', '<' or '>' in the input cannot produce malformed markup.
# '&' has to be handled first; otherwise the '&' of the freshly written
# '&lt;' / '&gt;' entities would be escaped a second time.
xml = source.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

# Every marker owns the rest of its line: everything up to and including the
# newline, or up to the end of the input when the last line has no trailing
# newline (so a final "-END-" without a newline still closes its article).
# Group 1 is that remainder and is what the replacement templates wrap.
line_rest = r'(.*?(?:\n|\Z))'

# (marker, replacement template) pairs, applied in this order.
# -NAZOV- also opens the <article> element; -TEXT- opens <text> and drops the
# remainder of its own line; -END- closes both <text> and <article>.
field_markup = (
    ('-NAZOV-', r'<article>\n<name>\1</name>\n'),
    ('-ZDROJ-', r'<source>\1</source>\n'),
    ('-CISLO-', r'<number>\1</number>\n'),
    ('-STRANA-', r'<page>\1</page>\n'),
    ('-DATUM-', r'<date>\1</date>\n'),
    ('-AUTOR-', r'<author>\1</author>\n'),
    ('-POZN-', r'<description>\1</description>\n'),
    ('-PRILOHA-', r'<attachment>\1</attachment>\n'),
    ('-REGION-', r'<region>\1</region>\n'),
    ('-TEXT-', r'<text>'),
    ('-END-', r'</text>\n</article>'),
)
for marker, replacement in field_markup:
    xml = re.sub(marker + line_rest, replacement, xml)

# Wrap all articles in a single <import> root element and add the XML
# declaration.
xml = "<?xml version=\"1.0\"?>\n<import>\n" + xml + "</import>"

print "Finishing parser...."

doc = BeautifulSoup(xml, 'xml')
output = codecs.open( 'xmlout.xml', 'w','UTF-8' )
output.write( doc.prettify() )
output.close()

print "Working on parsing\n"

tree = ET.parse('xmlout.xml')
root = tree.getroot()

def get_values(num):
    article_name = root[num][0].text
    print "Name: ", article_name.encode('UTF-8')
    article_source = root[num][1].text
    print "Source: ",article_source.encode('UTF-8')
    article_number = root[num][2].text
    print "Number: ", article_number.encode('UTF-8')
    article_page = root[num][3].text
    print "Page: ", article_page.encode('UTF-8')
    article_date = root[num][4].text
    print "Date: ", article_date.encode('UTF-8')
    article_author = root[num][5].text
    print "Author: ", article_author.encode('UTF-8')
    article_description = root[num][6].text
    print "Description:", article_description.encode('UTF-8')
    article_attachment = root[num][7].text
    print "Attachment: ", article_attachment.encode('UTF-8')
    article_region = root[num][8].text
    print "Region: ", article_region.encode('UTF-8')
    article_text = root[num][9].text
    print "Text: ", article_text.encode('UTF-8')

num = 0
for child in root:
    print "Article number from source file: " , num +1
    get_values(num)

    num += 1

print "Number of articles : ", len(root.findall("./"))

stop = datetime.datetime.now()
took = stop - start

print "All tasks finished. It took: ", int(took.total_seconds() * 1000), " milliseconds"