Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- # -*- coding: UTF-8 -*-
- import re
- import os
- import collections
- import dicttoxml
- import xmltodict, json
- from collections import defaultdict
- from time import gmtime, strftime
- from reportlab.lib.units import inch
- from reportlab.lib.pagesizes import A4
- from reportlab.lib.styles import ParagraphStyle
- from reportlab.lib.styles import getSampleStyleSheet
- from reportlab.platypus import SimpleDocTemplate, Spacer, Paragraph, PageBreak
# Paragraph style: bold Helvetica, size 9. No active code in this file uses
# it yet; only a commented-out Paragraph call in docbuilder refers to it.
style = ParagraphStyle(name='Normal', fontName='Helvetica-Bold', fontSize=9)

# Print the start time (gmtime, i.e. UTC) so it can be compared with the
# timestamp printed at the end of the run.
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))

# Names of all entries in the current working directory; rdicombine picks
# the ones ending in 'txt'.
filesindir = os.listdir(os.getcwd())
def returnasstring(arr):
    """Join the strings in *arr* with single spaces and trim the result.

    Args:
        arr: Iterable of strings.

    Returns:
        The items separated by one space each, with leading and trailing
        whitespace removed; ``''`` for an empty iterable.
    """
    # str.join replaces the quadratic ``+=`` loop and removes the local
    # variable that shadowed the ``str`` builtin. The trailing separator the
    # loop produced was whitespace, so strip() yields the same string.
    return ' '.join(arr).strip()
def isEmpty(dictionary):
    """Return True when *dictionary* contains no truthy element.

    Iterating a dict yields its keys, so for a dict this checks the keys;
    any other iterable is checked item by item.

    The previous implementation had the result inverted: it returned True as
    soon as it found a truthy element and False for an empty container,
    which contradicts the function name.

    Args:
        dictionary: A dict (or any iterable).

    Returns:
        True if the container is empty or every element is falsy,
        otherwise False.
    """
    return not any(dictionary)
def rdicombine(filesindir):
    """Concatenate the whitespace-normalised rows of every ``txt`` file.

    Each entry of *filesindir* whose name ends with ``txt`` is opened and
    read line by line. Every line is stripped, and each run of two or more
    whitespace characters inside it is collapsed into a single space.

    Args:
        filesindir: Iterable of file names or paths. Entries that do not end
            with ``txt`` are ignored and never opened.

    Returns:
        All normalised rows joined with ``'\\n'``, with leading and trailing
        whitespace of the combined text removed. Blank input lines in the
        middle of the data stay as empty rows.

    Raises:
        IOError: If no entry of *filesindir* ends with ``txt``. The original
            code tried to raise a plain string, which itself fails with a
            TypeError on Python 2.6+ and Python 3.
    """
    rows = []
    files_loaded = 0
    for filename in filesindir:
        # NOTE(review): this also accepts names such as 'footxt'; kept as-is
        # so the set of loaded files does not change.
        if not filename.endswith('txt'):
            continue
        print(u'loading...' + str(filename))
        files_loaded += 1
        with open(filename) as handle:
            for line in handle:
                # The old split(' ') / re-join round trip was a no-op on an
                # already stripped and collapsed row, so the substitution
                # result is used directly.
                rows.append(re.sub(r'\s\s+', ' ', line.strip()))
    if files_loaded == 0:
        raise IOError(u'no datebase files found heare pal!')
    return '\n'.join(rows).strip()
def billparser(billslist):
    """Group row payloads into blocks keyed by partner id.

    Each row is split on whitespace: the first token is the variable name
    and the remaining tokens, re-joined with single spaces, are its data.

    * ``DT_PARTNER-PARTNER`` sets the current id to the row's data. The data
      is also stored in the current block, like any other row.
    * ``DGV_END`` stores the current block under the current id and starts a
      new block. Its own data is not stored.
    * Any other variable appends its data to the current block.

    The id is not reset after ``DGV_END``, so a later block without its own
    ``DT_PARTNER-PARTNER`` row replaces the entry stored under the previous
    id. Rows after the last ``DGV_END`` are not returned.

    Args:
        billslist: Iterable of row strings.

    Returns:
        dict mapping id -> list of data strings for that block.
    """
    partner_id = ''
    block = []
    bills = {}
    for row in billslist:
        fields = row.split()
        if not fields:
            # Blank rows (e.g. empty lines in the input files) have no
            # variable token; indexing them used to raise IndexError.
            continue
        variable = fields[0]
        data = ' '.join(fields[1:])
        if variable == 'DT_PARTNER-PARTNER':
            partner_id = data
        if variable == 'DGV_END':
            bills[partner_id] = block
            block = []
        else:
            block.append(data)
    return bills
def parstoxml(mainlist):
    """Serialise *mainlist* with dicttoxml and remove three type attributes.

    Args:
        mainlist: The object handed to ``dicttoxml.dicttoxml``.

    Returns:
        The dicttoxml output with every occurrence of ``type="str"``,
        ``type="list"`` and ``type="dict"`` deleted, in that order. Other
        ``type`` attributes are left in place.
    """
    print('build xml loading...')
    document = dicttoxml.dicttoxml(mainlist)
    # Removed one after another, in the same order as before.
    for attribute in ('type="str"', 'type="list"', 'type="dict"'):
        document = re.sub(attribute, '', document)
    return document
def xmltojson(xml):
    """Convert an XML document to JSON text.

    Args:
        xml: XML input accepted by ``xmltodict.parse``.

    Returns:
        The parsed structure serialised with ``json.dumps``.
    """
    return json.dumps(xmltodict.parse(xml))
def docbuilder(json_data, docname, l, r, t, b):
    """Set up an A4 PDF template for *docname*; nothing is rendered yet.

    This is an unfinished stub: the template object is created and
    *json_data* is iterated, but no flowables are added and the document is
    never built, so the function returns None.

    Args:
        json_data: Iterable of entries; each is only read, not used.
        docname: Output name without extension; ``'.pdf'`` is appended.
        l: Left margin passed to the template.
        r: Right margin passed to the template.
        t: Top margin passed to the template.
        b: Bottom margin passed to the template.
    """
    doc = SimpleDocTemplate(
        docname + '.pdf',
        pagesize=A4,
        leftMargin=l,
        rightMargin=r,
        topMargin=t,
        bottomMargin=b
    )
    elements = []
    for entry in json_data:
        client_id = entry
        data = entry
        # TODO: add the flowables for this entry to ``elements``. The
        # disabled draft used Spacer(1, 0.2 * inch), a Paragraph rendered
        # with the module-level ``style`` and a PageBreak.
    # TODO: call doc.build(elements) once the flowables are in place.
# Pipeline: combine the txt files of the working directory, parse them into
# per-id blocks, then render those blocks as XML and JSON.
baselists = rdicombine(filesindir).split('\n')
mainlist = billparser(baselists)
xml = parstoxml(mainlist)
# json_data is produced but not used further; docbuilder is given the parsed
# dict instead.
json_data = xmltojson(xml)
docbuilder(mainlist, u'! Test', 0, 0, 0.3, 0.3)

# Save the XML next to the input files.
with open('sw_database_xml.xml', 'w') as xml_file:
    print('save xml...')
    xml_file.write(xml)

print('done :)')
# End timestamp (gmtime), matching the one printed at start-up.
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement