Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- """
- Inspired by: http://www.sebastianviereck.de/mysql-php-umlaute-sonderzeichen-utf8-iso/
- """
- # suppress deprecation warnings
- import warnings
- warnings.filterwarnings("ignore", category=DeprecationWarning)
- import sys
- from StringIO import StringIO
- import csv
- # translation table
- TABLE = {
- 'ü': 'ü',
- 'ä': 'ä',
- 'ö': 'ö',
- 'Ö': 'Ö',
- 'ß': 'ß',
- 'Ã ': 'à',
- 'á': 'á',
- 'â': 'â',
- 'ã': 'ã',
- 'ù': 'ù',
- 'ú': 'ú',
- 'û': 'û',
- 'Ù': 'Ù',
- 'Ú': 'Ú',
- 'Û': 'Û',
- 'Ãœ': 'Ü',
- 'ò': 'ò',
- 'ó': 'ó',
- 'ô': 'ô',
- 'è': 'è',
- 'é': 'é',
- 'ê': 'ê',
- 'ë': 'ë',
- 'À': 'À',
- 'Ã': 'Á',
- 'Â': 'Â',
- 'Ã': 'Ã',
- 'Ä': 'Ä',
- 'Ã…': 'Å',
- 'Ç': 'Ç',
- 'È': 'È',
- 'É': 'É',
- 'Ê': 'Ê',
- 'Ë': 'Ë',
- 'ÃŒ': 'Ì',
- 'Ã': 'Í',
- 'ÃŽ': 'Î',
- 'Ã': 'Ï',
- 'Ñ': 'Ñ',
- 'Ã’': 'Ò',
- 'Ó': 'Ó',
- 'Ô': 'Ô',
- 'Õ': 'Õ',
- 'Ø': 'Ø',
- 'Ã¥': 'å',
- 'æ': 'æ',
- 'ç': 'ç',
- 'ì': 'ì',
- 'Ã': 'í',
- 'î': 'î',
- 'ï': 'ï',
- 'ð': 'ð',
- 'ñ': 'ñ',
- 'õ': 'õ',
- 'ø': 'ø',
- 'ý': 'ý',
- 'ÿ': 'ÿ',
- '€': '€'
- }
def encode(s, coding="iso-8859-15"):
    """Return *s* encoded with the codec *coding* (ISO-8859-15 by default)."""
    encoded = s.encode(coding)
    return encoded
def decode(s, coding="utf-8"):
    """Return *s* decoded with the codec *coding* (UTF-8 by default)."""
    decoded = s.decode(coding)
    return decoded
def main(inFile, outFile):
    """Repair mojibake in a ':'-delimited file and rewrite it as Excel CSV.

    The input is read as UTF-8, every sequence listed in TABLE is replaced
    by its target character, and the text is re-encoded with encode()'s
    default codec.  The result is parsed as ':'-separated values (no quote
    handling), an extra "JobId" column is inserted in front of the sixth
    column, and the rows are written ';'-separated in the Excel dialect.

    inFile  -- path of the file to read
    outFile -- path of the file to write (overwritten)

    Rows with fewer than six columns are copied without the extra column.
    """
    # read whole file
    with open(inFile, 'r') as f:
        data = decode(f.read())

    # Replace characters.  Dict iteration order is arbitrary, so apply the
    # replacements in a fixed order, longest sequence first: a short key that
    # is a prefix of a longer one must not destroy the longer match, and the
    # output must not vary from run to run.
    replacements = [(decode(bad), decode(good)) for bad, good in TABLE.items()]
    replacements.sort(key=lambda pair: (-len(pair[0]), pair[0]))
    for bad, good in replacements:
        data = data.replace(bad, good)
    data = encode(data)

    # parse data as CSV file
    reader = csv.reader(StringIO(data), delimiter=':', quoting=csv.QUOTE_NONE)

    # add column to CSV
    csv_ = []
    for r, row in enumerate(reader):
        row_ = []
        for c, col in enumerate(row):
            # add extra column before column no. 6
            if c == 5:
                # first row gets the header name, every other row the part
                # of column 6 that precedes the first underscore
                row_.append("JobId" if r == 0 else col.split('_')[0])
            row_.append(col)
        csv_.append(row_)

    # write as CSV using Excel dialect
    with open(outFile, 'wb') as w:
        writer = csv.writer(w, dialect=csv.excel, delimiter=';', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(csv_)
if __name__ == "__main__":
    # Script entry point: expects the input path and the output path as the
    # first two command-line arguments.  Exit with a usage hint instead of an
    # IndexError traceback when either of them is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s INFILE OUTFILE" % sys.argv[0])
    main(sys.argv[1], sys.argv[2])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement