Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # An adapter that takes CSV as input, performs a MIME/utf-8 decoding, then returns the CSV results
- import csv,sys,commands,socket,email.header,re,codecs,traceback
def decode_string(encoded_string):
    """Decode a MIME encoded-word (RFC 2047) header value.

    Args:
        encoded_string: Raw field value that may contain one or more
            ``=?charset?Q?...?=`` or ``=?charset?B?...?=`` encoded words,
            optionally mixed with plain text.

    Returns:
        * ``encoded_string`` unchanged when it contains no encoded-word
          marker or when the header cannot be parsed.
        * The literal string ``"Codec not found"`` when a chunk names a
          charset for which no codec is available.
        * Otherwise the decoded text of *every* chunk joined together, as
          the native ``str`` type (UTF-8 encoded bytes on Python 2, text on
          Python 3).
    """
    # The encoding letter is case-independent in RFC 2047, so "?q?" and
    # "?b?" must be recognised as well as "?Q?" and "?B?".
    if not re.search(r"=\?.*?\?[QB]\?", encoded_string, re.IGNORECASE):
        return encoded_string

    try:
        chunks = email.header.decode_header(encoded_string)
    except Exception:
        # Deliberate best effort: an unparsable header is passed through.
        return encoded_string

    pieces = []
    previous_was_plain = None
    for raw, charset in chunks:
        if isinstance(raw, bytes):
            # Plain-text chunks come back with charset None; decode them as
            # UTF-8 so ordinary ASCII text passes through unchanged.
            try:
                text = raw.decode(charset or "utf-8", "replace")
            except LookupError:
                return "Codec not found"
        else:
            text = raw

        # Keep a separator where plain text meets an encoded word, unless
        # the parser already left whitespace at that boundary.
        is_plain = charset is None
        if pieces and is_plain != previous_was_plain:
            if not (pieces[-1][-1:].isspace() or text[:1].isspace()):
                pieces.append(u" ")
        pieces.append(text)
        previous_was_plain = is_plain

    decoded = u"".join(pieces)
    if isinstance(decoded, str):
        return decoded
    # Python 2: hand back UTF-8 bytes, which is what the csv module writes.
    return decoded.encode("utf-8")
def main():
    """Read CSV from stdin, decode one column into another, write CSV to stdout.

    Command line: ``decode_string.py <encoded field> <decoded field>``.
    Both names must appear in the CSV header row. The header is echoed
    first; a data row is written only when its encoded field is non-empty
    and the decoded result is non-empty. With a wrong argument count or a
    missing column a message is printed and the process exits with status 0.
    """
    if len(sys.argv) != 3:
        print("Usage: python decode_string.py [encoded field] [decoded field]")
        sys.exit(0)
    source_field, target_field = sys.argv[1], sys.argv[2]

    rows = csv.reader(sys.stdin)
    header = next(rows, None)
    if header is None:
        # Empty input: nothing to echo, nothing to decode.
        return

    if not (source_field in header and target_field in header):
        print("Encoded string field and decoded string field must exist in CSV data")
        sys.exit(0)

    csv.writer(sys.stdout).writerow(header)
    writer = csv.DictWriter(sys.stdout, header)
    width = len(header)

    for line in rows:
        # Short rows are padded with empty strings; surplus cells are
        # dropped because zip stops at the end of the header.
        padded = line + [""] * (width - len(line))
        record = dict(zip(header, padded))

        if not record[source_field]:
            continue

        # NOTE(review): the pasted source lost its indentation; the write is
        # assumed to be nested under the non-empty encoded-field check -
        # confirm against the original script.
        record[target_field] = decode_string(record[source_field])
        if record[target_field]:
            writer.writerow(record)
if __name__ == "__main__":
    # Script entry point: run main() and append the traceback of any
    # unexpected failure to /tmp/errlog.txt before exiting with status 1.
    try:
        main()
    except Exception:
        # Catch Exception rather than everything, so the SystemExit raised
        # by sys.exit() inside main() keeps its own exit status and is not
        # logged as an error.
        with open("/tmp/errlog.txt", "a") as errlog:
            traceback.print_exc(file=errlog)
        sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment