Advertisement
Guest User

Untitled

a guest
Feb 23rd, 2020
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.85 KB | None | 0 0
  1. import csv
  2. import binascii
  3.  
  4. DATABASE_FOLDER = 'database'  # directory name this script must be located in; compared in the __main__ guard
  5. OUT_FILE = 'out.txt'  # NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed
  6.  
  7. def bitstring_to_bytes(s):
  8.     v = int(s, 2)
  9.     b = bytearray()
  10.     while v:
  11.         b.append(v & 0xff)
  12.         v >>= 8
  13.     return bytes(b[::-1])
  14.  
  15. def bistring_to_char(s):
  16.     return (bitstring_to_bytes(s).decode('utf-8'))
  17.  
  18. def remove_encoding_problems(text):
  19.     out_text = ''
  20.  
  21.     stack = []
  22.  
  23.     for letter in text:
  24.         binary = bin(int(binascii.hexlify(letter.encode('utf-8')), 16))
  25.         if len(binary) != 18:
  26.             out_text += letter
  27.             continue
  28.         else:
  29.             stack.append(binary[2:])
  30.             if len(stack) == 2:
  31.                 correct_bits = stack[0][0:8] + stack[1][8:16]
  32.                 correct_char = bistring_to_char(correct_bits)
  33.                 out_text += correct_char
  34.                 stack = []
  35.     return out_text
  36.  
  37. def from_csv(file):
  38.     bixos = []
  39.     bixos_info = []
  40.     spamreader = csv.reader(file, delimiter=',', quotechar='|')
  41.     for row in spamreader:
  42.         NUSP = row[1]
  43.         nome = row[2]
  44.         email = row[5].replace(' ','')
  45.         bixos.append(NUSP)
  46.         bixos_info.append([NUSP, nome, email])
  47.     return bixos, bixos_info
  48.  
  49.  
  50. def write_lines(lines, file_name):
  51.     file = open(file_name, 'w')
  52.     for line in lines:
  53.         file.write(line + '\n')
  54.  
  55.     file.close()
  56.     pass
  57.  
  58.  
  59. def main():
  60.     file = open('file.csv')
  61.     text = file.read()
  62.     file.close()
  63.  
  64.     file = open('file_clean.csv', 'w')
  65.  
  66.     text_ok = remove_encoding_problems(text)
  67.     # text_ok = remove_encoding_problems(text)
  68.     file.write(text_ok)
  69.     file.close()
  70.  
  71.     file = open('file_clean.csv', 'r')
  72.     bixos, bixos_info = from_csv(file)
  73.     file.close()
  74.  
  75.     write_lines(bixos, 'bixos')
  76.     for bixo_info in bixos_info:
  77.         write_lines(bixo_info, bixo_info[0])
  78.  
  79.  
  80.  
  81.  
  82. if __name__ == '__main__':
  83.     import pathlib
  84.     folder = pathlib.Path(__file__).parent.absolute().parts[-1]
  85.     if folder != DATABASE_FOLDER:
  86.         print('please use this in database folder')
  87.     else:
  88.         main()
  89. else:
  90.     print('DO NOT IMPORT "main.py"')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement