Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import binascii
# Name the directory containing this script must have; the __main__ guard
# compares it against the last component of the script's parent path.
DATABASE_FOLDER = 'database'
# NOTE(review): not referenced anywhere in the visible code -- confirm
# whether this constant is still needed.
OUT_FILE = 'out.txt'
def bitstring_to_bytes(s):
    """Convert a string of binary digits into big-endian bytes.

    The string is parsed with ``int(s, 2)`` and the resulting integer is
    serialised using the smallest number of bytes that can hold it.  As a
    consequence leading zero bytes are not kept: ``'0'`` yields ``b''`` and
    ``'0000000001000001'`` yields ``b'A'``.

    Args:
        s: Text accepted by ``int(s, 2)``, e.g. ``'1100001110101001'``.

    Returns:
        The value of ``s`` as ``bytes``, most significant byte first.

    Raises:
        ValueError: If ``s`` is not a valid base-2 literal or denotes a
            negative number.
    """
    value = int(s, 2)
    if value < 0:
        # A byte-by-byte shift loop never terminates for negative values
        # (-1 >> 8 is still -1), so reject them explicitly.
        raise ValueError('bit string must not be negative: %r' % (s,))
    byte_count = (value.bit_length() + 7) // 8
    return value.to_bytes(byte_count, 'big')
def bistring_to_char(s):
    """Decode the bytes described by the bit string *s* as UTF-8 text.

    Args:
        s: Bit string handed unchanged to ``bitstring_to_bytes``.

    Returns:
        The ``str`` obtained by decoding those bytes as UTF-8.
    """
    raw = bitstring_to_bytes(s)
    return str(raw, 'utf-8')
def remove_encoding_problems(text):
    """Merge pairs of two-byte characters in *text* into single characters.

    Every character is encoded as UTF-8.  Characters whose encoding is not
    exactly 16 bits long are copied to the output unchanged.  Characters
    whose encoding is 16 bits long are held back until two of them have
    been seen; the pair is then replaced by the character decoded from the
    first 8 bits of the first one followed by the last 8 bits of the
    second one.

    NOTE(review): this looks like a repair for UTF-8 text that was decoded
    with a single-byte codec -- confirm with the data source.  Two quirks
    follow directly from the code: the two members of a pair need not be
    adjacent (the merged character is emitted at the position of the
    second one), and a 16-bit character left without a partner at the end
    of the text is not written to the output.

    Args:
        text: The string to process.

    Returns:
        The processed string.
    """
    pieces = []
    pending = []
    for char in text:
        encoded = char.encode('utf-8')
        bits = bin(int(binascii.hexlify(encoded), 16))[2:]
        if len(bits) != 16:
            pieces.append(char)
            continue
        pending.append(bits)
        if len(pending) < 2:
            continue
        lead, trail = pending
        pieces.append(bistring_to_char(lead[:8] + trail[8:16]))
        pending = []
    return ''.join(pieces)
def from_csv(file):
    """Collect ids and contact records from the rows of a CSV source.

    Rows are parsed with ``,`` as delimiter and ``|`` as quote character.
    From each row the fields at index 1 (stored as NUSP), index 2 (name)
    and index 5 (e-mail, with every space removed) are used.  Blank lines
    are skipped.

    Args:
        file: An open text file, or any iterable of lines accepted by
            ``csv.reader``.

    Returns:
        A tuple ``(bixos, bixos_info)``: ``bixos`` is the list of NUSP
        values and ``bixos_info`` the list of ``[NUSP, name, email]``
        lists, both in row order.

    Raises:
        IndexError: If a non-blank row has fewer than six fields.
    """
    bixos = []
    bixos_info = []
    reader = csv.reader(file, delimiter=',', quotechar='|')
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line (typically
            # a trailing newline); indexing it would raise IndexError.
            continue
        nusp = row[1]
        nome = row[2]
        email = row[5].replace(' ', '')
        bixos.append(nusp)
        bixos_info.append([nusp, nome, email])
    return bixos, bixos_info
def write_lines(lines, file_name):
    """Write every string in *lines* to *file_name*, one per line.

    The file is created or truncated, and a ``'\\n'`` is appended to each
    item.  The file is closed even if writing fails part-way through.

    Args:
        lines: Iterable of strings to write.
        file_name: Path of the file to (over)write.
    """
    with open(file_name, 'w') as out:
        out.writelines(line + '\n' for line in lines)
def main():
    """Clean ``file.csv`` and write one output file per extracted record.

    Steps, all relative to the current working directory:

    1. Read ``file.csv`` and pass its text through
       ``remove_encoding_problems``.
    2. Save the result as ``file_clean.csv``.
    3. Parse ``file_clean.csv`` with ``from_csv``.
    4. Write the list of ids to the file ``bixos`` and, for every record,
       write its fields to a file named after the record's first field.
    """
    with open('file.csv') as source:
        text = source.read()

    # Clean the text before opening the output file so that a failure
    # while cleaning does not leave behind a truncated file_clean.csv.
    text_ok = remove_encoding_problems(text)
    with open('file_clean.csv', 'w') as cleaned:
        cleaned.write(text_ok)

    with open('file_clean.csv', 'r') as cleaned:
        bixos, bixos_info = from_csv(cleaned)

    write_lines(bixos, 'bixos')
    for bixo_info in bixos_info:
        # The first field of each record doubles as its output file name.
        write_lines(bixo_info, bixo_info[0])
# Script entry point: the module is meant to be executed directly from a
# directory named DATABASE_FOLDER; importing it only prints a warning.
if __name__ != '__main__':
    print('DO NOT IMPORT "main.py"')
else:
    import pathlib

    # Last path component of the directory that contains this script.
    script_dir = pathlib.Path(__file__).parent.absolute()
    folder = script_dir.parts[-1]
    if folder == DATABASE_FOLDER:
        main()
    else:
        print('please use this in database folder')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement