Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Split a trip report into per-threshold files of fuzzy-matched address IDs.

Each non-empty line of the input report is expected to hold six tab-separated
fields. The "from" and "to" addresses are fuzzy-matched against ``id_list``;
for every threshold in ``CRITERIAS`` that both best scores reach, a row with
the date, time and the two matched record identifiers is written to that
threshold's output file.
"""
from contextlib import ExitStack

from fuzzywuzzy import fuzz

# Minimum fuzzy-match scores; one output report is produced per threshold.
CRITERIAS = [40, 45, 50, 55, 60]

REPORT_PATH = "/Users/rustamislamnurov/Downloads/report1.tsv"
CRITERIA_PATH_TEMPLATE = "/Users/rustamislamnurov/Downloads/report_criteria_%s.tsv"

# NOTE(review): `id_list` is not defined in the visible source; it must be
# bound before this script runs. From its use below, each record is indexable
# with [0] written out as the identifier and [1] compared against the address
# text -- confirm against wherever it is built.


def _best_match(query, records):
    """Return ``(score, record)`` for the record whose ``[1]`` best matches *query*.

    Scores come from ``fuzz.ratio``. Ties keep the earliest record. When no
    record scores above 0 the result is ``(0, None)``.
    """
    best_score = 0
    best_record = None
    for record in records:
        score = fuzz.ratio(query, record[1])
        if score > best_score:
            best_score = score
            best_record = record
    return best_score, best_record


# ExitStack guarantees every output file is flushed and closed, even if
# opening a later file or processing a line raises.
with ExitStack() as stack:
    CRITERIA_OUTPUTS = [
        stack.enter_context(open(CRITERIA_PATH_TEMPLATE % c, "w+"))
        for c in CRITERIAS
    ]
    with open(REPORT_PATH, "r") as f:
        for raw_line in f:
            line = raw_line.rstrip("\n")
            if not line:
                # A trailing newline (or blank row) would otherwise break the
                # six-field unpack below.
                continue
            print(line)
            date, time, address_from, address_to, _price, _p = line.split("\t")
            rmax1, rmax_rec1 = _best_match(address_from, id_list)
            rmax2, rmax_rec2 = _best_match(address_to, id_list)
            for idx, c in enumerate(CRITERIAS):
                # Both endpoints must reach the threshold for the row to count.
                if rmax1 >= c and rmax2 >= c:
                    # Emit the parsed date field (previously the literal
                    # text "date" was written on every row).
                    CRITERIA_OUTPUTS[idx].write(
                        f"{date}\t{time}\t{rmax_rec1[0]}\t{rmax_rec2[0]}\n"
                    )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement