Advertisement
Guest User

Untitled

a guest
Jun 25th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.09 KB | None | 0 0
  # Fuzzy-match the two address columns of every row in the input report
  # against the records in `id_list`, and write the matched rows to one TSV
  # file per similarity threshold listed in CRITERIAS.
  #
  # NOTE(review): `id_list` is not defined anywhere in the visible source; it
  # must be bound before this script runs. Each record is indexed as
  # record[1] (the text compared with an address) and record[0] (the value
  # written to the report) -- presumably (id, address) pairs; confirm.
from contextlib import ExitStack

from fuzzywuzzy import fuzz

# Minimum fuzz.ratio score that BOTH addresses of a row must reach for the
# row to be written to the report file of that threshold.
CRITERIAS = [40, 45, 50, 55, 60]

REPORT_PATH = "/Users/rustamislamnurov/Downloads/report1.tsv"
OUTPUT_PATH_TEMPLATE = "/Users/rustamislamnurov/Downloads/report_criteria_%s.tsv"


def _best_match(address, records):
    """Return ``(score, record)`` for the record most similar to *address*.

    Similarity is ``fuzz.ratio(address, record[1])``. On ties the earliest
    record wins (strict ``>`` comparison). When *records* is empty or every
    score is 0, ``(0, None)`` is returned.
    """
    best_score, best_record = 0, None
    for record in records:
        score = fuzz.ratio(address, record[1])
        if score > best_score:
            best_score, best_record = score, record
    return best_score, best_record


# ExitStack guarantees every output file is flushed and closed, even if
# opening a later file or processing a row raises.
with ExitStack() as stack:
    CRITERIA_OUTPUTS = [
        stack.enter_context(open(OUTPUT_PATH_TEMPLATE % c, "w+"))
        for c in CRITERIAS
    ]

    with open(REPORT_PATH, "r") as report:
        for raw_line in report:
            line = raw_line.rstrip("\n")
            if not line:
                # A blank line (e.g. the one produced by a trailing newline)
                # has no tab-separated fields and would break the unpack below.
                continue

            print(line)
            # `price` and `p` are not used further; unpacking six names still
            # enforces that every row has exactly six columns.
            date, time, address_from, address_to, price, p = line.split("\t")

            rmax1, rmax_rec1 = _best_match(address_from, id_list)
            rmax2, rmax_rec2 = _best_match(address_to, id_list)

            for idx, c in enumerate(CRITERIAS):
                if rmax1 >= c and rmax2 >= c:
                    # Every threshold is > 0, so both records are set here.
                    # The first column is the parsed date; previously the
                    # literal text "date" was written and the parsed value
                    # was never used.
                    CRITERIA_OUTPUTS[idx].write(
                        f'{date}\t{time}\t{rmax_rec1[0]}\t{rmax_rec2[0]}\n'
                    )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement