SHARE
TWEET

Untitled

a guest Jun 25th, 2019 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from fuzzywuzzy import fuzz
  2. CRITERIAS = [40, 45, 50, 55, 60]
  3. CRITERIA_OUTPUTS = [open("/Users/rustamislamnurov/Downloads/report_criteria_%s.tsv" % c, "w+") for c in CRITERIAS]
  4.  
  5. with open("/Users/rustamislamnurov/Downloads/report1.tsv", "r") as f:
  6.     content = f.read()
  7.     lines = content.split("\n")
  8.     for line in lines:
  9.         print(line)
  10.         date, time, address_from, address_to, price, p = line.split("\t")
  11.         rmax1 = 0
  12.         rmax_rec1 = 0
  13.         rmax2 = 0
  14.         rmax_rec2 = 0
  15.         for it in id_list:
  16.             r1 = fuzz.ratio(address_from, it[1])
  17.             if r1 > rmax1:
  18.                 rmax1 = r1
  19.                 rmax_rec1 = it
  20.  
  21.             r2 = fuzz.ratio(address_to, it[1])
  22.             if r2 > rmax2:
  23.                 rmax2 = r2
  24.                 rmax_rec2 = it
  25.  
  26.         for idx, c in enumerate(CRITERIAS):
  27.             if rmax1 >= c and rmax2 >= c:
  28.                 CRITERIA_OUTPUTS[idx].write(f'date\t{time}\t{rmax_rec1[0]}\t{rmax_rec2[0]}\n')
  29.                 # print(line)
  30.                 # print(rmax1, rmax_rec1)
  31. for f in CRITERIA_OUTPUTS:
  32.     f.flush()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top