Advertisement
Guest User

CSV match

a guest
Jun 9th, 2017
242
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.05 KB | None | 0 0
  """Match sample address rows against a full reference CSV.

  Each row of the sample file is compared with the rows of the full file that
  share its ``CP4`` or ``LOCALIDADE`` value.  A pair counts as a match when
  the fuzzy score of the two rows reaches the threshold for the shared key.

  Outputs:
      Norm.csv      -- rows of the full file matched by at least one sample row.
      AI.csv        -- sample rows that had candidates but no match.
      clean_csv.csv -- emptied (created if missing) on every run.
  """
  from collections import defaultdict

  import pandas as pd

  try:
      from fuzzywuzzy import fuzz
  except ImportError:
      # Only main() needs the scorer; the helpers below work without it.
      fuzz = None

  COLUMNS = ['ART_TIPO', 'ART_DESIG', 'PORTA', 'CP4', 'CP3', 'LOCALIDADE']

  SAMPLE_CSV = "JonnyTheBoy10.csv"
  FULL_CSV = "JonnyTheBoyFull.csv"
  MATCHED_CSV = 'Norm.csv'
  UNMATCHED_CSV = 'AI.csv'
  SCRATCH_CSV = "clean_csv.csv"

  # Minimum score for rows sharing CP4.  The same value applies whether or not
  # CP3 also matches: the earlier 93 check was followed by an 87 check on the
  # same score, so 87 was always the effective limit.
  CP4_THRESHOLD = 87
  # Minimum score for rows that only share LOCALIDADE.
  LOCALIDADE_THRESHOLD = 85


  def row_text(row):
      """Return the values of *row* as one space-separated string.

      Args:
          row: Mapping from column name to value containing every name in
              ``COLUMNS``.

      Returns:
          The non-missing values, in ``COLUMNS`` order, joined by spaces.
      """
      values = (row[column] for column in COLUMNS)
      return " ".join(str(value) for value in values if pd.notna(value))


  def _positions_by_key(rows, column):
      """Group row positions by the value of *column*, skipping missing values."""
      groups = defaultdict(list)
      for position, row in enumerate(rows):
          key = row[column]
          if pd.notna(key):
              groups[key].append(position)
      return groups


  def split_matches(sample, full, scorer):
      """Split rows into matched reference rows and unmatched sample rows.

      Args:
          sample: DataFrame of rows to look up; must contain ``COLUMNS``.
          full: DataFrame of reference rows; must contain ``COLUMNS``.
          scorer: Callable ``(str, str) -> number`` rating the similarity of
              two row texts; higher means more similar.

      Returns:
          A ``(matched, unmatched)`` tuple.  ``matched`` holds the rows of
          *full* that reached the threshold for at least one sample row.
          ``unmatched`` holds the rows of *sample* that shared a key with some
          row of *full* but reached the threshold with none of them.  Sample
          rows sharing no key with any reference row are in neither frame.
      """
      full_rows = full.to_dict("records")
      by_cp4 = _positions_by_key(full_rows, "CP4")
      by_localidade = _positions_by_key(full_rows, "LOCALIDADE")

      matched = set()
      unmatched = []
      for sample_position, line in enumerate(sample.to_dict("records")):
          # Candidate position in ``full`` -> minimum score required.
          # LOCALIDADE is filled in first so that a shared CP4 overrides it.
          required = {}
          if pd.notna(line["LOCALIDADE"]):
              for position in by_localidade.get(line["LOCALIDADE"], ()):
                  required[position] = LOCALIDADE_THRESHOLD
          if pd.notna(line["CP4"]):
              for position in by_cp4.get(line["CP4"], ()):
                  required[position] = CP4_THRESHOLD
          if not required:
              continue

          line_text = row_text(line)
          hits = [
              position
              for position, minimum in required.items()
              if scorer(line_text, row_text(full_rows[position])) >= minimum
          ]
          if hits:
              matched.update(hits)
          else:
              unmatched.append(sample_position)

      return full.iloc[sorted(matched)], sample.iloc[unmatched]


  def main():
      """Read both CSV files, classify the sample rows and write the results.

      Raises:
          ImportError: If fuzzywuzzy is not installed.
      """
      if fuzz is None:
          raise ImportError("fuzzywuzzy is required to score address rows")

      df1 = pd.read_csv(SAMPLE_CSV, usecols=COLUMNS, low_memory=False)
      df2 = pd.read_csv(FULL_CSV, usecols=COLUMNS, low_memory=False)

      print("Total rows: {0}".format(len(df2)))
      print("Total rows: {0}".format(len(df1)))

      matched, unmatched = split_matches(df1, df2, fuzz.token_sort_ratio)
      matched.to_csv(MATCHED_CSV)
      unmatched.to_csv(UNMATCHED_CSV)

      # Opening in "w" mode already truncates; nothing is written to the file.
      with open(SCRATCH_CSV, "w"):
          pass

      print("Done")


  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement