Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2017
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.40 KB | None | 0 0
import csv
from operator import itemgetter

import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
  5. beall_df = pd.read_csv('Beall_list.txt', header=None, sep='\t', names=['names'])
  6. j1_df = pd.read_csv('J1.csv', header=None, names=['names'])
  7. j2_df = pd.read_csv('J2.csv', header=None, names=['names'])
  8. j3_df = pd.read_csv('J3.csv', header=None, names=['names'])
  9. j4_df = pd.read_csv('J4.csv', header=None, names=['names'])
  10. j5_df = pd.read_csv('J5.csv', header=None, names=['names'])
  11.  
  12. beall_df_names = beall_df.names.str.replace(' ','').str.lower()
  13. j1_df_names = j1_df.names.str.replace(' ','').str.lower().tolist()
  14. j2_df_names = j2_df.names.str.replace(' ','').str.lower().tolist()
  15. j3_df_names = j3_df.names.str.replace(' ','').str.lower().tolist()
  16. j4_df_names = j4_df.names.str.replace(' ','').str.lower().tolist()
  17. j5_df_names = j5_df.names.str.replace(' ','').str.lower().tolist()
  18.  
  19.  
  20. j1_matches = [('xxxxx',0)]
  21. j2_matches = [('xxxxx',0)]
  22. j3_matches = [('xxxxx',0)]
  23. j4_matches = [('xxxxx',0)]
  24. j5_matches = [('xxxxx',0)]
  25.  
  26.  
  27. for name in beall_df_names:
  28. match = process.extractOne(name, j1_df_names)
  29. j1_matches.append(match)
  30.  
  31. match = process.extractOne(name, j2_df_names)
  32. j2_matches.append(match)
  33.  
  34. match = process.extractOne(name, j3_df_names)
  35. j3_matches.append(match)
  36.  
  37. match = process.extractOne(name, j4_df_names)
  38. j4_matches.append(match)
  39.  
  40. match = process.extractOne(name, j5_df_names)
  41. j5_matches.append(match)
  42.  
  43. with open('j1_matches.csv', 'w') as fh:
  44. for item in j1_matches:
  45. if item:
  46. fh.write("{},{}\n".format(item[0], item[1]))
  47.  
  48. with open('j2_matches.csv', 'w') as fh:
  49. for item in j2_matches:
  50. if item:
  51. fh.write("{},{}\n".format(item[0], item[1]))
  52.  
  53. with open('j3_matches.csv', 'w') as fh:
  54. for item in j3_matches:
  55. if item:
  56. fh.write("{},{}\n".format(item[0], item[1]))
  57.  
  58.  
  59. with open('j4_matches.csv', 'w') as fh:
  60. for item in j4_matches:
  61. if item:
  62. fh.write("{},{}\n".format(item[0], item[1]))
  63.  
  64.  
  65. with open('j5_matches.csv', 'w') as fh:
  66. for item in j5_matches:
  67. if item:
  68. fh.write("{},{}\n".format(item[0], item[1]))
  69. """
  70. Run:
  71.  
  72. $ sort --field-separator=',' -k2 -n j1_matches.csv
  73. $ sort --field-separator=',' -k2 -n j2_matches.csv
  74. $ sort --field-separator=',' -k2 -n j3_matches.csv
  75. $ sort --field-separator=',' -k2 -n j4_matches.csv
  76. $ sort --field-separator=',' -k2 -n j5_matches.csv
  77.  
  78.  
  79.  
  80. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement