SHARE
TWEET

Untitled

a guest Oct 13th, 2019 89 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ## Transform string to list in order to simplify the analysis.
  2. output = open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/dictionnaire.txt", "a+")
  3.  
  4. with open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/atraiter.txt") as f:
  5.     content = f.readlines()
  6.  
  7. content = [x.strip() for x in content]
  8. content_reforme = []
  9.  
  10. for k in content:
  11.     content_reforme.append(k.split())
  12.  
  13. ## Creation of a 'dictionnary' giving the most used POS for a given word.
  14. # For the sake of simplicity, the tables table_of_words and table_of_POS are
  15. # also created.
  16. n = len (content_reforme)
  17.  
  18. max_current = int(content_reforme[0][0])
  19. num_max = 0
  20.  
  21. table_of_words = [] # Table of words. The are unique, and their index match
  22.                     # the index of their POS in table_of_POS.
  23. table_of_POS = [] # Table of the most used POS for a word.
  24.  
  25. for i in range(n-1):
  26.     if content_reforme[i][1] == content_reforme[i+1][1]:
  27.         if int(content_reforme[i+1][0]) > max_current:
  28.             max_current = int(content_reforme[i+1][0])
  29.             num_max = i+1
  30.    
  31.     else:
  32.         output.write(content_reforme[num_max][1]+' '+content_reforme[num_max][2]+'\n')
  33.         table_of_words.append(content_reforme[num_max][1])
  34.         table_of_POS.append(content_reforme[num_max][2])
  35.         max_current = int(content_reforme[i+1][0])
  36.         num_max = i+1
  37.        
  38.         if i+1 == n-1:
  39.             output.write(content_reforme[i+1][1]+' '+content_reforme[i+1][2]+'\n')
  40.             table_of_words.append(content_reforme[i+1][1])
  41.             table_of_POS.append(content_reforme[i+1][2])
  42. ## Close the files
  43. output.close()
  44. f.close()
  45.  
  46. ## Creation of POS tags using baseline method
  47. output = open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/POS_baseline.txt", "a+")
  48.  
  49. with open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/wordsToFindPOS.txt") as f:
  50.     content = f.readlines()
  51.  
  52. content = [x.strip() for x in content]
  53.  
  54. for k in content:
  55.     i = table_of_words.index(k)
  56.     pos = table_of_POS[i]
  57.     output.write(k+' '+pos+'\n')
  58.  
  59. output.close()
  60.  
  61. ## Close the files
  62. output.close()
  63. f.close()
  64.  
  65. ## Compare the baseline POS to the original POS
  66. with open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/POS_baseline.txt") as f:
  67.     originalPOS = f.readlines()
  68.  
  69. originalPOS = [x.strip() for x in originalPOS]
  70. originalPOS_reforme = []
  71.  
  72. for k in originalPOS:
  73.     originalPOS_reforme.append(k.split())
  74.  
  75. f.close()
  76.  
  77. with open("/home/felix/Documents/TokyoTech/NaturalLanguageProcessing/Assignment3/POS_trainSet.txt") as f:
  78.     baselinePOS = f.readlines()
  79.  
  80. baselinePOS = [x.strip() for x in baselinePOS]
  81. baselinePOS_reforme = []
  82.  
  83. for k in baselinePOS:
  84.     baselinePOS_reforme.append(k.split())
  85.    
  86. f.close()
  87.  
  88. n = len(baselinePOS_reforme)
  89. nb_error = 0
  90.  
  91. for i in range(n):
  92.     if baselinePOS_reforme[i][1] != originalPOS_reforme[i][1]:
  93.         nb_error = nb_error + 1
  94.  
  95. print(nb_error/n)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top