desdemona

lang_mapper.py

May 30th, 2016
443
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.35 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import sys
  4. import string
  5.  
  6. printable = set(string.printable)
  7. number_of_most_popular_words = 200
  8.  
  9. def read_file_to_dictionary(filename, dict):
  10.     f = open(filename)
  11.     filecontent = f.readlines()
  12.     f.close()
  13.  
  14.     for line in filecontent:
  15.         try:
  16.             word, weight = line.split()
  17.             weight = int(weight)
  18.             dict[word] = weight
  19.         except:
  20.             pass
  21.  
  22. # f = open('en_200_most_frequent.txt')
  23. # filecontent = f.readlines()
  24. # f.close()
  25. # en_word_list = {}
  26. # for line in filecontent:
  27. #     try:
  28. #         word, weight = line.split()
  29. #         weight = int(weight)
  30. #         en_word_list[word] = weight
  31. #     except:
  32. #         pass
  33.  
  34. en_word_list = {}
  35. read_file_to_dictionary('en_200_most_frequent.txt', en_word_list)
  36.  
  37.  
  38. pl_word_list = {}
  39. read_file_to_dictionary('pl_200_most_frequent.txt', pl_word_list)
  40.  
  41. hu_word_list = {}
  42. read_file_to_dictionary('hu_200_most_frequent.txt', hu_word_list)
  43.  
  44. de_word_list = {}
  45. read_file_to_dictionary('de_200_most_frequent.txt', de_word_list)
  46.  
  47. sv_word_list = {}
  48. read_file_to_dictionary('sv_200_most_frequent.txt', sv_word_list)
  49.  
  50. nl_word_list = {}
  51. read_file_to_dictionary('nl_200_most_frequent.txt', nl_word_list)
  52.  
  53.  
  54.  
  55. for line in sys.stdin:
  56.     line = filter(lambda x: x in printable, line)
  57.     line = line.strip()
  58.     word, position = line.split()
  59.  
  60.     try:
  61.         position = int(position)
  62.     except:
  63.         pass
  64.  
  65.     en = pl = hu = sv = nl = de = 0
  66.     word = word.strip()
  67.     if len(word) > 2:
  68.         continue
  69.  
  70.     if en_word_list.has_key(word):
  71.         en += number_of_most_popular_words + 1 - (en_word_list[word] - position)
  72.  
  73.     if pl_word_list.has_key(word):
  74.         pl += number_of_most_popular_words + 1 - (pl_word_list[word] - position)
  75.  
  76.     if hu_word_list.has_key(word):
  77.         hu += number_of_most_popular_words + 1 - (hu_word_list[word] - position)
  78.  
  79.     if de_word_list.has_key(word):
  80.         de += number_of_most_popular_words + 1 - (de_word_list[word] - position)
  81.  
  82.     if sv_word_list.has_key(word):
  83.         sv += number_of_most_popular_words + 1 - (sv_word_list[word] - position)
  84.  
  85.     if nl_word_list.has_key(word):
  86.         nl += number_of_most_popular_words + 1 - (nl_word_list[word] - position)
  87.  
  88.     print word + "\t" + str(en) + "\t" + str(pl) + "\t" + str(hu) + "\t" + str(de) + "\t" + str(sv) + "\t" + str(nl)
Add Comment
Please, Sign In to add comment