# lang_mapper.py

#!/usr/bin/env python

import sys
import string

# Characters that are kept when an input line is sanitised; any character
# outside string.printable is dropped before the line is parsed.
printable = set(string.printable)
# Base value of the per-word score computed in the stdin loop.
# NOTE(review): presumably the length of each "*_200_most_frequent.txt"
# list -- confirm before changing one without the other.
number_of_most_popular_words = 200

def read_file_to_dictionary(filename, dict):
    """Load a "word weight" file into an existing dictionary.

    Every line of `filename` is expected to contain exactly two
    whitespace-separated fields: a word and an integer weight.  Lines that
    do not have that shape (blank lines, wrong number of fields, a weight
    that is not an integer) are skipped.  A word that occurs several times
    keeps the weight of its last occurrence.

    Args:
        filename: Path of the file to read.
        dict: Mapping that is updated in place with ``word -> weight``.
            The name shadows the builtin; it is kept unchanged so that
            existing callers (including keyword callers) keep working.

    Returns:
        None.  The result is delivered through `dict`.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even when reading fails.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if len(fields) != 2:
                # Blank or malformed line: best-effort loading, skip it.
                continue

            word, weight = fields
            try:
                weight = int(weight)
            except ValueError:
                # Non-numeric weight: skip only this line.  Anything other
                # than a parse failure is allowed to propagate.
                continue

            dict[word] = weight

# f = open('en_200_most_frequent.txt')
# filecontent = f.readlines()
# f.close()
# en_word_list = {}
# for line in filecontent:
#     try:
#         word, weight = line.split()
#         weight = int(weight)
#         en_word_list[word] = weight
#     except:
#         pass

# One frequency table per supported language, filled in below.
en_word_list, pl_word_list, hu_word_list = {}, {}, {}
de_word_list, sv_word_list, nl_word_list = {}, {}, {}

# Populate the tables in the same order as before: en, pl, hu, de, sv, nl.
for _source_file, _table in (
        ('en_200_most_frequent.txt', en_word_list),
        ('pl_200_most_frequent.txt', pl_word_list),
        ('hu_200_most_frequent.txt', hu_word_list),
        ('de_200_most_frequent.txt', de_word_list),
        ('sv_200_most_frequent.txt', sv_word_list),
        ('nl_200_most_frequent.txt', nl_word_list)):
    read_file_to_dictionary(_source_file, _table)


# Frequency tables in output-column order: en, pl, hu, de, sv, nl.
_word_lists_in_output_order = (
    en_word_list, pl_word_list, hu_word_list,
    de_word_list, sv_word_list, nl_word_list,
)


def _language_score(word_list, word, position):
    """Return the score of `word` for one language's frequency table.

    The score is 0 when `word` is not in `word_list`; otherwise it is
    ``number_of_most_popular_words + 1 - (word_list[word] - position)``.
    """
    if word not in word_list:
        return 0
    return number_of_most_popular_words + 1 - (word_list[word] - position)


# Read "word position" records from stdin and emit one tab-separated row
# per accepted word: the word followed by its en, pl, hu, de, sv, nl scores.
for line in sys.stdin:
    # Drop every character that is not in string.printable, then trim.
    # Joining the characters keeps the result a string on Python 2 and 3
    # (filter() on a string returns an iterator on Python 3).
    line = "".join(ch for ch in line if ch in printable).strip()

    fields = line.split()
    if len(fields) != 2:
        # Blank or malformed record; previously this crashed the whole run
        # with a ValueError on unpacking.
        continue

    word, position = fields
    try:
        position = int(position)
    except ValueError:
        # A non-numeric position used to be kept as a string and then blew
        # up with a TypeError in the score arithmetic; skip the record.
        continue

    # NOTE(review): this skips every word LONGER than two characters, so
    # only 1- and 2-character words are ever scored.  That looks inverted
    # (possibly meant `< 2`), but it is the existing behaviour and is kept
    # as-is until confirmed.
    if len(word) > 2:
        continue

    scores = [_language_score(word_list, word, position)
              for word_list in _word_lists_in_output_order]
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("\t".join([word] + [str(score) for score in scores]))