Advertisement
shubhamgoyal

Ranking code

Sep 8th, 2015
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.82 KB | None | 0 0
  1. def flatten_job_query_and_remove_duplicates(job_query):
  2.         modified_job_query = []
  3.         #print "job_query = ",  job_query
  4.         #print "Length of job query is ", len(job_query)
  5.         weighted_keywords_dict = {}
  6.         weight_current = 0.5
  7.         for i in range(0, len(job_query)):
  8.                 query_string = job_query[i].decode('utf-8')
  9.                 #print "query_string = ", query_string.encode('utf-8')
  10.                 for word in query_string.split(" "):
  11.                         if word not in modified_job_query:
  12.                                 modified_job_query.append(word)
  13.                                 weighted_keywords_dict[word] = weight_current
  14.                 weight_current = weight_current / 2
  15.         #print "Flattened job query = ", modified_job_query
  16.         return weighted_keywords_dict
  17.  
  18. def get_keyword_score(job_query, keywords):
  19.         num_keywords_in_query = 0
  20.         tweet_text_in_lower_case = job_query.lower()
  21.         keywords_lower_case = {}
  22.         for keyword in keywords:
  23.                 keywords_lower_case[keyword.lower()] = keywords[keyword]
  24.         for keyword in keywords_lower_case:
  25.                 if keyword in tweet_text_in_lower_case:
  26.                         num_keywords_in_query = num_keywords_in_query + keywords_lower_case[keyword]
  27.         return num_keywords_in_query
  28.  
  29. def compute_rank_of_tweet(tweet, job_query):
  30.         len_tweet = len(tweet["text"])
  31.         tweet_len_score = 0.3 * len_tweet / 140.0
  32.         # print tweet["text"]
  33.         print len_tweet
  34.         keywords = flatten_job_query_and_remove_duplicates(job_query)
  35.         print keywords
  36.         # print tweet["text"]
  37.         keyword_score = 0.7 * get_keyword_score(tweet["text"], keywords)
  38.         print keyword_score
  39.         score = tweet_len_score + keyword_score
  40.         return score
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement