Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def flatten_job_query_and_remove_duplicates(job_query):
    """Map each distinct word of a job query to a decreasing weight.

    Every string in ``job_query`` is split on whitespace and the words are
    visited in order. The first distinct word gets weight 0.5 and each
    further distinct word gets half the weight of the previous one; a word
    that was already seen keeps its original weight.

    NOTE(review): the source this was recovered from had lost its
    indentation, so the loop level of the halving step is reconstructed
    (halve once per newly added word) -- confirm against the intended
    weighting.

    Args:
        job_query: iterable of query strings, either UTF-8 encoded byte
            strings or already-decoded text.

    Returns:
        dict mapping each distinct word (as text) to its float weight.
        Empty when ``job_query`` is empty or contains only whitespace.
    """
    weighted_keywords = {}
    weight = 0.5
    for query_string in job_query:
        # Only byte strings need decoding; calling .decode() on text that is
        # already unicode fails for non-ASCII input (and does not exist on
        # Python 3 str).
        if isinstance(query_string, bytes):
            query_string = query_string.decode('utf-8')
        # split() with no separator drops the empty strings that split(" ")
        # yields for leading/trailing/repeated spaces. An empty keyword
        # would match every text, because "" is a substring of any string.
        for word in query_string.split():
            if word not in weighted_keywords:
                weighted_keywords[word] = weight
                weight = weight / 2
    return weighted_keywords
def get_keyword_score(job_query, keywords):
    """Sum the weights of the keywords that occur in a piece of text.

    Matching is case-insensitive and substring based: a keyword counts when
    its lower-cased form appears anywhere inside the lower-cased text, so it
    also matches inside longer words.

    Args:
        job_query: the text to search. Despite the parameter name, the
            caller visible in this file passes the tweet text here.
        keywords: dict mapping keyword -> numeric weight.

    Returns:
        The sum of the weights of all matching keywords; 0 when nothing
        matches or ``keywords`` is empty.
    """
    text_lower = job_query.lower()

    weight_by_keyword = {}
    for keyword, weight in keywords.items():
        folded = keyword.lower()
        # An empty keyword is a substring of every text and would add its
        # weight to every score, so it is ignored.
        if not folded:
            continue
        # Keywords that differ only in case collapse to one entry. Keep the
        # highest weight so the result does not depend on dict iteration
        # order.
        if folded not in weight_by_keyword or weight > weight_by_keyword[folded]:
            weight_by_keyword[folded] = weight

    return sum(
        weight
        for keyword, weight in weight_by_keyword.items()
        if keyword in text_lower
    )
def compute_rank_of_tweet(tweet, job_query, max_tweet_length=140.0,
                          length_weight=0.3, keyword_weight=0.7):
    """Score a tweet against a job query.

    The score is the sum of two parts:

    * a length part: ``length_weight * len(text) / max_tweet_length``
    * a keyword part: ``keyword_weight`` times the keyword score of the
      tweet text, using the weighted keywords derived from ``job_query``.

    Args:
        tweet: mapping with a ``"text"`` entry holding the tweet text.
        job_query: list of query strings, passed unchanged to
            ``flatten_job_query_and_remove_duplicates``.
        max_tweet_length: length the tweet length is divided by
            (default 140.0).
        length_weight: multiplier for the length part (default 0.3).
        keyword_weight: multiplier for the keyword part (default 0.7).

    Returns:
        The combined score as a float.

    Raises:
        KeyError: if ``tweet`` has no ``"text"`` entry.
    """
    # Function-scope import: no top-of-file import block is visible here.
    import logging

    text = tweet["text"]
    len_tweet = len(text)
    # NOTE(review): the length part is not capped, so text longer than
    # max_tweet_length contributes more than length_weight -- confirm
    # whether that is intended.
    tweet_len_score = length_weight * len_tweet / float(max_tweet_length)

    keywords = flatten_job_query_and_remove_duplicates(job_query)
    keyword_score = keyword_weight * get_keyword_score(text, keywords)

    # Diagnostics go to the logger at DEBUG level rather than being printed
    # to stdout for every scored tweet.
    logging.getLogger(__name__).debug(
        "tweet length=%d keywords=%r keyword_score=%s",
        len_tweet, keywords, keyword_score,
    )

    return tweet_len_score + keyword_score
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement