Advertisement
Guest User

Untitled

a guest
Oct 21st, 2016
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.56 KB | None | 0 0
  1. def filterTweet(str1,str2,str3):
  2.     PUNCTUATION = '!"$%\'()*+,-./:;<=>?[\\]^_`{|}~' + u"\u2014" + u"\u2026"
  3.    
  4.     STOP_WORDS_SHORT = set(["a", "an", "the", "this", "that", "of", "for", "or", "and", "on", "to", "be", "if", "we", "you", "in", "is", "at", "it", "rt", "mt"])
  5.    
  6.     STOP_WORDS = {"basic":STOP_WORDS_SHORT,
  7.  
  8.               "hrc":set(["clinton", "hillary", "tim", "timothy", "kaine"]).union(STOP_WORDS_SHORT),
  9.  
  10.               "djt": set(["donald", "trump", "mike", "michael", "pence"]).union(STOP_WORDS_SHORT),
  11.  
  12.               "both": STOP_WORDS_SHORT.union(set(["clinton", "hillary", "donald", "trump", "tim", "timothy", "kaine", "mike", "michael", "pence"])),
  13.  
  14.               "none": set([])}
  15.              
  16.     STOP_PREFIXES  = {"default": set(["@", "#", "http", "&amp"]),
  17.  
  18.                   "hashtags_only": set(["#"]),
  19.  
  20.                   "none": set([])}
  21.                  
  22.     HRC_STOP_WORDS = STOP_WORDS["hrc"]
  23.  
  24.     DT_STOP_WORDS = STOP_WORDS["djt"]
  25.  
  26.     BOTH_CAND_STOP_WORDS = STOP_WORDS["both"]
  27.              
  28.     returnList = []
  29.    
  30.     for word in ((''.join((x for x in str1 if x not in PUNCTUATION))).split(' ')):
  31.         if word.lower() not in STOP_WORDS[str2] and (word.lower())[0] not in STOP_PREFIXES[str3] and (word.lower())[:4] not in STOP_PREFIXES[str3]:
  32.             returnList.append(word)
  33.     return returnList
  34.            
  35. def main(s1,s2,s3):
  36.     print filterTweet(s1,s2,s3)
  37.    
  38. hrctweet = "RT @HillaryforIA: The #iacaucus starts in 24 hours! #ImWithHer https://t.co/bF5fyfKbFt"
  39.  
  40. main(hrctweet, "basic", "default")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement