Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

import pandas as pd

# Path of the CSV file that is polled for tweets.
filename = "C:/code/IntelligentSuit/tweet.csv"
# Module-level placeholder; the __main__ block rebinds it to the CSV contents.
df_in = pd.DataFrame()
def printdiffs(df_in):
    """Return the rows that differ between *df_in* and the CSV on disk.

    Thin wrapper that delegates to ``find_diffs``; despite its name it
    prints nothing itself.
    """
    return find_diffs(df_in)
def find_diffs(df_in):
    """Compare *df_in* with a fresh read of the CSV named by ``filename``.

    The file is re-read, outer-merged with *df_in*, and each row is tagged
    in an ``Exist`` indicator column.  Only the rows that are not tagged
    ``'both'`` (i.e. found in just one of the two frames) are returned.
    """
    current = pd.read_csv(filename)
    merged = pd.merge(current, df_in, how='outer', indicator='Exist')
    unmatched = merged['Exist'] != 'both'
    return merged.loc[unmatched]
def find_populartweet(dif_ls):
    """Tally the #IMEC18 hashtag votes in *dif_ls* and report the winner.

    *dif_ls* must have exactly three columns; the second one is treated as
    the tweet text.  For each of the three options the number of hashtag
    occurrences is printed, and if one option has strictly more occurrences
    than both others, "<option> wins" is printed as well.  Ties and empty
    frames print no winner.

    Returns the winning label ("kaboom", "dropin" or "sneaky"), or None
    when the frame is empty or there is no strict winner.
    """
    if dif_ls.shape[0] == 0:
        return None

    # Work on a copy: assigning ``columns`` on the argument itself would
    # rename the caller's DataFrame in place.
    dif = dif_ls.copy()
    dif.columns = ['number', 'tweet', 'exists']

    tally = []
    for label, pattern in (("kaboom", "#IMEC18 kaboom"),
                           ("dropin", "#IMEC18 drop"),
                           ("sneaky", "#IMEC18 sneaky")):
        # Sum of per-tweet occurrence counts, so a tweet repeating the
        # hashtag counts more than once.
        votes = dif.tweet.str.count(pattern).sum()
        print(label + ": " + str(votes))
        tally.append((label, votes))

    for label, votes in tally:
        # A winner must strictly beat every other option.
        if all(votes > other for name, other in tally if name != label):
            print(label + " wins")
            return label
    return None
if __name__ == '__main__':
    # Pause between polls, in seconds.  The status message is built from
    # this value so the two cannot drift apart.
    poll_seconds = 20
    # (hashtag to look for in new rows, message to print when it is found)
    tag_messages = (
        ('#IMEC18 spy', "show spy"),
        ('#IMEC18 kaboom', "show kaboom"),
        ('#IMEC18 drop', "show drop-in"),
        ('#IMEC18 sneaky', "show sneaky"),
    )

    # Baseline snapshot that the first poll is compared against.
    df_in = pd.read_csv(filename)
    print("first df in " + str(df_in.shape[0]))

    while True:
        print("Polling Tweets")
        dif_ls = printdiffs(df_in)
        print(str(dif_ls.columns[2]))
        if dif_ls.shape[0] > 0:
            find_populartweet(dif_ls)
            print(str(dif_ls))
            # Search the tweet text itself rather than the frame's printed
            # form, which pandas may truncate for long cells or many rows.
            # Assumes the tweet text is the second column, as
            # find_populartweet does -- confirm against the CSV layout.
            tweets = dif_ls.iloc[:, 1].astype(str)
            for tag, message in tag_messages:
                if tweets.str.contains(tag, regex=False).any():
                    print(message)
        # NOTE(review): the file is read here a second time, after the read
        # inside find_diffs; rows appended between the two reads become part
        # of the baseline without ever being reported as a diff.
        df_in = pd.read_csv(filename)
        print("waiting " + str(poll_seconds) + " seconds")
        time.sleep(poll_seconds)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement