# Untitled

import pandas
import ast

# Load the sample. Each "Tweet_stopped" cell is the *text* of a Python list,
# for example:
#   "['micosapiens', 'faqstv', 'hannahbcn', 'joancbaez', 'tvcat']"
df = pandas.read_csv("twitter_cleanedsample.csv")

# len() on a cell like the one above yields 60 (every bracket, quote, comma and
# space is counted as a character), not the 5 words it appears to contain.
# ast.literal_eval parses the text into a real list while accepting literals
# only, so nothing in the CSV can execute as code.
# Flatten the parsed rows into a single list holding every word.
all_words = [
    word
    for cell_text in df["Tweet_stopped"]
    for word in ast.literal_eval(cell_text)
]

word_total = len(all_words)
print("Found {} words.".format(word_total))
# Output recorded from the original run:
# Found 7489 words.