# Untitled

import collections
import csv
import itertools
import operator
import os.path
import sys

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np


# Candidate information (as described in assignment writeup).
# CANDIDATE_NAMES maps the short key of each candidate to the full name.
CANDIDATE_NAMES = {
    # Keys that also appear in GOP_CANDIDATES
    "bush": "Jeb Bush",
    "carson": "Ben Carson",
    "christie": "Chris Christie",
    "cruz": "Ted Cruz",
    "fiorina": "Carly Fiorina",
    "gilmore": "Jim Gilmore",
    "graham": "Lindsey Graham",
    "huckabee": "Mike Huckabee",
    "jindal": "Bobby Jindal",
    "kasich": "John Kasich",
    "pataki": "George Pataki",
    "paul": "Rand Paul",
    "perry": "Rick Perry",
    "rubio": "Marco Rubio",
    "santorum": "Rick Santorum",
    "trump": "Donald Trump",
    "walker": "Scott Walker",
    # Keys that also appear in DEM_CANDIDATES
    "chafee": "Lincoln Chafee",
    "clinton": "Hillary Clinton",
    "omalley": "Martin O'Malley",
    "sanders": "Bernie Sanders",
    "webb": "Jim Webb",
}

# Short keys of the GOP candidates
GOP_CANDIDATES = [
    'bush',
    'carson',
    'christie',
    'cruz',
    'fiorina',
    'gilmore',
    'graham',
    'huckabee',
    'jindal',
    'kasich',
    'pataki',
    'paul',
    'perry',
    'rubio',
    'santorum',
    'trump',
    'walker',
]

# Short keys of the DEM candidates
DEM_CANDIDATES = [
    'chafee',
    'clinton',
    'omalley',
    'sanders',
    'webb',
]

# Every candidate key: the GOP keys followed by the DEM keys
ALL_CANDIDATES = GOP_CANDIDATES + DEM_CANDIDATES


# Figure dimensions: pass these as the "figsize" parameter of
# matplotlib's "figure" function.
# Note: For task 4, use FIGWIDTH*2
FIGWIDTH, FIGHEIGHT = 12, 8


# All times below are in seconds.
# The debate starts 24 hours in and ends three hours later.
DEBATE_START = 24 * 60 * 60
DEBATE_END = DEBATE_START + 3 * 60 * 60
# Maximum time of the dataset (51 hours)
MAX_TIME = 51 * 60 * 60


def get_nice_colors(n_colors):
    """Generate colors that can be passed to matplotlib functions.

    The result is suitable for matplotlib functions that accept a list of
    colors. Using this function should result in the same colors shown in
    the assignment writeup.

    Parameters:
        n_colors: the number of colors to generate.

    Returns:
        Whatever the ``cm.Accent`` colormap returns for the sampled
        positions (one entry per requested color).
    """
    # Sample the colormap at 1, 1 - 1/n, 1 - 2/n, ... (n_colors positions).
    positions = []
    for i in range(n_colors):
        positions.append(1 - (i / n_colors))
    return cm.Accent(positions)


################################################
#
# Your functions go here
#
# Call your functions from the __main__ block
#
################################################

class Tweet:
    """Plain record holding the fields of a single tweet.

    Each constructor argument is stored, unchanged, on the instance under
    an attribute with the same name as the parameter.
    """

    def __init__(self, seconds, length, candidates, retweet, when,
                 polarity, subjectivity, longitude, lat):
        # Attributes are assigned in parameter order.
        self.seconds, self.length = seconds, length
        self.candidates, self.retweet = candidates, retweet
        self.when = when
        self.polarity, self.subjectivity = polarity, subjectivity
        self.longitude, self.lat = longitude, lat

def read_csv(filename):
    """Load the tweets stored in a CSV file.

    The file must start with a header row that contains the columns
    ``seconds``, ``length``, ``candidates``, ``retweet``, ``when``,
    ``polarity``, ``subjectivity``, ``long`` and ``lat``.

    Parameters:
        filename: path of the CSV file to read.

    Returns:
        A list with one ``Tweet`` per data row, in file order. Every field
        is kept as the string produced by the csv module, except
        ``candidates``, which is the ``|``-separated column split into a
        list of distinct entries in sorted order.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if one of the expected columns is missing.
    """
    tweets = []
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted fields
    # may be interpreted incorrectly.
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            # De-duplicate the mentioned candidates. Sorting makes the order
            # reproducible: the iteration order of a set of strings can
            # change from one run of the interpreter to the next.
            candidates = sorted(set(row['candidates'].split('|')))
            tweets.append(Tweet(row['seconds'], row['length'], candidates,
                                row['retweet'], row['when'], row['polarity'],
                                row['subjectivity'], row['long'], row['lat']))
    return tweets


def count_candidates_per_tweet(tweets_list):
    """Tally tweets by the number of candidates they mention.

    Parameters:
        tweets_list: iterable of objects with a ``candidates`` attribute
            that supports ``len``.

    Returns:
        A dictionary mapping a number of candidates to the number of tweets
        whose ``candidates`` has exactly that length.
    """
    histogram = {}
    for tweet in tweets_list:
        n_mentioned = len(tweet.candidates)
        histogram[n_mentioned] = histogram.get(n_mentioned, 0) + 1
    return histogram


def count_candidate_pairs(tweets_list):
    """Find the ten candidate pairs that are mentioned together most often.

    A pair is counted once for every tweet whose ``candidates`` contains
    both names, regardless of the order (or repetition) of the names within
    the tweet.

    Parameters:
        tweets_list: iterable of objects with a ``candidates`` attribute
            holding the names mentioned in that tweet.

    Returns:
        A dictionary mapping ``(name1, name2)`` tuples (names in
        alphabetical order) to the number of tweets mentioning both. It
        holds at most ten entries, ordered from the highest count to the
        lowest; it is empty when no tweet mentions two candidates.
    """
    pair_counts = collections.Counter()
    for tweet in tweets_list:
        # Sorting the distinct names gives every unordered pair a single
        # canonical tuple, and combinations() yields it exactly once per
        # tweet (a nested loop over the names would count it twice).
        names = sorted(set(tweet.candidates))
        pair_counts.update(itertools.combinations(names, 2))
    # Rank on the counts directly. Keying a dictionary by count instead
    # would silently drop every pair that shares its count with another.
    return dict(pair_counts.most_common(10))


def count_candidate_mentions(tweets_list, threshold=0.03, other_label=None):
    """Compute each candidate's share of all candidate mentions.

    Every entry of every tweet's ``candidates`` counts as one mention. A
    candidate's share is its number of mentions divided by the total number
    of mentions, i.e. a fraction between 0 and 1 (not a value out of 100).

    Parameters:
        tweets_list: iterable of objects with a ``candidates`` attribute
            holding the names mentioned in that tweet.
        threshold: candidates whose share is strictly below this fraction
            are left out of the per-candidate entries (default 0.03).
        other_label: when not None, the combined share of the candidates
            that were left out is added to the result under this key. With
            the default of None that combined share is not reported.

    Returns:
        A dictionary mapping candidate name to share, in order of first
        mention, followed by the ``other_label`` entry if one was requested
        and at least one candidate fell below the threshold. Empty when
        there are no mentions.
    """
    mention_counts = collections.Counter()
    for tweet in tweets_list:
        mention_counts.update(tweet.candidates)
    total = sum(mention_counts.values())

    candidate_percentage = {}
    other = 0
    # When there are no mentions the loop body never runs, so the division
    # by `total` cannot fail.
    for name, count in mention_counts.items():
        share = count / total
        if share < threshold:
            other += share
        else:
            candidate_percentage[name] = share

    if other_label is not None and other > 0:
        candidate_percentage[other_label] = (
            candidate_percentage.get(other_label, 0) + other)
    return candidate_percentage


def count_candidate_mentions_per_min(tweets_list):
    """Count candidate mentions in consecutive 60-second intervals.

    The intervals start at the smallest ``seconds`` value found in
    ``tweets_list`` and continue in steps of 60 up to the interval that
    contains the largest value. Every interval gets an entry, including
    those in which nothing was mentioned.

    Parameters:
        tweets_list: iterable of objects with a ``seconds`` attribute (a
            number, or a numeric string such as the csv module produces)
            and a ``candidates`` attribute holding the names mentioned.

    Returns:
        A dictionary whose keys are the lower bounds (in seconds) of the
        intervals, in increasing order, and whose values are dictionaries
        mapping candidate name to the number of mentions inside that
        interval. Empty when ``tweets_list`` is empty.

    Raises:
        ValueError: if a ``seconds`` value cannot be parsed as a number.
    """
    tweets = list(tweets_list)
    # Normalise once: int(float(...)) accepts ints, floats and strings.
    seconds = [int(float(tweet.seconds)) for tweet in tweets]
    if not seconds:
        return {}

    min_sec = min(seconds)
    max_sec = max(seconds)
    # Each key is the lower bound of its interval.
    mentions_per_min = {start: {} for start in range(min_sec, max_sec + 1, 60)}

    for sec, tweet in zip(seconds, tweets):
        # The interval can be computed directly, so no search is needed.
        start = min_sec + ((sec - min_sec) // 60) * 60
        interval_counts = mentions_per_min[start]
        for name in tweet.candidates:
            interval_counts[name] = interval_counts.get(name, 0) + 1
    return mentions_per_min


def convert_dict_to_lists(dictionary):
    """Split a dictionary into two parallel lists.

    Parameters:
        dictionary: the dictionary to split.

    Returns:
        A ``(keys, values)`` tuple of lists in the dictionary's iteration
        order, so that ``values[i]`` is the value stored under ``keys[i]``.
    """
    keys = []
    values = []
    for key, value in dictionary.items():
        keys.append(key)
        values.append(value)
    return keys, values


def plot_bar_graph(xs, ys, filename, plot_title=None, x_label=None,
                   y_label=None, x_label_rot=None):
    """Draw a bar graph in a new figure and display it.

    Parameters:
        xs: labels of the bars, one per bar (shown as the x tick labels).
        ys: heights of the bars, in the same order as ``xs``.
        filename: currently unused; the figure is displayed, not saved.
        plot_title: title of the plot, or None for no title.
        x_label: label of the x axis, or None for no label.
        y_label: label of the y axis, or None for no label.
        x_label_rot: rotation passed to ``plt.xticks`` for the tick
            labels, or None to keep matplotlib's default.
    """
    positions = np.arange(len(xs))

    plt.figure()
    # Centre each bar on its position explicitly. The default alignment
    # differs between matplotlib versions, and relying on it (plus a fixed
    # tick offset) leaves the tick labels off-centre on some of them.
    plt.bar(positions, ys, color='blue', align='center')

    if plot_title is not None:
        plt.title(str(plot_title))
    if x_label is not None:
        plt.xlabel(str(x_label))
    if y_label is not None:
        plt.ylabel(str(y_label))

    if x_label_rot is not None:
        plt.xticks(positions, xs, rotation=x_label_rot)
    else:
        plt.xticks(positions, xs)

    # NOTE: the figure is only shown; nothing is written to `filename`.
    plt.show()

if __name__ == "__main__":

    # Command-line handling. The script takes one required parameter (the
    # CSV file) and one optional parameter (the directory where the PNG
    # files will be created; "./output" is used when it is not given).
    #
    # Once this section is done, two variables are set:
    #
    #  - csv_file: The data file to read
    #  - output_dir: The directory where the images should be generated

    n_args = len(sys.argv)
    if n_args < 2 or n_args > 3:
        print("Usage: python3 {} <data file> [<output directory>]".format(sys.argv[0]))
        sys.exit(1)

    csv_file = sys.argv[1]
    if not (os.path.exists(csv_file) and os.path.isfile(csv_file)):
        print("{} does not exist or is not a file.".format(csv_file))
        sys.exit(1)

    # The default directory is used as-is; only a directory named on the
    # command line is checked.
    output_dir = "./output"
    if n_args == 3:
        output_dir = sys.argv[2]
        if not (os.path.exists(output_dir) and os.path.isdir(output_dir)):
            print("{} does not exist or is not a directory.".format(output_dir))
            sys.exit(1)

    # File names to use for the generated plots
    TASK1_FILE = "{}/bar_num_mentions.png".format(output_dir)

    TASK2_GOP_FILE = "{}/bar_candidates_together_gop.png".format(output_dir)
    TASK2_ALL_FILE = "{}/bar_candidates_together_all.png".format(output_dir)

    TASK3_GOP_FILE = "{}/candidates_gop.png".format(output_dir)
    TASK3_ALL_FILE = "{}/candidates_all.png".format(output_dir)

    TASK4A_DURING_FILE = "{}/mentions_over_time_during.png".format(output_dir)
    TASK4A_FULL_FILE = "{}/mentions_over_time.png".format(output_dir)

    TASK4B_FILE = "{}/stackplot.png".format(output_dir)


    # Your code goes here, BUT NOT **ALL** YOUR CODE.
    #
    # The work itself belongs in functions defined above; this block
    # should only call them.