Advertisement
Guest User

Untitled

a guest
Nov 24th, 2015
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.28 KB | None | 0 0
  1. import sys
  2. import csv
  3. import os.path
  4. import operator
  5. import matplotlib.pyplot as plt
  6. import matplotlib.cm as cm
  7. import numpy as np
  8.  
  9.  
  10. # Candidate information (as described in assignment writeup)
  11. CANDIDATE_NAMES = {"bush": "Jeb Bush",
  12. "carson": "Ben Carson",
  13. "christie": "Chris Christie",
  14. "cruz": "Ted Cruz",
  15. "fiorina": "Carly Fiorina",
  16. "gilmore": "Jim Gilmore",
  17. "graham": "Lindsey Graham",
  18. "huckabee": "Mike Huckabee",
  19. "jindal": "Bobby Jindal",
  20. "kasich": "John Kasich",
  21. "pataki": "George Pataki",
  22. "paul": "Rand Paul",
  23. "perry": "Rick Perry",
  24. "rubio": "Marco Rubio",
  25. "santorum": "Rick Santorum",
  26. "trump": "Donald Trump",
  27. "walker": "Scott Walker",
  28. "chafee": "Lincoln Chafee",
  29. "clinton": "Hillary Clinton",
  30. "omalley": "Martin O'Malley",
  31. "sanders": "Bernie Sanders",
  32. "webb": "Jim Webb"}
  33.  
  34. GOP_CANDIDATES = ['bush', 'carson', 'christie', 'cruz', 'fiorina', 'gilmore', 'graham', 'huckabee',
  35. 'jindal', 'kasich', 'pataki', 'paul', 'perry', 'rubio', 'santorum', 'trump', 'walker']
  36.  
  37. DEM_CANDIDATES = ['chafee', 'clinton', 'omalley', 'sanders', 'webb']
  38.  
  39. ALL_CANDIDATES = GOP_CANDIDATES + DEM_CANDIDATES
  40.  
  41.  
  42. # Size of the figures (these are the values you should pass
  43. # in parameter "figsize" of matplotlib's "figure" function)
  44. # Note: For task 4, use FIGWIDTH*2
  45. FIGWIDTH = 12
  46. FIGHEIGHT = 8
  47.  
  48.  
  49. # Start and end time (in seconds) of the debate
  50. DEBATE_START = 86400
  51. DEBATE_END = 97200
  52. # Maximum time (in seconds) of the dataset
  53. MAX_TIME = 183600
  54.  
  55.  
  56. # This function generates colors that can be passed to matplotlib functions
  57. # that accept a list of colors. The function takes one parameter: the number
  58. # of colors to generate. Using this function should result in the same colors
  59. # shown in the assignment writeup.
  60. def get_nice_colors(n_colors):
  61. return cm.Accent( [1 - (i/n_colors) for i in range(n_colors)] )
  62.  
  63.  
  64. ################################################
  65. #
  66. # Your functions go here
  67. #
  68. # Call your functions from the __main__ block
  69. #
  70. ################################################
  71.  
  72. class Tweet:
  73. def __init__(self, seconds, length, candidates, retweet, when, \
  74. polarity, subjectivity, longitude, lat):
  75. self.seconds = seconds
  76. self.length = length
  77. self.candidates = candidates
  78. self.retweet = retweet
  79. self.when = when
  80. self.polarity = polarity
  81. self.subjectivity = subjectivity
  82. self.longitude = longitude
  83. self.lat = lat
  84.  
  85. def read_csv(filename):
  86. tweets = []
  87. with open(filename) as f:
  88. reader = csv.DictReader(f)
  89. for row in reader:
  90. tweets.append(Tweet(row['seconds'], row['length'], \
  91. list(set(row['candidates'].split('|'))), row['retweet'], \
  92. row['when'], row['polarity'], row['subjectivity'], \
  93. row['long'], row['lat']))
  94. return tweets
  95.  
  96.  
  97. def count_candidates_per_tweet(tweets_list):
  98. counts_per_tweet = {}
  99. for tweet in tweets_list:
  100. if len(tweet.candidates) not in counts_per_tweet:
  101. counts_per_tweet[len(tweet.candidates)] = 0
  102. counts_per_tweet[len(tweet.candidates)] += 1
  103. return counts_per_tweet
  104.  
  105.  
  106. def count_candidate_pairs(tweets_list):
  107. candidate_pairs_count = {}
  108. candidate_pairs_top_ten = {}
  109. for tweets in tweets_list:
  110. for name1 in tweets.candidates:
  111. for name2 in tweets.candidates:
  112. if name1 != name2:
  113. pair = tuple(sorted([name1, name2]))
  114. if pair not in candidate_pairs_count:
  115. candidate_pairs_count[pair] = 0
  116. candidate_pairs_count[pair] += 1
  117. # make dictionary of top ten counts
  118. candidate_pairs_count = {y:x for x, y in candidate_pairs_count.items()}
  119. counts = sorted(candidate_pairs_count.values())[:10]
  120. for i in counts:
  121. candidate_pairs_top_ten[i] = candidate_pairs_count_flip[i]
  122. candidate_pairs_top_ten = {y:x for x, y in candidate_pairs_top_ten.items()}
  123. return candidate_pairs_top_ten
  124.  
  125.  
  126. def count_candidate_mentions(tweets_list):
  127. candidate_count = {}
  128. candidate_percentage = {}
  129. total = 0
  130. other = 0
  131. for tweet in tweets_list:
  132. for name in tweet.candidates:
  133. if name not in candidate_count:
  134. candidate_count[name] = 0
  135. candidate_count[name] += 1
  136. total += 1
  137. for name in candidate_count:
  138. candidate_count[name] = candidate_count[name] / total
  139. if candidate_count[name] < 0.03:
  140. other += candidate_count[name]
  141. candidate_count[name] = 'rem'
  142. if candidate_count[name] != 'rem':
  143. candidate_percentage[name] = candidate_count[name]
  144. return candidate_percentage
  145.  
  146.  
  147. def count_candidate_mentions_per_min(tweets_list):
  148. mentions_per_min = {}
  149. all_seconds = tweets_list[:, 0]
  150. min_sec = min(all_seconds) # are these 2 lines of code ok?
  151. max_sec = max(all_seconds)
  152. sec = min_sec
  153. while sec <= max_sec:
  154. mentions_per_min[sec] = {} # each key is lower bound
  155. sec += 60
  156. for tweet in tweets_list:
  157. for sec in tweet.seconds:
  158. # binary search???
  159. pass
  160.  
  161.  
  162. def convert_dict_to_lists(dictionary):
  163. keys = list(dictionary.keys())
  164. values = list(dictionary.values())
  165. return keys, values
  166.  
  167.  
  168. def plot_bar_graph(xs, ys, filename, plot_title = None, x_label = None, \
  169. y_label = None, x_label_rot = None):
  170. xnum = np.arange(len(xs))
  171.  
  172. # fig = plt.figure()
  173. plt.figure()
  174. plt.bar(xnum, ys, color = 'blue')
  175.  
  176. if plot_title != None:
  177. plt.title('{}'.format(plot_title))
  178. if x_label != None:
  179. plt.ylabel('{}'.format(y_label))
  180. if y_label != None:
  181. plt.ylabel('{}'.format(y_label))
  182.  
  183. xtickslocs = xnum + 0.4
  184. if x_label_rot != None:
  185. plt.xticks(xtickslocs, xs, rotation = x_label_rot)
  186. else:
  187. plt.xticks(xtickslocs, xs)
  188.  
  189. plt.show()
  190. # fig.savefig('{}.png'.format(filename))
  191.  
  192. if __name__ == "__main__":
  193.  
  194. # The following code parses the command-line parameters.
  195. # There is one required parameter (the CSV file) and an optional
  196. # parameter (the directory where the PNG files will be created;
  197. # if not specified, this defaults to "output/").
  198. #
  199. # This code results in two variables:
  200. #
  201. # - csv_file: The data file to read
  202. # - output_dir: The directory where the images should be generated
  203.  
  204. if not 2 <= len(sys.argv) <= 3:
  205. print("Usage: python3 {} <data file> [<output directory>]".format(sys.argv[0]))
  206. sys.exit(1)
  207. else:
  208. csv_file = sys.argv[1]
  209. if not os.path.exists(csv_file) or not os.path.isfile(csv_file):
  210. print("{} does not exist or is not a file.".format(csv_file))
  211. sys.exit(1)
  212. if len(sys.argv) == 3:
  213. output_dir = sys.argv[2]
  214. if not os.path.exists(output_dir) or not os.path.isdir(output_dir):
  215. print("{} does not exist or is not a directory.".format(output_dir))
  216. sys.exit(1)
  217. else:
  218. output_dir = "./output"
  219.  
  220. # Use the following file names to generate the plots
  221. TASK1_FILE = "{}/bar_num_mentions.png".format(output_dir)
  222.  
  223. TASK2_GOP_FILE = "{}/bar_candidates_together_gop.png".format(output_dir)
  224. TASK2_ALL_FILE = "{}/bar_candidates_together_all.png".format(output_dir)
  225.  
  226. TASK3_GOP_FILE = "{}/candidates_gop.png".format(output_dir)
  227. TASK3_ALL_FILE = "{}/candidates_all.png".format(output_dir)
  228.  
  229. TASK4A_DURING_FILE = "{}/mentions_over_time_during.png".format(output_dir)
  230. TASK4A_FULL_FILE = "{}/mentions_over_time.png".format(output_dir)
  231.  
  232. TASK4B_FILE = "{}/stackplot.png".format(output_dir)
  233.  
  234.  
  235. # Your code goes here, BUT NOT **ALL** YOUR CODE.
  236. #
  237. # You should write functions that do all the work, and then
  238. # call them from here.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement