Advertisement
Guest User

proj9

a guest
Nov 19th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.57 KB | None | 0 0
  1. '''Skeleton file with all strings for Mimir testing'''
  2.  
  3. import string, calendar, pylab
  4.  
  5. MONTH_NAMES = [calendar.month_name[month] for month in range(1,13)]
  6.  
  7. def open_file():
  8.     '''Recieves a file pointer and returns an open file'''
  9.    
  10.     filefound = False
  11.     while filefound == False:
  12.         try:
  13.             filename = input("Input a file name: ") #input a file
  14.             fp = open(filename, 'r') #open file and read it
  15.             filefound = True #filefound become True
  16.         except FileNotFoundError:
  17.             print("Unable to open file. Please try again.") #print statement
  18.             continue #go back to the while loop
  19.        
  20.     return fp #return value
  21.  
  22. def validate_hashtag(s):
  23.     '''docstring'''
  24.  
  25. #    item = s.replace("#", "")
  26.     item = s[1:]
  27.     if item[0] in string.digits:
  28.         return False
  29.     for ch in item:
  30.         if ch in string.punctuation:
  31.             return False
  32.     return True
  33.  
  34. def get_hashtags(s):
  35.     '''docstring'''
  36. #    print(s)
  37.     valid_hashtag = []
  38.    
  39.     item_list = s.split()
  40.     for item in item_list:
  41. #        valid_hashtag = []
  42.         if item.startswith("#"):
  43.             if validate_hashtag(item):
  44.                 if item not in valid_hashtag:
  45.                     valid_hashtag.append(item)
  46.    
  47.     return valid_hashtag
  48.  
  49.  
  50. def read_data(fp):
  51.     '''docstring'''
  52.  
  53.     line_list = []
  54.     hashtag_string = ''
  55.     data_list = []
  56.     final_data = []
  57.     for line in fp:
  58. #        print(line)
  59.         line_list = line.split(",")
  60. #        print(line_list)
  61.         data_list = []
  62.         username = line_list[0]
  63.         data_list.append(username)
  64.         month = line_list[1]
  65.         data_list.append(month)
  66.         hashtag = line_list[2]
  67.         hashtag_string += hashtag
  68. #    print(hashtag_string)
  69.         valid_hashtag = get_hashtags(hashtag_string)
  70.         data_list.append(valid_hashtag)
  71. #        print(data_list)
  72.         final_data.append(data_list)
  73.     print(final_data)
  74.     return final_data
  75.  
  76. def get_histogram_tag_count_for_users(data,usernames):
  77.     '''docstring'''
  78.     pass
  79.  
  80. def get_tags_by_month_for_users(data,usernames):
  81.     '''docstring'''
  82.     pass
  83.  
  84. def get_user_names(L):
  85.     '''docstring'''
  86.     pass
  87.  
  88. def three_most_common_hashtags_combined(L,usernames):
  89.     '''docstring'''
  90.     pass
  91.  
  92. def three_most_common_hashtags_individuals(data_lst,usernames):
  93.     '''docstring'''
  94.     pass
  95.            
  96. def similarity(data_lst,user1,user2):
  97.     '''docstring'''
  98.     pass
  99.        
  100. def plot_similarity(x_list,y_list,name1,name2):
  101.     '''Plot y vs. x with name1 and name2 in the title.'''
  102.    
  103.     pylab.plot(x_list,y_list)
  104.     pylab.xticks(x_list,MONTH_NAMES,rotation=45,ha='right')
  105.     pylab.ylabel('Hashtag Similarity')
  106.     pylab.title('Twitter Similarity Between '+name1+' and '+name2)
  107.     pylab.tight_layout()
  108.     pylab.show()
  109.     # the next line is simply to illustrate how to save the plot
  110.     # leave it commented out in the version you submit
  111.     #pylab.savefig("plot.png")
  112.  
  113.  
  114. def main():
  115.     # Open the file
  116.     # Read the data from the file
  117.     # Create username list from data
  118.     # Calculate the top three hashtags combined for all users
  119.     # Print them
  120.     # Calculate the top three hashtags individually for all users
  121.     # Print them
  122.     # Prompt for two user names from username list
  123.     # Calculate similarity for the two users
  124.     # Print them
  125.     # Prompt to plot or not and plot if 'yes'
  126.     fp = open_file()
  127.     list_data = read_data(fp)
  128.    
  129. #    print("Top Three Hashtags Combined")
  130. #    print("{:>6s} {:<20s}".format("Count","Hashtag"))
  131. #    # your printing loop goes here
  132. #    print()
  133. #    
  134. #    print("Top Three Hashtags by Individual")
  135. #    print("{:>6s} {:<20s} {:<20s}".format("Count","Hashtag","User"))
  136. #    # your printing loop goes here
  137. #    print()
  138. #        
  139. #    #print("Usernames: ", usernames_str)
  140. #    #while True:  # prompt for and validate user names
  141. #        #user_str = input("Input two user names from the list, comma separated: ")
  142. #        # your check to for correct user names goes here
  143. #        #    print("Error in user names.  Please try again")
  144. #        
  145. #    # calculate similarity here
  146. #    print()
  147. #    #print("Similarities for "+users[0]+" and "+users[1])
  148. #    print("{:12s}{:6s}".format("Month","Count"))
  149. #    # your printing loop goes here
  150. #    print()
  151. #    
  152. #    # Prompt for a plot
  153. #    choice = input("Do you want to plot (yes/no)?: ")
  154. #    if choice.lower() == 'yes':
  155. #        # create x_list and y_list
  156. #        #plot_similarity(x_list,y_list,users[0],users[1])
  157.  
  158. if __name__ == '__main__':
  159.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement