Advertisement
Guest User

Untitled

a guest
May 16th, 2017
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.34 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. """
  4. hashtaglist.py
  5.  
  6. Created by Gordon Bonnar on 2010-03-14.
  7. Thanks to dlitz for the code replicating cat file|sort|uniq -c|sort -rn
  8. (NOT WORKING)
  9. """
  10. import twitter
  11. import re
  12. import sys
  13. import os.path
  14.  
  15. #Configuration Variables
  16. #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  17. username = "XXXXXXXX"
  18. password = "XXXXXXXXX"
  19.  
  20. #User from which to collect hashtag data
  21. user = "XXXXXXX"
  22.  
  23. #Define path to logfiles
  24. path = "XXXXXXXXXXX/"
  25. filename=path + user + "_hashtaglist.txt"
  26. #End Configuration section
  27. #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  28.  
  29. def get_twitter_api_cxn(user, passwd):
  30.   apicxn = twitter.Api(username=user, password=passwd)
  31.   return apicxn
  32.  
  33.  
  34. def print_from_file(file):
  35.     filename = file
  36.     print filename
  37.     # Build a dictionary of lines and their associated counts.
  38.     counts = {}
  39.     input_file = open(filename, "r")
  40.     print input_file.readline()
  41.     for line in input_file:
  42.         line = line.rstrip("\n").rstrip("\r")        # Strip trailing LF/CRLF
  43.         print line
  44.         counts[line] = counts.get(line, 0) + 1
  45.         print counts[line]
  46.  
  47.         # Build a list of [(lineA, countA), (lineB, countB), ... ]
  48.         sorted_counts = list(counts.items())
  49.  
  50.         # Sort the list by (count, line) in reverse order.
  51.         sorted_counts.sort(lambda a,b: -cmp((a[1], a[0]), (b[1], b[0])))
  52.  
  53.     # Output the lines
  54.     for line, count in sorted_counts:
  55.         print "%7d %s" % (count, line)
  56.  
  57.  
  58. #-------
  59. def get_hashtaglist(apicxn, user, file, start_id, count):
  60.     api = apicxn
  61.     filename = file
  62.  
  63.     #If a file already exists, grab the first line (StatusID) and open for
  64.     #reading and appending, otherwise create file and open for writing.
  65.     finalstatus = get_final_status_id(filename)
  66.     try:
  67.         finalstatus
  68.     except NameError:
  69.         finalstatus = None
  70.  
  71.     if finalstatus is None:
  72.         FILE = open(filename, "w")
  73.         print filename
  74.     else:
  75.         if os.path.exists(filename):
  76.             FILE = open(filename, "r+a")
  77.  
  78.     #If we don't know the last statusID checked, then grab the last 200 tweets, if we do,
  79.     #grab from last known ID.
  80.     if finalstatus is None:
  81.         statuses = api.GetUserTimeline(user, count)
  82.         print statuses[0].id
  83.         FILE.write(str(statuses[0].id) + "\n")
  84.     else:
  85.         statuses = api.GetUserTimeline(user,count, finalstatus)
  86.  
  87.     #Reverse the list of statuses so it is in chronological order
  88.     statuses.reverse()  
  89.  
  90.     #Compile regex for matching hastags
  91.     p = re.compile('[\A ](#\w+)',re.UNICODE)
  92.  
  93.     #Initialise list for hashtags
  94.     hashtaglist=[]
  95.  
  96.     #For every status in retrieved statuses, if it contains a hashtag add to hashtag list
  97.     for s in statuses :
  98.         matches = re.findall(p,s.text)
  99.     for match in matches :
  100.         hashtaglist.append(match)
  101.     return(hashtaglist)
  102.  
  103. def get_final_status_id(file):
  104.     if os.path.exists(filename):
  105.         FILE = open(filename, "r")
  106.         status_id = FILE.readline()
  107.         FILE.close()
  108.     else:
  109.         status_id = None
  110.     return status_id
  111.  
  112. def write_hashtaglist(file,hashtaglist):
  113.     filename = file
  114.     hashthaglist = hashtaglist
  115.  
  116.     FILE = open(filename, "w")
  117.  
  118.     #Write the hashtags to file
  119.     for hashtag in hashtaglist :
  120.         #Write hashtags to file
  121.         FILE.write(hashtag + "\n")
  122.  
  123.     FILE.close()
  124.  
  125. def main():
  126.     api = get_twitter_api_cxn(username, password)
  127.     finalstatus = get_final_status_id(filename)
  128.     hashtaglist = get_hashtaglist(api, user, filename, finalstatus, 200)
  129.     write_hashtaglist(filename, hashtaglist)
  130.     print_from_file(filename)
  131.  
  132. if __name__ == "__main__":
  133.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement