Guest User

TagGen

a guest
Jan 6th, 2012
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.92 KB | None | 0 0
  1. # TagGen.py
  2. # Process a list of URLs, get tags for each from a web service and create a bag of words
  3. # Arguments:
  4. # 1. file containing URLs to process (1 per line)
  5. # 2. file to write list to as a comma separated string
  6. # usage:
  7. # python TagGen.py urls.txt tagsfile.txt
  8.  
  9. import fileinput
  10. import simplejson as json
  11. import sys
  12. import urllib2
  13.  
  14. # Read in a list of URLs (one one each line) and create a list to process
  15. # @return <list> of urls
  16. def getURLs(fileName):
  17.     L = []
  18.     for line in fileinput.input(fileName):
  19.         L.append(line)
  20.     print "%s urls to check" % len(L)
  21.     return L
  22.  
  23. # Query the TagTheNet api and get Tags
  24. # OpenCalais is also nice, enjoying the simplicity of this
  25. # could create different tag processing functions for different APIs and set as an option
  26. # @return <list> of tags
  27. def getTags(destURL):
  28.     u = 'http://tagthe.net/api/?view=json&url=%s' % destURL
  29.     print "Checking: %s" % destURL
  30.    
  31.     try:
  32.         f = urllib2.urlopen(u)
  33.         jsonString = f.read()
  34.     except:
  35.         print "FAIL: URL not loaded/read"
  36.         pass
  37.     try:
  38.         o = json.loads(jsonString)
  39.     except:
  40.         print "FAIL: json not loaded"
  41.         pass
  42.        
  43.     try:
  44.         tags = o['memes'][0]['dimensions']['topic']
  45.         print "Found some tags : %s" % ", ".join(tags)
  46.         return tags
  47.     except:
  48.         print "FAIL: No tags returned from service"
  49.         return []
  50.  
  51. def go(urlFile):
  52.     T = []
  53.     L = getURLs(urlFile);
  54.     for u in L:
  55.         s = getTags(u);
  56.         for item in s:
  57.             if item != "nullItem":
  58.                 T.append(unicode(item).encode("utf-8"))
  59.     return T
  60.    
  61.    
  62. if len(sys.argv) < 2:
  63.     sys.stderr.write('Usage: sys.argv[0] ')
  64.     sys.exit(1)
  65. else:
  66.     myTags = go(sys.argv[1])
  67.     outfile = open(sys.argv[2], 'w')
  68.     outfile.write(", ".join(myTags))
  69.     outfile.close()
  70.     print "%s tags created!"  % len(myTags)
Advertisement
Add Comment
Please, Sign In to add comment