Advertisement
yspanchal

twitter hashtag

Mar 28th, 2014
139
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.26 KB | None | 0 0
  1. ==================================================================================================================
  2.         hashtagspout.py
  3. ==================================================================================================================
  4. import sys
  5. import time
  6. import random
  7. import logging
  8. from twitter import *
  9. from petrel import storm
  10. from petrel.emitter import Spout
  11.  
  12. log = logging.getLogger('hashtagspout')
  13.  
  14. log.debug('hashtagspout started')
  15.  
  16. class HashtagSpout(Spout):
  17.     def __init__(self):
  18.         super(HashtagSpout, self).__init__(script=__file__)
  19.  
  20.     def declareOutputFields(cls):
  21.         return ['tag', 'date']
  22.  
  23.     t = [[{'indices': [96, 101], 'text': 'cute'}],[{'indices': [96, 101], 'text': 'beauty'}],[{'indices': [96, 101], 'text': 'nice'}]]
  24.  
  25.     def nextTuple(self):
  26.         sys.stdout.write("started tuple")
  27.         tag = self.t[random.randint(0, len(t) - 1)]
  28.         date = 'Tue Apr 01 05:27:35 +0000 2014'
  29.         log.debug('Sending HashTag For Processing: %s', hashtag)
  30.         sys.stdout.write(tag)
  31.         storm.emit([tag, date])
  32.  
  33. def run():
  34.     HashtagSpout().run()
  35.  
  36. ==================================================================================================================
  37.         splithashtag.py
  38. ==================================================================================================================
  39.  
  40. import sys
  41. import logging
  42. from petrel import storm
  43. from petrel.emitter import BasicBolt
  44.  
  45. log = logging.getLogger('splithashtag')
  46. log.debug('splithashtag Started')
  47.  
  48. class SplitHashtagBolt(BasicBolt):
  49.     def __init__(self):
  50.         super(SplitHashtagBolt, self).__init__(script=__file__)
  51.  
  52.     @classmethod
  53.     def declareOutputFields(self):
  54.         return ['date','tag']
  55.  
  56.     def process(self, tup):
  57.         log.debug('SplitHashtagBolt.process() started with: %s', tup)
  58.         data = tup.values[0]
  59.         for i in data:
  60.             log.debug('SplitHashtagBolt.process() emitting: %s', i['text'])
  61.             date = tup.values[1]
  62.             tag = i['text']
  63.             sys.stdout.write("splithashtag: %s" %tag)
  64.             storm.emit([date, tag])
  65.  
  66. def run():
  67.     SplitHashtagBolt().run()
  68.  
  69. ==================================================================================================================
  70.         hashtagcount.py
  71. ==================================================================================================================
  72.  
  73. import sys
  74. import redis
  75. import logging
  76. from collections import defaultdict
  77. from petrel import storm
  78. from petrel.emitter import BasicBolt
  79.  
  80. log = logging.getLogger('hashtagcount')
  81. log.debug('hashtagcount started')
  82.  
  83. r = redis.StrictRedis(host='localhost', port=6379, db=0)
  84.  
  85. class HashtagCountBolt(BasicBolt):
  86.     def __init__(self):
  87.         super(HashtagCountBolt, self).__init__(script=__file__)
  88.         self._count = defaultdict(int)
  89.  
  90.     @classmethod
  91.     def declareOutputFields(cls):
  92.         return ['tag', 'count']
  93.  
  94.     def process(self, tup):
  95.         log.debug("HashtagCountBolt.process() started with: %s", tup)
  96.         tag = tup.values[1]
  97.         self._count[tag] =+ 1
  98.         storm.emit([tag, self._count[tag]])
  99.         r.set("tag:%s" %tag, "%s, %s" % (tup.values[0],self._count[tag]))
  100.         r.rpush("%s" %tag, "{%s:%s}" % (tup.values[0],self._count[tag]))
  101.         sys.stdout.write("data addded to redis: %s" %tag)
  102.  
  103. def run():
  104.     HashtagCountBolt().run()
  105.  
  106.  
  107. ======================================================================================================
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement