Advertisement
Guest User

Untitled

a guest
Feb 19th, 2017
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.75 KB | None | 0 0
  1. from html.parser import HTMLParser
  2. import re
  3. import plotly as py
  4. import plotly.graph_objs as go
  5.  
  6.  
  7. class LinksParser(HTMLParser):
  8. def __init__(self):
  9. HTMLParser.__init__(self)
  10. self.recordingData = False
  11. self.recordUser = False
  12. self.message_data = [()]
  13. self.user_data = []
  14. self.filter_tag = 'p'
  15. self.user_tag = 'span'
  16. self.user = ''
  17. self.persons = []
  18.  
  19. def handle_starttag(self, tag, attributes):
  20. # print('TAG : ' + tag)
  21. if tag == self.filter_tag:
  22. self.recordingData = True
  23. if tag == self.user_tag:
  24. self.recordUser = True
  25.  
  26. def handle_endtag(self, tag):
  27. return
  28.  
  29. def handle_data(self, data):
  30. # print('USEDATA : ' + data)
  31.  
  32. if self.recordUser and '@facebook' in data:
  33. result = ''
  34. for i in data:
  35. if i.isdigit():
  36. result += i
  37. # print('USERID : ' + result)
  38. self.recordUser = False
  39. self.user_id = result
  40. if result not in self.persons:
  41. self.persons.append(result)
  42. if self.recordingData:
  43. # print('Message: ' + data)
  44. self.message_data.append((data, self.user_id))
  45. self.recordingData = False
  46.  
  47.  
  48. parser = LinksParser()
  49. """
  50. parser.feed('<html><head><title>Test</title></head>'
  51. '<body><h1>Parse me!</h1></body></html>'
  52. '<p>PRINT ME</p>'
  53. '<div>NOT ME</div>')
  54. # print(parser.data)"""
  55. with open('messages.htm', encoding="utf8") as content_file:
  56. content = content_file.read()
  57. parser.feed(content)
  58.  
  59. from pprint import pprint
  60.  
  61. # pprint(parser.message_data)
  62. # pprint(parser.persons)
  63. smiley_count_dict = dict.fromkeys(parser.persons, 0)
  64. message_count_dict = dict.fromkeys(parser.persons, 0)
  65. # pprint(count_dict[parser.message_data[5][1]])
  66.  
  67. for message in parser.message_data:
  68. # pprint(message)
  69.  
  70. if len(message) == 2:
  71. key = message[1]
  72. smileys = 0
  73. smileys += message[0].count(':)')
  74. smileys += message[0].count(':-')
  75. smileys += message[0].count(':(')
  76. smileys += message[0].count(':p')
  77. smileys += message[0].count(':s')
  78. smileys += message[0].count(':P')
  79. smileys += message[0].count(':')
  80. smileys += message[0].count('^^')
  81. smileys += len(re.findall('[\U0001f600-\U0001f650]', message[0]))
  82.  
  83. smiley_count_dict[key] += smileys
  84. message_count_dict[key] += 1
  85.  
  86. data = []
  87. for key in smiley_count_dict.keys():
  88. message = message_count_dict[key]
  89. smileys = smiley_count_dict[key]
  90. if message > 0:
  91. rel = smileys / message
  92. # print("User: " + str(key) + " messages: " + str(message)+ " smileys: " + str(smileys) + " Rel = "+str(rel))
  93. data.append((smileys, message, rel, key))
  94. data.sort(key=lambda tup: tup[2])
  95. topTen = []
  96. for data_instance in data:
  97. if data_instance[1] > 50:
  98. topTen.append(data_instance)
  99. #pprint(topTen)
  100. #topTen=topTen[-10:]
  101. """
  102. data = [go.Bar(
  103. x=['giraffes', 'orangutans', 'monkeys'],
  104. y=[20, 14, 23]
  105. )]
  106. """
  107. keys = [x[3] for x in topTen]
  108. messages = [x[1] for x in topTen]
  109. smileys = [x[1] for x in topTen]
  110. print(topTen[0])
  111. print(keys)
  112. #print(messages)
  113. # trace1 = go.Bar(key,messages,'Messages')
  114. trace1 = go.Bar(
  115. # x=keys,
  116. y=messages,
  117. name='Messages'
  118. )
  119. trace2 = go.Bar(
  120. # x=keys,
  121. y=smileys,
  122. name='Smileys'
  123. )
  124. g_rel = [x[2] for x in topTen]
  125. g_data = [go.Bar(
  126. y=g_rel
  127. )]
  128. py.offline.plot(g_data, filename='basic-bar')
  129.  
  130. g_data = [trace1, trace2]
  131. layout = go.Layout(barmode='stack')
  132. fig = go.Figure(data=g_data, layout=layout)
  133. # py.offline.plot(fig, filename='stacked-bar')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement