Guest User

Untitled

a guest
Oct 16th, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.08 KB | None | 0 0
  1. def tweets_to_df(path):
  2.  
  3. tweets = list(open('tweets_bkp.json', 'rt'))
  4.  
  5. text = []
  6. weekday = []
  7. month = []
  8. day = []
  9. hour = []
  10. hashtag = []
  11. url = []
  12. favorite = []
  13. reply = []
  14. retweet = []
  15. follower = []
  16. following = []
  17. user = []
  18. screen_name = []
  19.  
  20. for t in tweets:
  21. t = jsonpickle.decode(t)
  22.  
  23. # Text
  24. text.append(t['text'])
  25.  
  26. # Decompose date
  27. date = t['created_at']
  28. weekday.append(date.split(' ')[0])
  29. month.append(date.split(' ')[1])
  30. day.append(date.split(' ')[2])
  31.  
  32. time = date.split(' ')[3].split(':')
  33. hour.append(time[0])
  34.  
  35. # Has hashtag
  36. if len(t['entities']['hashtags']) == 0:
  37. hashtag.append(0)
  38. else:
  39. hashtag.append(1)
  40.  
  41. # Has url
  42. if len(t['entities']['urls']) == 0:
  43. url.append(0)
  44. else:
  45. url.append(1)
  46.  
  47. # Number of favs
  48. favorite.append(t['favorite_count'])
  49.  
  50. # Is reply?
  51. if t['in_reply_to_status_id'] == None:
  52. reply.append(0)
  53. else:
  54. reply.append(1)
  55.  
  56. # Retweets count
  57. retweet.append(t['retweet_count'])
  58.  
  59. # Followers number
  60. follower.append(t['user']['followers_count'])
  61.  
  62. # Following number
  63. following.append(t['user']['friends_count'])
  64.  
  65. # Add user
  66. user.append(t['user']['name'])
  67.  
  68. # Add screen name
  69. screen_name.append(t['user']['screen_name'])
  70.  
  71. d = {'text': text,
  72. 'weekday': weekday,
  73. 'month' : month,
  74. 'day': day,
  75. 'hour' : hour,
  76. 'has_hashtag': hashtag,
  77. 'has_url': url,
  78. 'fav_count': favorite,
  79. 'is_reply': reply,
  80. 'retweet_count': retweet,
  81. 'followers': follower,
  82. 'following' : following,
  83. 'user': user,
  84. 'screen_name' : screen_name
  85. }
  86.  
  87. return pd.DataFrame(data = d)
  88.  
  89. tweets_df = tweets_to_df('tweets_bkp.json')
Add Comment
Please, Sign In to add comment