Advertisement
Guest User

Untitled

a guest
May 26th, 2018
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.13 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon May 21 17:00:36 2018
  4.  
  5. @author: 20166843
  6. """
  7.  
  8. import sqlite3
  9. import pandas as pd
  10. import datetime
  11. import numpy as np
  12. import matplotlib.pyplot as plt
  13. import seaborn as sns
  14.  
  15. database = sqlite3.connect('data/mydb.sqlite3')
  16.  
  17.  
  18. airlines = {
  19. "KLM" : "56377143",
  20. "Air_France" : "106062176",
  21. "British_Airways" : "18332190",
  22. "American_Air" : "22536055",
  23. "Lufthansa" : "124476322",
  24. "Air_Berlin" : "26223583",
  25. "Air_Berlin_Assist": "2182373406",
  26. "easyJet": "38676903",
  27. "Ryanair": "1542862735",
  28. "Singapore_Air": "253340062",
  29. "Qantas": "218730857",
  30. "Etihad_Airways": "45621423",
  31. "Virgin_Atlantic": "20626359"}
  32.  
  33. def incoming_volume_df_dates(user_name, airlines_id, date_start= "2016-03-01 00:00:00", date_end="2017-05-01 00:00:00"):
  34. """
  35. Gets the amount of tweets which mention this user in the given timespan
  36. :param user_name: String of the username of twitter user
  37. :param date_start: Datetime string in YYYY-MM-DD HH:MM:SS format
  38. :param date_end: Datetime string in YYYY-MM-DD HH:MM:SS format
  39. :return: Returns dataframe which contains created_at only
  40. """
  41.  
  42. query = """
  43. SELECT created_at
  44. FROM tweets
  45. WHERE (text LIKE '%{}%' OR in_reply_to_user_id == {})
  46. AND datetime(created_at) >= datetime('{}')
  47. AND datetime(created_at) < datetime('{}');
  48. """.format(user_name, airlines_id, date_start, date_end)
  49.  
  50. return pd.read_sql_query(query, database)
  51.  
  52. def outgoing_volume_df_dates(airlines_id, date_start= "2016-03-01 00:00:00", date_end="2017-05-01 00:00:00"):
  53. """
  54. Gets the amount of tweets sent by the user in the given timespan
  55. :param user_id: String of ID of twitter user
  56. :param date_start: Datetime string in YYYY-MM-DD HH:MM:SS format
  57. :param date_end: Datetime string in YYYY-MM-DD HH:MM:SS format
  58. :return: Amount of tweets which comply to all the requirements
  59. """
  60. query = """SELECT created_at FROM tweets WHERE user_id == {} AND
  61. (in_reply_to_user_id NOT NULL OR in_reply_to_tweet_id NOT NULL) AND
  62. datetime(created_at) >= datetime('{}') AND
  63. datetime(created_at) < datetime('{}');""".format(airlines_id, date_start, date_end)
  64.  
  65. return pd.read_sql_query(query, database)
  66.  
  67. data = incoming_volume_df_dates(user_name="AmericanAir", airlines_id= airlines["American_Air"])
  68. data_outgoing = outgoing_volume_df_dates(airlines_id= airlines["American_Air"])
  69.  
  70. data['created_at'] = pd.to_datetime(data['created_at'])
  71. data_outgoing['created_at'] = pd.to_datetime(data['created_at'])
  72. data_outgoing
  73. counts = {}
  74. counts_outgoing = {}
  75. for year in [2016, 2017]:
  76. for month in range(12):
  77. print(month)
  78. month += 1
  79. month_list = []
  80. month_list_outgoing = []
  81. for day in range(31):
  82. day += 1
  83. month_list.append(data[(data['created_at'].dt.year == year) & (data['created_at'].dt.month == month) & (data['created_at'].dt.day == day)].count()[0])
  84. month_list_outgoing.append(data_outgoing[(data_outgoing['created_at'].dt.year == year) & (data_outgoing['created_at'].dt.month == month) & (data_outgoing['created_at'].dt.day == day)].count()[0])
  85. counts[str(year) + str(" ") + str(month)] = month_list
  86. counts_outgoing[str(year) + str(" ") + str(month)] = month_list_outgoing
  87.  
  88. '''counts_week = {}
  89. for year in [2016,2017]:
  90. for week in range(52):
  91. print(week)
  92. week += 1
  93. week_list = []
  94. for day in range(7):
  95. day += 1
  96. week_list.append(data[(data['created_at'].dt.year == year) & (data['created_at'].dt.week == week) & (data['created_at'].dt.day == day)].count()[0])
  97. counts_week[str(year) + str(" "), str(week)] = week_list
  98. '''
  99.  
  100. # Pops stuff
  101. counts.pop("2016 1")
  102. counts.pop("2016 2")
  103. counts.pop("2016 3")
  104. counts.pop("2016 4")
  105. counts.pop("2017 5")
  106. counts.pop("2017 6")
  107. counts.pop("2017 7")
  108. counts.pop("2017 8")
  109. counts.pop("2017 9")
  110. counts.pop("2017 10")
  111. counts.pop("2017 11")
  112. counts.pop("2017 12")
  113.  
  114. df = pd.DataFrame(data=counts)
  115. df
  116. cols = list(df.columns.values)
  117. cols = cols[3:8]+cols[0:3]+cols[8:]
  118. df = df[cols]
  119. df.index = range(32,1)
  120. df = df.iloc[::-1]
  121. df.columns = ["May '16", "June '16", "July '16", "August '16", "September '16", "October '16", "November '16", "December '16", "January '17", "February '17", "March '17", "April '17"]
  122. sns.heatmap(df, linewidths=.5, cmap="Blues");
  123. plt.xlabel("Months",size=13)
  124. plt.ylabel("Days of the month", size=13)
  125. plt.title('Incoming tweet volume by day for American Airlines', size=20);
  126.  
  127. # Pops stuff for outgoing volume
  128. counts_outgoing.pop("2016 1")
  129. counts_outgoing.pop("2016 2")
  130. counts_outgoing.pop("2016 3")
  131. counts_outgoing.pop("2016 4")
  132. counts_outgoing.pop("2017 5")
  133. counts_outgoing.pop("2017 6")
  134. counts_outgoing.pop("2017 7")
  135. counts_outgoing.pop("2017 8")
  136. counts_outgoing.pop("2017 9")
  137. counts_outgoing.pop("2017 10")
  138. counts_outgoing.pop("2017 11")
  139. counts_outgoing.pop("2017 12")
  140. df_outgoing = pd.DataFrame(data=counts_outgoing)
  141. cols = list(df_outgoing.columns.values)
  142. df_outgoing
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement