Advertisement
Guest User

Untitled

a guest
Jun 9th, 2017
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.40 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 4,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [
  10. {
  11. "data": {
  12. "text/html": [
  13. "<div>\n",
  14. "<style>\n",
  15. " .dataframe thead tr:only-child th {\n",
  16. " text-align: right;\n",
  17. " }\n",
  18. "\n",
  19. " .dataframe thead th {\n",
  20. " text-align: left;\n",
  21. " }\n",
  22. "\n",
  23. " .dataframe tbody tr th {\n",
  24. " vertical-align: top;\n",
  25. " }\n",
  26. "</style>\n",
  27. "<table border=\"1\" class=\"dataframe\">\n",
  28. " <thead>\n",
  29. " <tr style=\"text-align: right;\">\n",
  30. " <th></th>\n",
  31. " <th>assist_scores</th>\n",
  32. " </tr>\n",
  33. " </thead>\n",
  34. " <tbody>\n",
  35. " <tr>\n",
  36. " <th>count</th>\n",
  37. " <td>283.000000</td>\n",
  38. " </tr>\n",
  39. " <tr>\n",
  40. " <th>mean</th>\n",
  41. " <td>9.190813</td>\n",
  42. " </tr>\n",
  43. " <tr>\n",
  44. " <th>std</th>\n",
  45. " <td>1.679767</td>\n",
  46. " </tr>\n",
  47. " <tr>\n",
  48. " <th>min</th>\n",
  49. " <td>0.000000</td>\n",
  50. " </tr>\n",
  51. " <tr>\n",
  52. " <th>25%</th>\n",
  53. " <td>9.000000</td>\n",
  54. " </tr>\n",
  55. " <tr>\n",
  56. " <th>50%</th>\n",
  57. " <td>10.000000</td>\n",
  58. " </tr>\n",
  59. " <tr>\n",
  60. " <th>75%</th>\n",
  61. " <td>10.000000</td>\n",
  62. " </tr>\n",
  63. " <tr>\n",
  64. " <th>max</th>\n",
  65. " <td>10.000000</td>\n",
  66. " </tr>\n",
  67. " </tbody>\n",
  68. "</table>\n",
  69. "</div>"
  70. ],
  71. "text/plain": [
  72. " assist_scores\n",
  73. "count 283.000000\n",
  74. "mean 9.190813\n",
  75. "std 1.679767\n",
  76. "min 0.000000\n",
  77. "25% 9.000000\n",
  78. "50% 10.000000\n",
  79. "75% 10.000000\n",
  80. "max 10.000000"
  81. ]
  82. },
  83. "execution_count": 4,
  84. "metadata": {},
  85. "output_type": "execute_result"
  86. }
  87. ],
  88. "source": [
  89. "import psycopg2\n",
  90. "from collections import Counter\n",
  91. "import numpy as np\n",
  92. "import pandas as pd\n",
  93. "from scipy import stats\n",
  94. "import matplotlib.pyplot as plt\n",
  95. "\n",
  96. "\n",
  97. "db_user = 'tyler.engel'\n",
  98. "db_password = 'zedt7CTM8pr3'\n",
  99. "redshift_address = 'fulla.cth9cfyvuxeq.us-east-1.redshift.amazonaws.com'\n",
  100. "redshift_port = '5439'\n",
  101. "redshift_DBName = 'dev'\n",
  102. "connection = 'postgres://{}:{}@{}:{}/{}'.format(db_user, db_password, redshift_address, redshift_port, redshift_DBName)\n",
  103. "\n",
  104. "\n",
  105. "conn = psycopg2.connect(dbname=redshift_DBName, user=db_user, host=redshift_address, password=db_password, port=redshift_port)\n",
  106. "\n",
  107. "cur = conn.cursor()\n",
  108. "\n",
  109. "assist = cur.execute(\"SELECT u.teamid, sport, score FROM delighted_responses d JOIN user_user_roles u ON u.userid = d.userid WHERE created_at > '2017-03-01' AND d.sport IN ('Football', 'Basketball') AND ROLE IN (2, 3) AND u.teamid IN (SELECT DISTINCT teamid FROM assist_teams_and_invoices)\")\n",
  110. "assist_teams = cur.fetchall()\n",
  111. "\n",
  112. "assist_teams_list = []\n",
  113. "assist_sports_list = []\n",
  114. "assist_scores_list = []\n",
  115. "\n",
  116. "def make_team_list(team_list, new_list):\n",
  117. " for i in range(0, len(team_list)):\n",
  118. " new_list.append(team_list[i][0])\n",
  119. " return new_list\n",
  120. "\n",
  121. "def make_sport_list(team_list, new_list):\n",
  122. " for i in range(0, len(team_list)):\n",
  123. " new_list.append(team_list[i][1])\n",
  124. " return new_list\n",
  125. "\n",
  126. "def make_score_list(team_list, new_list):\n",
  127. " for i in range(0, len(team_list)):\n",
  128. " new_list.append(team_list[i][2])\n",
  129. " return new_list\n",
  130. " \n",
  131. "assist_teams_pd = make_team_list(assist_teams, assist_teams_list)\n",
  132. "assist_sports_pd = make_sport_list(assist_teams, assist_sports_list)\n",
  133. "assist_scores_pd = make_score_list(assist_teams, assist_scores_list)\n",
  134. "\n",
  135. "non_assist = cur.execute(\"SELECT u.teamid, sport, score FROM delighted_responses d JOIN user_user_roles u ON u.userid = d.userid WHERE created_at > '2017-03-01' AND d.sport IN ('Football', 'Basketball') AND ROLE IN (2, 3) AND u.teamid not IN (SELECT DISTINCT teamid FROM assist_teams_and_invoices)\")\n",
  136. "non_assist_teams = cur.fetchall()\n",
  137. "\n",
  138. "non_assist_teams_list = []\n",
  139. "non_assist_sports_list = []\n",
  140. "non_assist_scores_list = []\n",
  141. "\n",
  142. "non_assist_teams_pd = make_team_list(non_assist_teams, non_assist_teams_list)\n",
  143. "non_assist_sports_pd = make_sport_list(non_assist_teams, non_assist_sports_list)\n",
  144. "non_assist_scores_pd = make_score_list(non_assist_teams, non_assist_scores_list)\n",
  145. "\n",
  146. "assist_df = pd.DataFrame(np.column_stack([assist_teams_pd, assist_sports_pd, assist_scores_pd]), columns = ['assist_teams', 'assist_sports', 'assist_scores']) \n",
  147. "# Convert score to numeric\n",
  148. "assist_df[['assist_scores']] = assist_df[['assist_scores']].apply(pd.to_numeric)\n",
  149. "non_assist_df = pd.DataFrame(np.column_stack([non_assist_teams_pd, non_assist_sports_pd, non_assist_scores_pd]), columns = ['non_assist_teams', 'non_assist_sports', 'non_assist_scores']) \n",
  150. "# Convert score to numeric\n",
  151. "non_assist_df[['assist_scores']] = non_assist_df[['non_assist_scores']].apply(pd.to_numeric)\n",
  152. "assist_df.describe()"
  153. ]
  154. },
  155. {
  156. "cell_type": "code",
  157. "execution_count": 9,
  158. "metadata": {
  159. "collapsed": false
  160. },
  161. "outputs": [
  162. {
  163. "data": {
  164. "text/plain": [
  165. "(array([ 2., 0., 4., 0., 1., 8., 4., 11., 22., 231.]),\n",
  166. " array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),\n",
  167. " <a list of 10 Patch objects>)"
  168. ]
  169. },
  170. "execution_count": 9,
  171. "metadata": {},
  172. "output_type": "execute_result"
  173. }
  174. ],
  175. "source": [
  176. "plt.hist(assist_scores_pd)\n",
  177. "plt.hist(non_assist_scores_pd)"
  178. ]
  179. },
  180. {
  181. "cell_type": "code",
  182. "execution_count": 8,
  183. "metadata": {
  184. "collapsed": false
  185. },
  186. "outputs": [
  187. {
  188. "name": "stdout",
  189. "output_type": "stream",
  190. "text": [
  191. " assist_scores\n",
  192. "assist_sports \n",
  193. "Basketball 8.518987\n",
  194. "Football 9.450980\n",
  195. " assist_scores\n",
  196. "non_assist_sports \n",
  197. "Basketball 8.882570\n",
  198. "Football 9.371036\n"
  199. ]
  200. }
  201. ],
  202. "source": [
  203. "print(assist_df.groupby('assist_sports').mean())\n",
  204. "print(non_assist_df.groupby('non_assist_sports').mean())"
  205. ]
  206. },
  207. {
  208. "cell_type": "code",
  209. "execution_count": null,
  210. "metadata": {
  211. "collapsed": true
  212. },
  213. "outputs": [],
  214. "source": []
  215. }
  216. ],
  217. "metadata": {
  218. "kernelspec": {
  219. "display_name": "Python 3",
  220. "language": "python",
  221. "name": "python3"
  222. },
  223. "language_info": {
  224. "codemirror_mode": {
  225. "name": "ipython",
  226. "version": 3
  227. },
  228. "file_extension": ".py",
  229. "mimetype": "text/x-python",
  230. "name": "python",
  231. "nbconvert_exporter": "python",
  232. "pygments_lexer": "ipython3",
  233. "version": "3.6.0"
  234. }
  235. },
  236. "nbformat": 4,
  237. "nbformat_minor": 2
  238. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement