Advertisement
Guest User

Workout-1

a guest
Nov 16th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.66 KB | None | 0 0
  1.  
  2. ## about
  3.  
  4. # Data comes from my training log.
  5. # Search for "concl:" in order to find actionable insights
  6.  
  7. ## packages
  8.  
  9. import numpy as np
  10. import pandas as pd
  11. import matplotlib.pyplot as plt
  12.  
  13. ## import data
  14.  
  15. # cd to folder /Documents/python/trainingdata/
  16. df_raw = pd.read_csv('Traaining.1-report.csv', parse_dates=True)
  17.  
  18. # todo: import automatically using https://github.com/underdogio/typeform
  19.  
  20. # Remove test entries
  21. # first real training date is 2017-01-17 on row 3. entries 0:2 are tests
  22. df = df_raw[3:]
  23.  
  24. ## rename columns
  25.  
  26. colnames = ['IDnumber', 'Date', 'Yoga', 'Cardio_time', 'Legsfront', 'Legsback',
  27. 'Ass', 'Calf', 'Back', 'Biceps', 'Chest', 'Shoulders', 'Triceps',
  28. 'Other', 'Muscles_time', 'Stretch_time', 'Notes',
  29. 'StartDate(TC)', 'SubmitDate(TC)', 'NetworkID']
  30. df.columns = colnames
  31.  
  32. ## keep only some columns
  33.  
  34. keepcols = ['Date', 'Yoga', 'Cardio_time', 'Legsfront', 'Legsback',
  35. 'Ass', 'Calf', 'Back', 'Biceps', 'Chest', 'Shoulders', 'Triceps',
  36. 'Other', 'Muscles_time', 'Stretch_time', 'Notes']
  37. df = df[keepcols]
  38.  
  39. ## Create binary variables and recode NaN
  40.  
  41. # map binaryvars from NaN to 0 and map from not NaN to 1.
  42. binaryvars = ['Legsfront', 'Legsback', 'Ass', 'Calf', 'Back',
  43. 'Biceps', 'Chest', 'Shoulders', 'Triceps']
  44. df[binaryvars] = df[binaryvars].notnull().astype(int)
  45. # map from NaN to 0. this affects e.g. Cardio_time, Other and Notes.
  46. df.fillna(0, inplace=True)
  47.  
  48. ## Explanation of why I need to create these varaibles
  49.  
  50. # From df.head() we see 'Chest'='NaN' when I have not trained my chest,
  51. # and 'Chest'='Bröst' when I have trained chest (bröst is swedish for chest).
  52. # df.info() reveals the same problem: dtype=objects but I want dtype=float64
  53. # Hence we must map from 'NaN' to 0, and from 'some-text-not-NaN' to 1.
  54. #
  55. # Why did this problem occur? In typeform I have a multiple choice question,
  56. # apparently when downloading to csv each choice is converted to a variable.
  57.  
  58. ## Chestday vs Legday
  59.  
  60. # It is chestday if I have done any of these: Chest, Triceps
  61. df['Chestday'] = ( (df['Chest'] == 1) |
  62. (df['Triceps'] == 1)
  63. )
  64. # It is legday if I have done any of these: Legsfront, Legsback, Calf.
  65. df['Legday'] = ( (df['Legsfront'] == 1) |
  66. (df['Legsback'] == 1) |
  67. (df['Calf'] == 1)
  68. )
  69. # below is True. there is no rows with both Leg day and chest day.
  70. df[(df['Chestday'] == True) & (df['Legday'] == True)].count().sum() == 0
  71. # below is False. some days are only yoga / cardio / back, not legs / chest.
  72. df[(df['Chestday'] == False) & (df['Legday'] == False)].count().sum() == 0
  73.  
  74. # is legday vs chestday balanced?
  75. print(sum(df['Chestday']))
  76. print(sum(df['Legday']))
  77.  
  78. ## create variable: Daycategory
  79.  
  80. # less than cardiothreshold minutes of cardio is merely warmup
  81. cardiothreshold = 30
  82.  
  83. # function to go from binary variables to a categorical variable
  84. def daycategorizer(row):
  85. if (row['Cardio_time'] > cardiothreshold) & (row['Muscles_time'] > 0):
  86. day = 'Cardio & Muscle'
  87. elif row['Cardio_time'] > cardiothreshold:
  88. day = 'Cardio'
  89. elif row['Chestday'] == True:
  90. day = 'Chest'
  91. elif row['Legday'] == True:
  92. day = 'Leg'
  93. elif row['Back'] == True:
  94. day = 'Back'
  95. elif row['Yoga'] == 1:
  96. day = 'Yoga'
  97. else:
  98. day = 'Other' #
  99. return day
  100.  
  101. # go from binary variables to a categorical variable
  102. df['Daycategory'] = df.apply(daycategorizer, axis=1)
  103. # type is categorical
  104. df['Daycategory'] = df['Daycategory'].astype('category')
  105. # frequency table
  106. print(df['Daycategory'].value_counts() / df['Daycategory'].count() * 100)
  107. # concl: good split of types
  108.  
  109. ## create variable: Training_time
  110.  
  111. df['Training_time'] = df['Cardio_time'] + df['Muscles_time'] + df['Stretch_time']
  112.  
  113. ## create variable: Datetime, week
  114.  
  115. df['Datetime'] = pd.to_datetime(df['Date'])
  116. df['Week'] = df['Datetime'].dt.week
  117. df = df.drop('Date', 1)
  118.  
  119. ## Is Training_time related to category?
  120.  
  121. df.pivot_table(index='Daycategory', values='Training_time', aggfunc='sum')
  122.  
  123. # todo: more pivot tables. ask questions.
  124.  
  125. ## Histograms of training time
  126.  
  127. # Hist for total time
  128. pd.DataFrame.hist(df, column='Training_time')
  129. # concl: I see two groups. maybe legday is a long workout and chestday is shorter.
  130.  
  131. # Hist by daycateogry
  132. # pd.DataFrame.hist(df, column='Training_time', by='Daycategory')
  133. # this histogram has too few datapoints - must split by legday/chestday instead
  134.  
  135. # Hist for legdays
  136. pd.DataFrame.hist(df[df.Legday==True], column='Muscles_time')
  137. plt.title('Muscles_time for Legdays')
  138. # Hist for chestdays
  139. pd.DataFrame.hist(df[df.Chestday==True], column='Muscles_time')
  140. plt.title('Muscles_time for Chestdays')
  141.  
  142. # Stretch_time
  143. print(df['Stretch_time'].describe())
  144. # concl: Stretch_time must go up! stretch every session!
  145.  
  146. ## Last 6 traning sessions - this decides what next excerceise will be
  147.  
  148. df[['Datetime', 'Daycategory']].tail(6)
  149. df.tail(6)
  150.  
  151. ## Days since last training
  152.  
  153. nrows = df['Datetime'].count()
  154. pd.to_datetime('today') - pd.to_datetime(df['Datetime'][nrows])
  155.  
  156. ## How much cardio per week?
  157.  
  158. # Hist of Cardio_time (ignoring warmups)
  159. warmup_threshold = 10
  160. pd.DataFrame.hist(df[df.Cardio_time > warmup_threshold], column='Cardio_time')
  161.  
  162. # Cardio_time and Muscles_time, per week
  163. tbl1 = pd.pivot_table(df, index='Week',
  164. values=['Cardio_time', 'Muscles_time'],
  165. aggfunc='sum')
  166. tbl1
  167.  
  168. # number of weeks with no cardio for an entire week
  169. tbl1[tbl1.Cardio_time == 0].Cardio_time.count()
  170. # fraction of weeks with cardio
  171. n_days = (df['Datetime'].max() - df['Datetime'].min())
  172. n_days = (n_days / np.timedelta64(1, 'D')).astype(int)
  173. n_weeks = n_days / 7
  174. print(tbl1[tbl1.Cardio_time == 0].Cardio_time.count() / n_weeks * 100)
  175. # concl: too many weeks that I do zero cardio!
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement