SHARE
TWEET

Workout-1

a guest Nov 16th, 2017 46 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. ## about
  3.  
  4. # Data comes from my training log.
  5. # Search for "concl:" in order to find actionable insights
  6.  
  7. ## packages
  8.  
  9. import numpy as np
  10. import pandas as pd
  11. import matplotlib.pyplot as plt
  12.  
  13. ## import data
  14.  
  15. # cd to folder /Documents/python/trainingdata/
  16. df_raw = pd.read_csv('Traaining.1-report.csv', parse_dates=True)
  17.  
  18. # todo: import automatically using https://github.com/underdogio/typeform
  19.  
  20. # Remove test entries
  21. # first real training date is 2017-01-17 on row 3. entries 0:2 are tests
  22. df = df_raw[3:]
  23.  
  24. ## rename columns
  25.  
  26. colnames = ['IDnumber', 'Date', 'Yoga', 'Cardio_time', 'Legsfront', 'Legsback',
  27.        'Ass', 'Calf', 'Back', 'Biceps', 'Chest', 'Shoulders', 'Triceps',
  28.        'Other', 'Muscles_time', 'Stretch_time', 'Notes',
  29.        'StartDate(TC)', 'SubmitDate(TC)', 'NetworkID']
  30. df.columns = colnames
  31.  
  32. ## keep only some columns
  33.  
  34. keepcols = ['Date', 'Yoga', 'Cardio_time', 'Legsfront', 'Legsback',
  35.        'Ass', 'Calf', 'Back', 'Biceps', 'Chest', 'Shoulders', 'Triceps',
  36.        'Other', 'Muscles_time', 'Stretch_time', 'Notes']
  37. df = df[keepcols]
  38.  
  39. ## Create binary variables and recode NaN
  40.  
  41. # map binaryvars from NaN to 0 and map from not NaN to 1.
  42. binaryvars = ['Legsfront', 'Legsback', 'Ass', 'Calf', 'Back',
  43.              'Biceps', 'Chest', 'Shoulders', 'Triceps']
  44. df[binaryvars] = df[binaryvars].notnull().astype(int)
  45. # map from NaN to 0. this affects e.g. Cardio_time, Other and Notes.
  46. df.fillna(0, inplace=True)
  47.  
  48. ##  Explanation of why I need to create these variables
  49.  
  50. # From df.head() we see 'Chest'='NaN' when I have not trained my chest,
  51. # and 'Chest'='Bröst' when I have trained chest (bröst is Swedish for chest).
  52. # df.info() reveals the same problem: dtype=object but I want dtype=float64
  53. # Hence we must map from 'NaN' to 0, and from 'some-text-not-NaN' to 1.
  54. #
  55. # Why did this problem occur? In typeform I have a multiple choice question,
  56. # apparently when downloading to csv each choice is converted to a variable.
  57.  
  58. ## Chestday vs Legday
  59.  
  60. # It is chestday if I have done any of these: Chest, Triceps
  61. df['Chestday'] = ( (df['Chest'] == 1)   |
  62.                    (df['Triceps'] == 1)
  63.                   )
  64. # It is legday if I have done any of these: Legsfront, Legsback, Calf.
  65. df['Legday'] = ( (df['Legsfront'] == 1) |
  66.                  (df['Legsback'] == 1)  |
  67.                  (df['Calf'] == 1)
  68.                 )
  69. # below is True. there is no rows with both Leg day and chest day.
  70. df[(df['Chestday'] == True) & (df['Legday'] == True)].count().sum() == 0
  71. # below is False. some days are only yoga / cardio / back, not legs / chest.
  72. df[(df['Chestday'] == False) & (df['Legday'] == False)].count().sum() == 0
  73.  
  74. # is legday vs chestday balanced?
  75. print(sum(df['Chestday']))
  76. print(sum(df['Legday']))
  77.  
  78. ## create variable: Daycategory
  79.  
  80. # less than cardiothreshold minutes of cardio is merely warmup
  81. cardiothreshold = 30
  82.  
  83. # function to go from binary variables to a categorical variable
  84. def daycategorizer(row):
  85.     if (row['Cardio_time'] > cardiothreshold) & (row['Muscles_time'] > 0):
  86.         day = 'Cardio & Muscle'
  87.     elif row['Cardio_time'] > cardiothreshold:
  88.         day = 'Cardio'
  89.     elif row['Chestday'] == True:
  90.         day = 'Chest'
  91.     elif row['Legday'] == True:
  92.         day = 'Leg'
  93.     elif row['Back'] == True:
  94.         day = 'Back'
  95.     elif row['Yoga'] == 1:
  96.         day = 'Yoga'
  97.     else:
  98.         day = 'Other' #
  99.     return day
  100.  
  101. # go from binary variables to a categorical variable
  102. df['Daycategory'] = df.apply(daycategorizer, axis=1)
  103. # type is categorical
  104. df['Daycategory'] = df['Daycategory'].astype('category')
  105. # frequency table
  106. print(df['Daycategory'].value_counts() / df['Daycategory'].count() * 100)
  107. # concl: good split of types
  108.  
  109. ## create variable: Training_time
  110.  
  111. df['Training_time'] = df['Cardio_time'] + df['Muscles_time'] + df['Stretch_time']
  112.  
  113. ## create variable: Datetime, week
  114.  
  115. df['Datetime'] = pd.to_datetime(df['Date'])
  116. df['Week'] = df['Datetime'].dt.week
  117. df = df.drop('Date', 1)
  118.  
  119. ## Is Training_time related to category?
  120.  
  121. df.pivot_table(index='Daycategory', values='Training_time', aggfunc='sum')
  122.  
  123. # todo: more pivot tables. ask questions.
  124.  
  125. ## Histograms of training time
  126.  
  127. # Hist for total time
  128. pd.DataFrame.hist(df, column='Training_time')
  129. # concl: I see two groups. maybe legday is a long workout and chestday is shorter.
  130.  
  131. # Hist by daycateogry
  132. # pd.DataFrame.hist(df, column='Training_time', by='Daycategory')
  133. # this histogram has too few datapoints - must split by legday/chestday instead
  134.  
  135. # Hist for legdays
  136. pd.DataFrame.hist(df[df.Legday==True], column='Muscles_time')
  137. plt.title('Muscles_time for Legdays')
  138. # Hist for chestdays
  139. pd.DataFrame.hist(df[df.Chestday==True], column='Muscles_time')
  140. plt.title('Muscles_time for Chestdays')
  141.  
  142. # Stretch_time
  143. print(df['Stretch_time'].describe())
  144. # concl: Stretch_time must go up! stretch every session!
  145.  
  146. ## Last 6 traning sessions - this decides what next excerceise will be
  147.  
  148. df[['Datetime', 'Daycategory']].tail(6)
  149. df.tail(6)
  150.  
  151. ## Days since last training
  152.  
  153. nrows = df['Datetime'].count()
  154. pd.to_datetime('today') - pd.to_datetime(df['Datetime'][nrows])
  155.  
  156. ## How much cardio per week?
  157.  
  158. # Hist of Cardio_time (ignoring warmups)
  159. warmup_threshold = 10
  160. pd.DataFrame.hist(df[df.Cardio_time > warmup_threshold], column='Cardio_time')
  161.  
  162. # Cardio_time and Muscles_time, per week
  163. tbl1 = pd.pivot_table(df, index='Week',
  164.                           values=['Cardio_time', 'Muscles_time'],
  165.                           aggfunc='sum')
  166. tbl1
  167.  
  168. # number of weeks with no cardio for an entire week
  169. tbl1[tbl1.Cardio_time == 0].Cardio_time.count()
  170. # fraction of weeks with cardio
  171. n_days = (df['Datetime'].max() - df['Datetime'].min())
  172. n_days = (n_days / np.timedelta64(1, 'D')).astype(int)
  173. n_weeks = n_days / 7
  174. print(tbl1[tbl1.Cardio_time == 0].Cardio_time.count() / n_weeks * 100)
  175. # concl: too many weeks that I do zero cardio!
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top