Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Label one day of heart-rate samples as "working out" or not.

Given:
    - CSV file with path: <INPUT_DIR>/heartRate-<YYYYMMDD>.csv
    - First line of CSV file: <COLUMN_TIME>,<COLUMN_HEARTRATE>
    - Rest of the lines:
      <Timestamp with format YYYY-MM-DD HH:mm:ss>,<Beats per minute>

Then:
    - Create a new CSV file in: <OUTPUT_DIR>/heartRate-<YYYYMMDD>.csv
    - First line of CSV file:
      <COLUMN_TIME>,<COLUMN_HEARTRATE>,<COLUMN_WORKINGOUT>
    - Rest of the lines:
      <Timestamp with format YYYY-MM-DD HH:mm:ss>,<Beats per minute>,
      <True or False>

The new column in the new CSV file indicates whether, at each moment, the
person whose heart rate is recorded in this file was working out or not.

The values for the new column are set to True for timestamps strictly
between
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC>
(both bounds are exclusive).

TIP: To set all values to False, set
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> to 02:00:00 and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC> to 01:00:00
(an init time later than the end time matches no sample).

NOTICE: This code plots 2 graphs: the first one is based on the data in the
input file, and the second one is equal to the first but colors each dot
depending on the value in the third column (red = working out, blue = not).

TIP: Execute the first half of the script to find the range where the person
is exercising, then update
<WORKOUT_INIT_HOUR>:<WORKOUT_INIT_MIN>:<WORKOUT_INIT_SEC> and
<WORKOUT_END_HOUR>:<WORKOUT_END_MIN>:<WORKOUT_END_SEC> and execute all the
code.
"""

# Imports
from datetime import datetime as dt
from os import makedirs
from os.path import join

import matplotlib.pyplot as plt
import pandas as pd

# Constants
COLUMN_TIME = 'Time'
COLUMN_HEARTRATE = 'BPM'
COLUMN_WORKINGOUT = 'WorkingOut'
YEAR = 2016
MONTH = 12
DAY = 10
WORKOUT_INIT_HOUR = 2
WORKOUT_INIT_MIN = 0
WORKOUT_INIT_SEC = 0
WORKOUT_END_HOUR = 1
WORKOUT_END_MIN = 0
WORKOUT_END_SEC = 0
DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
INPUT_DIR = './Raw'
OUTPUT_DIR = './Processed'
FILENAME = 'heartRate-%d%02d%02d.csv' % (YEAR, MONTH, DAY)
PLOT_TITLE = COLUMN_TIME + ' vs ' + COLUMN_HEARTRATE + ' (' + FILENAME + ')'


def _finish_plot():
    """Add the shared title and axis labels to the current figure, then show it."""
    plt.title(PLOT_TITLE)
    plt.xlabel(COLUMN_TIME)
    plt.ylabel(COLUMN_HEARTRATE)
    plt.show()


# Read dataset
input_file = join(INPUT_DIR, FILENAME)
dataset = pd.read_csv(input_file)
# Parse every timestamp in a single call with the explicit format. This avoids
# read_csv's deprecated `date_parser` callback and still raises ValueError on
# a timestamp that does not match DATE_FORMAT.
dataset[COLUMN_TIME] = pd.to_datetime(dataset[COLUMN_TIME],
                                      format=DATE_FORMAT)
# Bare expression: has no effect when run as a script, but echoes the column
# types when these lines are executed in an interactive console.
dataset.dtypes

# Plot dataset (raw samples: black line with black dots)
times = dataset[COLUMN_TIME].values
heart_rates = dataset[COLUMN_HEARTRATE].values
plt.plot(times, heart_rates, color='black')
plt.scatter(times, heart_rates, color='black')
_finish_plot()

# Label dataset
init_time = dt(YEAR, MONTH, DAY,
               WORKOUT_INIT_HOUR, WORKOUT_INIT_MIN, WORKOUT_INIT_SEC)
end_time = dt(YEAR, MONTH, DAY,
              WORKOUT_END_HOUR, WORKOUT_END_MIN, WORKOUT_END_SEC)
# Strict comparisons: samples exactly at init_time or end_time are labelled
# False.
dataset[COLUMN_WORKINGOUT] = ((dataset[COLUMN_TIME] > init_time) &
                              (dataset[COLUMN_TIME] < end_time))

# Plot dataset (same line, dots coloured by the new label)
working_out = dataset[COLUMN_WORKINGOUT].values
resting = ~working_out
plt.plot(times, heart_rates, color='black')
plt.scatter(times[resting], heart_rates[resting], color='blue')
plt.scatter(times[working_out], heart_rates[working_out], color='red')
_finish_plot()

# Export dataset
# Create the output directory on first use so to_csv does not fail on a
# fresh checkout.
makedirs(OUTPUT_DIR, exist_ok=True)
output_file = join(OUTPUT_DIR, FILENAME)
dataset.to_csv(output_file,
               index=False,
               date_format=DATE_FORMAT)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement