Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "nbformat_minor": 1,
- "cells": [
- {
- "execution_count": 32,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "# The code was removed by Watson Studio for sharing."
- },
- {
- "execution_count": 33,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "# Fetch the file\nmy_file = project.get_file(\"data_exp_output.csv\")\n\n# Read the CSV data file from the object storage into a pandas DataFrame\nmy_file.seek(0)\nimport pandas as pd\nhData_HSI = pd.read_csv(my_file)"
- },
- {
- "execution_count": 34,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2009-01-02</td>\n <td>14448.22</td>\n <td>15042.81</td>\n <td>14412.12</td>\n <td>15042.81</td>\n <td>15042.81</td>\n <td>1752401800</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2009-01-05</td>\n <td>15349.33</td>\n <td>15563.31</td>\n <td>15128.32</td>\n <td>15563.31</td>\n <td>15563.31</td>\n <td>2172620600</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2009-01-06</td>\n <td>15612.47</td>\n <td>15651.61</td>\n <td>15367.93</td>\n <td>15509.51</td>\n <td>15509.51</td>\n <td>2484472200</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2009-01-07</td>\n <td>15759.53</td>\n <td>15763.55</td>\n <td>14976.74</td>\n <td>14987.46</td>\n <td>14987.46</td>\n <td>9799120000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2009-01-08</td>\n <td>14755.81</td>\n <td>14755.81</td>\n <td>14334.15</td>\n <td>14415.91</td>\n <td>14415.91</td>\n <td>4374435600</td>\n </tr>\n </tbody>\n</table>\n</div>",
- "text/plain": " Date Open High Low Close Adj Close Volume\n0 2009-01-02 14448.22 15042.81 14412.12 15042.81 15042.81 1752401800\n1 2009-01-05 15349.33 15563.31 15128.32 15563.31 15563.31 2172620600\n2 2009-01-06 15612.47 15651.61 15367.93 15509.51 15509.51 2484472200\n3 2009-01-07 15759.53 15763.55 14976.74 14987.46 14987.46 9799120000\n4 2009-01-08 14755.81 14755.81 14334.15 14415.91 14415.91 4374435600"
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2623</th>\n <td>2019-08-26</td>\n <td>25322.00</td>\n <td>25692.28</td>\n <td>25249.51</td>\n <td>25680.33</td>\n <td>25680.33</td>\n <td>2475604900</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>2019-08-27</td>\n <td>25715.98</td>\n <td>25764.61</td>\n <td>25581.73</td>\n <td>25664.07</td>\n <td>25664.07</td>\n <td>2214167200</td>\n </tr>\n <tr>\n <th>2625</th>\n <td>2019-08-28</td>\n <td>25767.08</td>\n <td>25830.64</td>\n <td>25596.08</td>\n <td>25615.48</td>\n <td>25615.48</td>\n <td>1462114400</td>\n </tr>\n <tr>\n <th>2626</th>\n <td>2019-08-29</td>\n <td>25606.33</td>\n <td>25714.78</td>\n <td>25372.49</td>\n <td>25703.50</td>\n <td>25703.50</td>\n <td>1784869100</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>2019-08-30</td>\n <td>26011.64</td>\n <td>26011.64</td>\n <td>25536.15</td>\n <td>25724.73</td>\n <td>25724.73</td>\n <td>2017892000</td>\n </tr>\n </tbody>\n</table>\n</div>",
- "text/plain": " Date Open High Low Close Adj Close \\\n2623 2019-08-26 25322.00 25692.28 25249.51 25680.33 25680.33 \n2624 2019-08-27 25715.98 25764.61 25581.73 25664.07 25664.07 \n2625 2019-08-28 25767.08 25830.64 25596.08 25615.48 25615.48 \n2626 2019-08-29 25606.33 25714.78 25372.49 25703.50 25703.50 \n2627 2019-08-30 26011.64 26011.64 25536.15 25724.73 25724.73 \n\n Volume \n2623 2475604900 \n2624 2214167200 \n2625 1462114400 \n2626 1784869100 \n2627 2017892000 "
- },
- "metadata": {}
- }
- ],
- "source": "display(hData_HSI.head())\ndisplay(hData_HSI.tail())"
- },
- {
- "execution_count": 35,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": "/opt/ibm/conda/miniconda3.6/lib/python3.6/site-packages/sklearn/preprocessing/data.py:323: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by MinMaxScaler.\n return self.partial_fit(X, y)\n"
- }
- ],
- "source": "#Perform normalization on the price data \nfrom sklearn.preprocessing import MinMaxScaler\n\ndataset = hData_HSI.copy()\ndataset_withoutDate = dataset.drop(columns=[\"Date\"])\n\nscaler = MinMaxScaler(feature_range=(0, 1))\nnormalized_dataset = scaler.fit_transform(dataset_withoutDate)"
- },
- {
- "execution_count": 36,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 36,
- "metadata": {},
- "data": {
- "text/plain": "numpy.ndarray"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "#Check the type of the normalized dataset\ntype(normalized_dataset)"
- },
- {
- "execution_count": 37,
- "cell_type": "code",
- "metadata": {},
- "outputs": [],
- "source": "# Transform the scaled numpy array back to pandas dataframe\nnormalized_df = pd.DataFrame(data=normalized_dataset, index=dataset.index, columns=dataset.columns[1:7])\nnormalized_df['Date'] = dataset['Date']\nnormalized_df = normalized_df[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]"
- },
- {
- "execution_count": 38,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2009-01-02</td>\n <td>0.133318</td>\n <td>0.151617</td>\n <td>0.142329</td>\n <td>0.169569</td>\n <td>0.169569</td>\n <td>0.178833</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2009-01-05</td>\n <td>0.174667</td>\n <td>0.175563</td>\n <td>0.175560</td>\n <td>0.193435</td>\n <td>0.193435</td>\n <td>0.221716</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2009-01-06</td>\n <td>0.186742</td>\n <td>0.179625</td>\n <td>0.186677</td>\n <td>0.190968</td>\n <td>0.190968</td>\n <td>0.253540</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2009-01-07</td>\n <td>0.193490</td>\n <td>0.184775</td>\n <td>0.168526</td>\n <td>0.167031</td>\n <td>0.167031</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2009-01-08</td>\n <td>0.147432</td>\n <td>0.138414</td>\n <td>0.138711</td>\n <td>0.140825</td>\n <td>0.140825</td>\n <td>0.446411</td>\n </tr>\n </tbody>\n</table>\n</div>",
- "text/plain": " Date Open High Low Close Adj Close Volume\n0 2009-01-02 0.133318 0.151617 0.142329 0.169569 0.169569 0.178833\n1 2009-01-05 0.174667 0.175563 0.175560 0.193435 0.193435 0.221716\n2 2009-01-06 0.186742 0.179625 0.186677 0.190968 0.190968 0.253540\n3 2009-01-07 0.193490 0.184775 0.168526 0.167031 0.167031 1.000000\n4 2009-01-08 0.147432 0.138414 0.138711 0.140825 0.140825 0.446411"
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2623</th>\n <td>2019-08-26</td>\n <td>0.632284</td>\n <td>0.641542</td>\n <td>0.645167</td>\n <td>0.657316</td>\n <td>0.657316</td>\n <td>0.252635</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>2019-08-27</td>\n <td>0.650363</td>\n <td>0.644869</td>\n <td>0.660581</td>\n <td>0.656570</td>\n <td>0.656570</td>\n <td>0.225956</td>\n </tr>\n <tr>\n <th>2625</th>\n <td>2019-08-28</td>\n <td>0.652708</td>\n <td>0.647907</td>\n <td>0.661247</td>\n <td>0.654342</td>\n <td>0.654342</td>\n <td>0.149209</td>\n </tr>\n <tr>\n <th>2626</th>\n <td>2019-08-29</td>\n <td>0.645331</td>\n <td>0.642577</td>\n <td>0.650873</td>\n <td>0.658378</td>\n <td>0.658378</td>\n <td>0.182146</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>2019-08-30</td>\n <td>0.663930</td>\n <td>0.656234</td>\n <td>0.658466</td>\n <td>0.659351</td>\n <td>0.659351</td>\n <td>0.205926</td>\n </tr>\n </tbody>\n</table>\n</div>",
- "text/plain": " Date Open High Low Close Adj Close Volume\n2623 2019-08-26 0.632284 0.641542 0.645167 0.657316 0.657316 0.252635\n2624 2019-08-27 0.650363 0.644869 0.660581 0.656570 0.656570 0.225956\n2625 2019-08-28 0.652708 0.647907 0.661247 0.654342 0.654342 0.149209\n2626 2019-08-29 0.645331 0.642577 0.650873 0.658378 0.658378 0.182146\n2627 2019-08-30 0.663930 0.656234 0.658466 0.659351 0.659351 0.205926"
- },
- "metadata": {}
- }
- ],
- "source": "display(normalized_df.head())\ndisplay(normalized_df.tail())"
- },
- {
- "execution_count": 40,
- "cell_type": "code",
- "metadata": {},
- "outputs": [
- {
- "execution_count": 40,
- "metadata": {},
- "data": {
- "text/plain": "{'file_name': 'etl_output.csv',\n 'message': 'File saved to project storage.',\n 'bucket_name': 'capstone-donotdelete-pr-m3ibiajq1tejos',\n 'asset_id': '5992cc5d-4e75-4c0e-bb64-034db8526b67'}"
- },
- "output_type": "execute_result"
- }
- ],
- "source": "#Save the processed dataset with my added features \nproject.save_data(\"etl_output.csv\", normalized_df.to_csv(), overwrite=True)"
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.6 with Spark",
- "name": "python36",
- "language": "python3"
- },
- "language_info": {
- "mimetype": "text/x-python",
- "nbconvert_exporter": "python",
- "version": "3.6.8",
- "name": "python",
- "file_extension": ".py",
- "pygments_lexer": "ipython3",
- "codemirror_mode": {
- "version": 3,
- "name": "ipython"
- }
- }
- },
- "nbformat": 4
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement