Advertisement
Guest User

Untitled

a guest
Oct 17th, 2019
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.70 KB | None | 0 0
  1. {
  2. "nbformat_minor": 1,
  3. "cells": [
  4. {
  5. "execution_count": 32,
  6. "cell_type": "code",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": "# The code was removed by Watson Studio for sharing."
  10. },
  11. {
  12. "execution_count": 33,
  13. "cell_type": "code",
  14. "metadata": {},
  15. "outputs": [],
  16. "source": "import pandas as pd\n\n# Retrieve the exported data file from project storage\nmy_file = project.get_file(\"data_exp_output.csv\")\n\n# Rewind the returned object to its start, then parse the CSV into a DataFrame\nmy_file.seek(0)\nhData_HSI = pd.read_csv(my_file)"
  17. },
  18. {
  19. "execution_count": 34,
  20. "cell_type": "code",
  21. "metadata": {},
  22. "outputs": [
  23. {
  24. "output_type": "display_data",
  25. "data": {
  26. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2009-01-02</td>\n <td>14448.22</td>\n <td>15042.81</td>\n <td>14412.12</td>\n <td>15042.81</td>\n <td>15042.81</td>\n <td>1752401800</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2009-01-05</td>\n <td>15349.33</td>\n <td>15563.31</td>\n <td>15128.32</td>\n <td>15563.31</td>\n <td>15563.31</td>\n <td>2172620600</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2009-01-06</td>\n <td>15612.47</td>\n <td>15651.61</td>\n <td>15367.93</td>\n <td>15509.51</td>\n <td>15509.51</td>\n <td>2484472200</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2009-01-07</td>\n <td>15759.53</td>\n <td>15763.55</td>\n <td>14976.74</td>\n <td>14987.46</td>\n <td>14987.46</td>\n <td>9799120000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2009-01-08</td>\n <td>14755.81</td>\n <td>14755.81</td>\n <td>14334.15</td>\n <td>14415.91</td>\n <td>14415.91</td>\n <td>4374435600</td>\n </tr>\n </tbody>\n</table>\n</div>",
  27. "text/plain": " Date Open High Low Close Adj Close Volume\n0 2009-01-02 14448.22 15042.81 14412.12 15042.81 15042.81 1752401800\n1 2009-01-05 15349.33 15563.31 15128.32 15563.31 15563.31 2172620600\n2 2009-01-06 15612.47 15651.61 15367.93 15509.51 15509.51 2484472200\n3 2009-01-07 15759.53 15763.55 14976.74 14987.46 14987.46 9799120000\n4 2009-01-08 14755.81 14755.81 14334.15 14415.91 14415.91 4374435600"
  28. },
  29. "metadata": {}
  30. },
  31. {
  32. "output_type": "display_data",
  33. "data": {
  34. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2623</th>\n <td>2019-08-26</td>\n <td>25322.00</td>\n <td>25692.28</td>\n <td>25249.51</td>\n <td>25680.33</td>\n <td>25680.33</td>\n <td>2475604900</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>2019-08-27</td>\n <td>25715.98</td>\n <td>25764.61</td>\n <td>25581.73</td>\n <td>25664.07</td>\n <td>25664.07</td>\n <td>2214167200</td>\n </tr>\n <tr>\n <th>2625</th>\n <td>2019-08-28</td>\n <td>25767.08</td>\n <td>25830.64</td>\n <td>25596.08</td>\n <td>25615.48</td>\n <td>25615.48</td>\n <td>1462114400</td>\n </tr>\n <tr>\n <th>2626</th>\n <td>2019-08-29</td>\n <td>25606.33</td>\n <td>25714.78</td>\n <td>25372.49</td>\n <td>25703.50</td>\n <td>25703.50</td>\n <td>1784869100</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>2019-08-30</td>\n <td>26011.64</td>\n <td>26011.64</td>\n <td>25536.15</td>\n <td>25724.73</td>\n <td>25724.73</td>\n <td>2017892000</td>\n </tr>\n </tbody>\n</table>\n</div>",
  35. "text/plain": " Date Open High Low Close Adj Close \\\n2623 2019-08-26 25322.00 25692.28 25249.51 25680.33 25680.33 \n2624 2019-08-27 25715.98 25764.61 25581.73 25664.07 25664.07 \n2625 2019-08-28 25767.08 25830.64 25596.08 25615.48 25615.48 \n2626 2019-08-29 25606.33 25714.78 25372.49 25703.50 25703.50 \n2627 2019-08-30 26011.64 26011.64 25536.15 25724.73 25724.73 \n\n Volume \n2623 2475604900 \n2624 2214167200 \n2625 1462114400 \n2626 1784869100 \n2627 2017892000 "
  36. },
  37. "metadata": {}
  38. }
  39. ],
  40. "source": "# Preview the first and last rows of the raw data\ndisplay(hData_HSI.head(), hData_HSI.tail())"
  41. },
  42. {
  43. "execution_count": 35,
  44. "cell_type": "code",
  45. "metadata": {},
  46. "outputs": [
  47. {
  48. "output_type": "stream",
  49. "name": "stderr",
  50. "text": "/opt/ibm/conda/miniconda3.6/lib/python3.6/site-packages/sklearn/preprocessing/data.py:323: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by MinMaxScaler.\n return self.partial_fit(X, y)\n"
  51. }
  52. ],
  53. "source": "# Perform min-max normalization on the numeric price/volume columns\nfrom sklearn.preprocessing import MinMaxScaler\n\ndataset = hData_HSI.copy()\n\n# Cast to float64 up front: the frame mixes int64 and float64 columns, and\n# handing that to MinMaxScaler raises a DataConversionWarning. The scaler\n# converts to float64 anyway, so the scaled values are unchanged.\ndataset_withoutDate = dataset.drop(columns=[\"Date\"]).astype(\"float64\")\n\nscaler = MinMaxScaler(feature_range=(0, 1))\nnormalized_dataset = scaler.fit_transform(dataset_withoutDate)"
  54. },
  55. {
  56. "execution_count": 36,
  57. "cell_type": "code",
  58. "metadata": {},
  59. "outputs": [
  60. {
  61. "execution_count": 36,
  62. "metadata": {},
  63. "data": {
  64. "text/plain": "numpy.ndarray"
  65. },
  66. "output_type": "execute_result"
  67. }
  68. ],
  69. "source": "# Inspect what kind of object fit_transform returned\ntype(normalized_dataset)"
  70. },
  71. {
  72. "execution_count": 37,
  73. "cell_type": "code",
  74. "metadata": {},
  75. "outputs": [],
  76. "source": "# Transform the scaled numpy array back to a pandas DataFrame.\n# Row and column labels come from the frame that was actually scaled, not from\n# a hard-coded positional slice, so they stay aligned with the array even if\n# the input schema changes.\nnormalized_df = pd.DataFrame(\n    data=normalized_dataset,\n    index=dataset_withoutDate.index,\n    columns=dataset_withoutDate.columns,\n)\n\n# Re-attach the unscaled Date column as the first column\nnormalized_df.insert(0, \"Date\", dataset[\"Date\"])"
  77. },
  78. {
  79. "execution_count": 38,
  80. "cell_type": "code",
  81. "metadata": {},
  82. "outputs": [
  83. {
  84. "output_type": "display_data",
  85. "data": {
  86. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2009-01-02</td>\n <td>0.133318</td>\n <td>0.151617</td>\n <td>0.142329</td>\n <td>0.169569</td>\n <td>0.169569</td>\n <td>0.178833</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2009-01-05</td>\n <td>0.174667</td>\n <td>0.175563</td>\n <td>0.175560</td>\n <td>0.193435</td>\n <td>0.193435</td>\n <td>0.221716</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2009-01-06</td>\n <td>0.186742</td>\n <td>0.179625</td>\n <td>0.186677</td>\n <td>0.190968</td>\n <td>0.190968</td>\n <td>0.253540</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2009-01-07</td>\n <td>0.193490</td>\n <td>0.184775</td>\n <td>0.168526</td>\n <td>0.167031</td>\n <td>0.167031</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2009-01-08</td>\n <td>0.147432</td>\n <td>0.138414</td>\n <td>0.138711</td>\n <td>0.140825</td>\n <td>0.140825</td>\n <td>0.446411</td>\n </tr>\n </tbody>\n</table>\n</div>",
  87. "text/plain": " Date Open High Low Close Adj Close Volume\n0 2009-01-02 0.133318 0.151617 0.142329 0.169569 0.169569 0.178833\n1 2009-01-05 0.174667 0.175563 0.175560 0.193435 0.193435 0.221716\n2 2009-01-06 0.186742 0.179625 0.186677 0.190968 0.190968 0.253540\n3 2009-01-07 0.193490 0.184775 0.168526 0.167031 0.167031 1.000000\n4 2009-01-08 0.147432 0.138414 0.138711 0.140825 0.140825 0.446411"
  88. },
  89. "metadata": {}
  90. },
  91. {
  92. "output_type": "display_data",
  93. "data": {
  94. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2623</th>\n <td>2019-08-26</td>\n <td>0.632284</td>\n <td>0.641542</td>\n <td>0.645167</td>\n <td>0.657316</td>\n <td>0.657316</td>\n <td>0.252635</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>2019-08-27</td>\n <td>0.650363</td>\n <td>0.644869</td>\n <td>0.660581</td>\n <td>0.656570</td>\n <td>0.656570</td>\n <td>0.225956</td>\n </tr>\n <tr>\n <th>2625</th>\n <td>2019-08-28</td>\n <td>0.652708</td>\n <td>0.647907</td>\n <td>0.661247</td>\n <td>0.654342</td>\n <td>0.654342</td>\n <td>0.149209</td>\n </tr>\n <tr>\n <th>2626</th>\n <td>2019-08-29</td>\n <td>0.645331</td>\n <td>0.642577</td>\n <td>0.650873</td>\n <td>0.658378</td>\n <td>0.658378</td>\n <td>0.182146</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>2019-08-30</td>\n <td>0.663930</td>\n <td>0.656234</td>\n <td>0.658466</td>\n <td>0.659351</td>\n <td>0.659351</td>\n <td>0.205926</td>\n </tr>\n </tbody>\n</table>\n</div>",
  95. "text/plain": " Date Open High Low Close Adj Close Volume\n2623 2019-08-26 0.632284 0.641542 0.645167 0.657316 0.657316 0.252635\n2624 2019-08-27 0.650363 0.644869 0.660581 0.656570 0.656570 0.225956\n2625 2019-08-28 0.652708 0.647907 0.661247 0.654342 0.654342 0.149209\n2626 2019-08-29 0.645331 0.642577 0.650873 0.658378 0.658378 0.182146\n2627 2019-08-30 0.663930 0.656234 0.658466 0.659351 0.659351 0.205926"
  96. },
  97. "metadata": {}
  98. }
  99. ],
  100. "source": "# Preview the first and last rows of the normalized data\ndisplay(normalized_df.head(), normalized_df.tail())"
  101. },
  102. {
  103. "execution_count": 40,
  104. "cell_type": "code",
  105. "metadata": {},
  106. "outputs": [
  107. {
  108. "execution_count": 40,
  109. "metadata": {},
  110. "data": {
  111. "text/plain": "{'file_name': 'etl_output.csv',\n 'message': 'File saved to project storage.',\n 'bucket_name': 'capstone-donotdelete-pr-m3ibiajq1tejos',\n 'asset_id': '5992cc5d-4e75-4c0e-bb64-034db8526b67'}"
  112. },
  113. "output_type": "execute_result"
  114. }
  115. ],
  116. "source": "# Persist the normalized dataset to project storage as CSV text,\n# replacing any existing file of the same name\nproject.save_data(\n    \"etl_output.csv\",\n    normalized_df.to_csv(),\n    overwrite=True,\n)"
  117. }
  118. ],
  119. "metadata": {
  120. "kernelspec": {
  121. "display_name": "Python 3.6 with Spark",
  122. "name": "python36",
  123. "language": "python3"
  124. },
  125. "language_info": {
  126. "mimetype": "text/x-python",
  127. "nbconvert_exporter": "python",
  128. "version": "3.6.8",
  129. "name": "python",
  130. "file_extension": ".py",
  131. "pygments_lexer": "ipython3",
  132. "codemirror_mode": {
  133. "version": 3,
  134. "name": "ipython"
  135. }
  136. }
  137. },
  138. "nbformat": 4
  139. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement