Advertisement
Guest User

Untitled

a guest
Oct 17th, 2019
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 22.50 KB | None | 0 0
  1. {
  2. "nbformat_minor": 1,
  3. "cells": [
  4. {
  5. "execution_count": 1,
  6. "cell_type": "code",
  7. "metadata": {},
  8. "outputs": [
  9. {
  10. "output_type": "stream",
  11. "name": "stdout",
  12. "text": "Waiting for a Spark session to start...\nSpark Initialization Done! ApplicationId = app-20190929092647-0000\nKERNEL_ID = 3bb5dd9c-69fd-4cde-9252-fa2e81138105\n"
  13. }
  14. ],
  15. "source": "# The code was removed by Watson Studio for sharing."
  16. },
  17. {
  18. "execution_count": 2,
  19. "cell_type": "code",
  20. "metadata": {},
  21. "outputs": [],
  22. "source": "import pandas as pd\n\n# Get a handle on the CSV through the project object\nmy_file = project.get_file(\"etl_output.csv\")\n\n# Rewind the file object, then parse it into a pandas DataFrame\nmy_file.seek(0)\ndf = pd.read_csv(my_file)"
  23. },
  24. {
  25. "execution_count": 3,
  26. "cell_type": "code",
  27. "metadata": {},
  28. "outputs": [
  29. {
  30. "output_type": "display_data",
  31. "data": {
  32. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unnamed: 0</th>\n <th>Date</th>\n <th>Open</th>\n <th>High</th>\n <th>Low</th>\n <th>Close</th>\n <th>Adj Close</th>\n <th>Volume</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>2009-01-02</td>\n <td>0.133318</td>\n <td>0.151617</td>\n <td>0.142329</td>\n <td>0.169569</td>\n <td>0.169569</td>\n <td>0.178833</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>2009-01-05</td>\n <td>0.174667</td>\n <td>0.175563</td>\n <td>0.175560</td>\n <td>0.193435</td>\n <td>0.193435</td>\n <td>0.221716</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2</td>\n <td>2009-01-06</td>\n <td>0.186742</td>\n <td>0.179625</td>\n <td>0.186677</td>\n <td>0.190968</td>\n <td>0.190968</td>\n <td>0.253540</td>\n </tr>\n <tr>\n <th>3</th>\n <td>3</td>\n <td>2009-01-07</td>\n <td>0.193490</td>\n <td>0.184775</td>\n <td>0.168526</td>\n <td>0.167031</td>\n <td>0.167031</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>4</td>\n <td>2009-01-08</td>\n <td>0.147432</td>\n <td>0.138414</td>\n <td>0.138711</td>\n <td>0.140825</td>\n <td>0.140825</td>\n <td>0.446411</td>\n </tr>\n </tbody>\n</table>\n</div>",
  33. "text/plain": " Unnamed: 0 Date Open High Low Close Adj Close \\\n0 0 2009-01-02 0.133318 0.151617 0.142329 0.169569 0.169569 \n1 1 2009-01-05 0.174667 0.175563 0.175560 0.193435 0.193435 \n2 2 2009-01-06 0.186742 0.179625 0.186677 0.190968 0.190968 \n3 3 2009-01-07 0.193490 0.184775 0.168526 0.167031 0.167031 \n4 4 2009-01-08 0.147432 0.138414 0.138711 0.140825 0.140825 \n\n Volume \n0 0.178833 \n1 0.221716 \n2 0.253540 \n3 1.000000 \n4 0.446411 "
  34. },
  35. "metadata": {}
  36. }
  37. ],
  38. "source": "# Quick look at the first five rows of the loaded dataframe\ndisplay(df.head(5))"
  39. },
  40. {
  41. "execution_count": 4,
  42. "cell_type": "code",
  43. "metadata": {},
  44. "outputs": [],
  45. "source": "#Necessary imports\nfrom datetime import datetime\nimport numpy as np"
  46. },
  47. {
  48. "execution_count": 5,
  49. "cell_type": "code",
  50. "metadata": {},
  51. "outputs": [],
  52. "source": "# Number of previous rows (trading days) whose close prices become feature columns\nLOOKBACK = 10\n\n# Start from the Date/Close columns; reset_index returns a new frame, so df is left untouched\ndf_lookback = df[['Date', 'Close']].reset_index(drop=True)\n\n# A row has a complete window once LOOKBACK earlier rows exist, i.e. position >= LOOKBACK.\n# The earlier check `i > LOOKBACK` also blanked row LOOKBACK and lost one valid sample.\nhas_full_window = np.arange(len(df_lookback)) >= LOOKBACK\n\n# PREV_CLOSEk holds the close from k rows earlier; rows with an incomplete window stay NaN\nfor k in range(1, LOOKBACK + 1):\n    df_lookback['PREV_CLOSE' + str(k)] = df_lookback['Close'].shift(k).where(has_full_window)"
  53. },
  54. {
  55. "execution_count": 6,
  56. "cell_type": "code",
  57. "metadata": {},
  58. "outputs": [
  59. {
  60. "output_type": "display_data",
  61. "data": {
  62. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Close</th>\n <th>PREV_CLOSE1</th>\n <th>PREV_CLOSE2</th>\n <th>PREV_CLOSE3</th>\n <th>PREV_CLOSE4</th>\n <th>PREV_CLOSE5</th>\n <th>PREV_CLOSE6</th>\n <th>PREV_CLOSE7</th>\n <th>PREV_CLOSE8</th>\n <th>PREV_CLOSE9</th>\n <th>PREV_CLOSE10</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2009-01-02</td>\n <td>0.169569</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2009-01-05</td>\n <td>0.193435</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2009-01-06</td>\n <td>0.190968</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2009-01-07</td>\n <td>0.167031</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2009-01-08</td>\n <td>0.140825</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>5</th>\n <td>2009-01-09</td>\n <td>0.139061</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>6</th>\n <td>2009-01-12</td>\n 
<td>0.120425</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>7</th>\n <td>2009-01-13</td>\n <td>0.106535</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>8</th>\n <td>2009-01-14</td>\n <td>0.108211</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>9</th>\n <td>2009-01-15</td>\n <td>0.0870436</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>10</th>\n <td>2009-01-16</td>\n <td>0.087619</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>11</th>\n <td>2009-01-19</td>\n <td>0.0914925</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n <td>0.190968</td>\n <td>0.193435</td>\n </tr>\n <tr>\n <th>12</th>\n <td>2009-01-20</td>\n <td>0.0740589</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n <td>0.190968</td>\n </tr>\n <tr>\n <th>13</th>\n <td>2009-01-21</td>\n <td>0.0568123</td>\n <td>0.074059</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n </tr>\n <tr>\n <th>14</th>\n <td>2009-01-22</td>\n <td>0.0602218</td>\n <td>0.056812</td>\n <td>0.074059</td>\n <td>0.091493</td>\n 
<td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n </tr>\n </tbody>\n</table>\n</div>",
  63. "text/plain": " Date Close PREV_CLOSE1 PREV_CLOSE2 PREV_CLOSE3 PREV_CLOSE4 \\\n0 2009-01-02 0.169569 NaN NaN NaN NaN \n1 2009-01-05 0.193435 NaN NaN NaN NaN \n2 2009-01-06 0.190968 NaN NaN NaN NaN \n3 2009-01-07 0.167031 NaN NaN NaN NaN \n4 2009-01-08 0.140825 NaN NaN NaN NaN \n5 2009-01-09 0.139061 NaN NaN NaN NaN \n6 2009-01-12 0.120425 NaN NaN NaN NaN \n7 2009-01-13 0.106535 NaN NaN NaN NaN \n8 2009-01-14 0.108211 NaN NaN NaN NaN \n9 2009-01-15 0.0870436 NaN NaN NaN NaN \n10 2009-01-16 0.087619 NaN NaN NaN NaN \n11 2009-01-19 0.0914925 0.087619 0.087044 0.108211 0.106535 \n12 2009-01-20 0.0740589 0.091493 0.087619 0.087044 0.108211 \n13 2009-01-21 0.0568123 0.074059 0.091493 0.087619 0.087044 \n14 2009-01-22 0.0602218 0.056812 0.074059 0.091493 0.087619 \n\n PREV_CLOSE5 PREV_CLOSE6 PREV_CLOSE7 PREV_CLOSE8 PREV_CLOSE9 \\\n0 NaN NaN NaN NaN NaN \n1 NaN NaN NaN NaN NaN \n2 NaN NaN NaN NaN NaN \n3 NaN NaN NaN NaN NaN \n4 NaN NaN NaN NaN NaN \n5 NaN NaN NaN NaN NaN \n6 NaN NaN NaN NaN NaN \n7 NaN NaN NaN NaN NaN \n8 NaN NaN NaN NaN NaN \n9 NaN NaN NaN NaN NaN \n10 NaN NaN NaN NaN NaN \n11 0.120425 0.139061 0.140825 0.167031 0.190968 \n12 0.106535 0.120425 0.139061 0.140825 0.167031 \n13 0.108211 0.106535 0.120425 0.139061 0.140825 \n14 0.087044 0.108211 0.106535 0.120425 0.139061 \n\n PREV_CLOSE10 \n0 NaN \n1 NaN \n2 NaN \n3 NaN \n4 NaN \n5 NaN \n6 NaN \n7 NaN \n8 NaN \n9 NaN \n10 NaN \n11 0.193435 \n12 0.190968 \n13 0.167031 \n14 0.140825 "
  64. },
  65. "metadata": {}
  66. }
  67. ],
  68. "source": "# Inspect the first 15 rows of the lookback dataframe\ndisplay(df_lookback.iloc[:15])"
  69. },
  70. {
  71. "execution_count": 7,
  72. "cell_type": "code",
  73. "metadata": {},
  74. "outputs": [],
  75. "source": "# Keep only the rows without any NA value (dropna returns a new frame, df_lookback is unchanged)\ndf_withoutNA = df_lookback.dropna()"
  76. },
  77. {
  78. "execution_count": 8,
  79. "cell_type": "code",
  80. "metadata": {},
  81. "outputs": [
  82. {
  83. "output_type": "display_data",
  84. "data": {
  85. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Close</th>\n <th>PREV_CLOSE1</th>\n <th>PREV_CLOSE2</th>\n <th>PREV_CLOSE3</th>\n <th>PREV_CLOSE4</th>\n <th>PREV_CLOSE5</th>\n <th>PREV_CLOSE6</th>\n <th>PREV_CLOSE7</th>\n <th>PREV_CLOSE8</th>\n <th>PREV_CLOSE9</th>\n <th>PREV_CLOSE10</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>11</th>\n <td>2009-01-19</td>\n <td>0.0914925</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n <td>0.190968</td>\n <td>0.193435</td>\n </tr>\n <tr>\n <th>12</th>\n <td>2009-01-20</td>\n <td>0.0740589</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n <td>0.190968</td>\n </tr>\n <tr>\n <th>13</th>\n <td>2009-01-21</td>\n <td>0.0568123</td>\n <td>0.074059</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n <td>0.167031</td>\n </tr>\n <tr>\n <th>14</th>\n <td>2009-01-22</td>\n <td>0.0602218</td>\n <td>0.056812</td>\n <td>0.074059</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n <td>0.140825</td>\n </tr>\n <tr>\n <th>15</th>\n <td>2009-01-23</td>\n <td>0.0565817</td>\n <td>0.060222</td>\n <td>0.056812</td>\n <td>0.074059</td>\n <td>0.091493</td>\n <td>0.087619</td>\n <td>0.087044</td>\n <td>0.108211</td>\n <td>0.106535</td>\n <td>0.120425</td>\n <td>0.139061</td>\n </tr>\n 
</tbody>\n</table>\n</div>",
  86. "text/plain": " Date Close PREV_CLOSE1 PREV_CLOSE2 PREV_CLOSE3 PREV_CLOSE4 \\\n11 2009-01-19 0.0914925 0.087619 0.087044 0.108211 0.106535 \n12 2009-01-20 0.0740589 0.091493 0.087619 0.087044 0.108211 \n13 2009-01-21 0.0568123 0.074059 0.091493 0.087619 0.087044 \n14 2009-01-22 0.0602218 0.056812 0.074059 0.091493 0.087619 \n15 2009-01-23 0.0565817 0.060222 0.056812 0.074059 0.091493 \n\n PREV_CLOSE5 PREV_CLOSE6 PREV_CLOSE7 PREV_CLOSE8 PREV_CLOSE9 \\\n11 0.120425 0.139061 0.140825 0.167031 0.190968 \n12 0.106535 0.120425 0.139061 0.140825 0.167031 \n13 0.108211 0.106535 0.120425 0.139061 0.140825 \n14 0.087044 0.108211 0.106535 0.120425 0.139061 \n15 0.087619 0.087044 0.108211 0.106535 0.120425 \n\n PREV_CLOSE10 \n11 0.193435 \n12 0.190968 \n13 0.167031 \n14 0.140825 \n15 0.139061 "
  87. },
  88. "metadata": {}
  89. },
  90. {
  91. "output_type": "display_data",
  92. "data": {
  93. "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Date</th>\n <th>Close</th>\n <th>PREV_CLOSE1</th>\n <th>PREV_CLOSE2</th>\n <th>PREV_CLOSE3</th>\n <th>PREV_CLOSE4</th>\n <th>PREV_CLOSE5</th>\n <th>PREV_CLOSE6</th>\n <th>PREV_CLOSE7</th>\n <th>PREV_CLOSE8</th>\n <th>PREV_CLOSE9</th>\n <th>PREV_CLOSE10</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2623</th>\n <td>2019-08-26</td>\n <td>0.657316</td>\n <td>0.680195</td>\n <td>0.674207</td>\n <td>0.684355</td>\n <td>0.682589</td>\n <td>0.685354</td>\n <td>0.659786</td>\n <td>0.648839</td>\n <td>0.639981</td>\n <td>0.639019</td>\n <td>0.663936</td>\n </tr>\n <tr>\n <th>2624</th>\n <td>2019-08-27</td>\n <td>0.65657</td>\n <td>0.657316</td>\n <td>0.680195</td>\n <td>0.674207</td>\n <td>0.684355</td>\n <td>0.682589</td>\n <td>0.685354</td>\n <td>0.659786</td>\n <td>0.648839</td>\n <td>0.639981</td>\n <td>0.639019</td>\n </tr>\n <tr>\n <th>2625</th>\n <td>2019-08-28</td>\n <td>0.654342</td>\n <td>0.656570</td>\n <td>0.657316</td>\n <td>0.680195</td>\n <td>0.674207</td>\n <td>0.684355</td>\n <td>0.682589</td>\n <td>0.685354</td>\n <td>0.659786</td>\n <td>0.648839</td>\n <td>0.639981</td>\n </tr>\n <tr>\n <th>2626</th>\n <td>2019-08-29</td>\n <td>0.658378</td>\n <td>0.654342</td>\n <td>0.656570</td>\n <td>0.657316</td>\n <td>0.680195</td>\n <td>0.674207</td>\n <td>0.684355</td>\n <td>0.682589</td>\n <td>0.685354</td>\n <td>0.659786</td>\n <td>0.648839</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>2019-08-30</td>\n <td>0.659351</td>\n <td>0.658378</td>\n <td>0.654342</td>\n <td>0.656570</td>\n <td>0.657316</td>\n <td>0.680195</td>\n <td>0.674207</td>\n <td>0.684355</td>\n <td>0.682589</td>\n <td>0.685354</td>\n <td>0.659786</td>\n </tr>\n 
</tbody>\n</table>\n</div>",
  94. "text/plain": " Date Close PREV_CLOSE1 PREV_CLOSE2 PREV_CLOSE3 \\\n2623 2019-08-26 0.657316 0.680195 0.674207 0.684355 \n2624 2019-08-27 0.65657 0.657316 0.680195 0.674207 \n2625 2019-08-28 0.654342 0.656570 0.657316 0.680195 \n2626 2019-08-29 0.658378 0.654342 0.656570 0.657316 \n2627 2019-08-30 0.659351 0.658378 0.654342 0.656570 \n\n PREV_CLOSE4 PREV_CLOSE5 PREV_CLOSE6 PREV_CLOSE7 PREV_CLOSE8 \\\n2623 0.682589 0.685354 0.659786 0.648839 0.639981 \n2624 0.684355 0.682589 0.685354 0.659786 0.648839 \n2625 0.674207 0.684355 0.682589 0.685354 0.659786 \n2626 0.680195 0.674207 0.684355 0.682589 0.685354 \n2627 0.657316 0.680195 0.674207 0.684355 0.682589 \n\n PREV_CLOSE9 PREV_CLOSE10 \n2623 0.639019 0.663936 \n2624 0.639981 0.639019 \n2625 0.648839 0.639981 \n2626 0.659786 0.648839 \n2627 0.685354 0.659786 "
  95. },
  96. "metadata": {}
  97. }
  98. ],
  99. "source": "# Show the first and last rows of the finalized dataset\ndisplay(df_withoutNA.head(), df_withoutNA.tail())"
  100. },
  101. {
  102. "execution_count": 9,
  103. "cell_type": "code",
  104. "metadata": {},
  105. "outputs": [
  106. {
  107. "execution_count": 9,
  108. "metadata": {},
  109. "data": {
  110. "text/plain": "{'file_name': 'feature_engineering_output.csv',\n 'message': 'File saved to project storage.',\n 'bucket_name': 'capstone-donotdelete-pr-m3ibiajq1tejos',\n 'asset_id': '0c63aa05-04cb-47cd-870b-011a53eb559c'}"
  111. },
  112. "output_type": "execute_result"
  113. }
  114. ],
  115. "source": "# Serialize the cleaned dataset to CSV text and save it through the project object\ncsv_text = df_withoutNA.to_csv()\nproject.save_data(\"feature_engineering_output.csv\", csv_text, overwrite=True)"
  116. }
  117. ],
  118. "metadata": {
  119. "kernelspec": {
  120. "display_name": "Python 3.6 with Spark",
  121. "name": "python36",
  122. "language": "python3"
  123. },
  124. "language_info": {
  125. "mimetype": "text/x-python",
  126. "nbconvert_exporter": "python",
  127. "version": "3.6.8",
  128. "name": "python",
  129. "file_extension": ".py",
  130. "pygments_lexer": "ipython3",
  131. "codemirror_mode": {
  132. "version": 3,
  133. "name": "ipython"
  134. }
  135. }
  136. },
  137. "nbformat": 4
  138. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement