Guest User

Untitled

a guest
Oct 22nd, 2017
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.98 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "scrolled": true
  8. },
  9. "outputs": [
  10. {
  11. "data": {
  12. "text/html": [
  13. "<div>\n",
  14. "<style>\n",
  15. " .dataframe thead tr:only-child th {\n",
  16. " text-align: right;\n",
  17. " }\n",
  18. "\n",
  19. " .dataframe thead th {\n",
  20. " text-align: left;\n",
  21. " }\n",
  22. "\n",
  23. " .dataframe tbody tr th {\n",
  24. " vertical-align: top;\n",
  25. " }\n",
  26. "</style>\n",
  27. "<table border=\"1\" class=\"dataframe\">\n",
  28. " <thead>\n",
  29. " <tr style=\"text-align: right;\">\n",
  30. " <th></th>\n",
  31. " <th>property_id</th>\n",
  32. " <th>ad_latitude</th>\n",
  33. " <th>ad_longitude</th>\n",
  34. " <th>create_date</th>\n",
  35. " <th>check_in_date</th>\n",
  36. " <th>check_out_date</th>\n",
  37. " <th>country_code</th>\n",
  38. " </tr>\n",
  39. " </thead>\n",
  40. " <tbody>\n",
  41. " <tr>\n",
  42. " <th>0</th>\n",
  43. " <td>1246</td>\n",
  44. " <td>NaN</td>\n",
  45. " <td>NaN</td>\n",
  46. " <td>2012-03-07 13:26:38</td>\n",
  47. " <td>2012-08-20</td>\n",
  48. " <td>2012-08-25</td>\n",
  49. " <td>NaN</td>\n",
  50. " </tr>\n",
  51. " <tr>\n",
  52. " <th>1</th>\n",
  53. " <td>1246</td>\n",
  54. " <td>NaN</td>\n",
  55. " <td>NaN</td>\n",
  56. " <td>2012-03-07 13:35:03</td>\n",
  57. " <td>2012-08-27</td>\n",
  58. " <td>2012-08-30</td>\n",
  59. " <td>NaN</td>\n",
  60. " </tr>\n",
  61. " <tr>\n",
  62. " <th>2</th>\n",
  63. " <td>1237</td>\n",
  64. " <td>NaN</td>\n",
  65. " <td>NaN</td>\n",
  66. " <td>2012-03-07 13:36:33</td>\n",
  67. " <td>2012-08-27</td>\n",
  68. " <td>2012-08-30</td>\n",
  69. " <td>NaN</td>\n",
  70. " </tr>\n",
  71. " <tr>\n",
  72. " <th>3</th>\n",
  73. " <td>1237</td>\n",
  74. " <td>NaN</td>\n",
  75. " <td>NaN</td>\n",
  76. " <td>2012-03-07 13:38:26</td>\n",
  77. " <td>2012-03-19</td>\n",
  78. " <td>2012-03-22</td>\n",
  79. " <td>NaN</td>\n",
  80. " </tr>\n",
  81. " <tr>\n",
  82. " <th>4</th>\n",
  83. " <td>1246</td>\n",
  84. " <td>NaN</td>\n",
  85. " <td>NaN</td>\n",
  86. " <td>2012-03-08 11:06:00</td>\n",
  87. " <td>2012-08-20</td>\n",
  88. " <td>2012-08-24</td>\n",
  89. " <td>NaN</td>\n",
  90. " </tr>\n",
  91. " </tbody>\n",
  92. "</table>\n",
  93. "</div>"
  94. ],
  95. "text/plain": [
  96. " property_id ad_latitude ad_longitude create_date check_in_date \\\n",
  97. "0 1246 NaN NaN 2012-03-07 13:26:38 2012-08-20 \n",
  98. "1 1246 NaN NaN 2012-03-07 13:35:03 2012-08-27 \n",
  99. "2 1237 NaN NaN 2012-03-07 13:36:33 2012-08-27 \n",
  100. "3 1237 NaN NaN 2012-03-07 13:38:26 2012-03-19 \n",
  101. "4 1246 NaN NaN 2012-03-08 11:06:00 2012-08-20 \n",
  102. "\n",
  103. " check_out_date country_code \n",
  104. "0 2012-08-25 NaN \n",
  105. "1 2012-08-30 NaN \n",
  106. "2 2012-08-30 NaN \n",
  107. "3 2012-03-22 NaN \n",
  108. "4 2012-08-24 NaN "
  109. ]
  110. },
  111. "execution_count": 1,
  112. "metadata": {},
  113. "output_type": "execute_result"
  114. }
  115. ],
  116. "source": [
  117. "import pandas as pd\n",
  118. "\n",
  119. "df = pd.read_csv('/home/valentina/Documents/Please/please3.csv', parse_dates=['check_in_date'])\n",
  120. "\n",
  121. "df.head()"
  122. ]
  123. },
  124. {
  125. "cell_type": "code",
  126. "execution_count": 2,
  127. "metadata": {},
  128. "outputs": [
  129. {
  130. "data": {
  131. "text/plain": [
  132. "property_id int64\n",
  133. "ad_latitude float64\n",
  134. "ad_longitude float64\n",
  135. "create_date object\n",
  136. "check_in_date datetime64[ns]\n",
  137. "check_out_date object\n",
  138. "country_code object\n",
  139. "dtype: object"
  140. ]
  141. },
  142. "execution_count": 2,
  143. "metadata": {},
  144. "output_type": "execute_result"
  145. }
  146. ],
  147. "source": [
  148. "df.dtypes"
  149. ]
  150. },
  151. {
  152. "cell_type": "code",
  153. "execution_count": 3,
  154. "metadata": {
  155. "collapsed": true
  156. },
  157. "outputs": [],
  158. "source": [
  159. "import datetime\n",
  160. "\n",
  161. "# Convert into datetime\n",
  162. "\n",
  163. "df['check_out_date'] = pd.to_datetime(df['check_out_date'])\n",
  164. "df['create_date'] = pd.to_datetime(df['create_date'])\n",
  165. "\n",
  166. "# Add column with difference\n",
  167. "\n",
  168. "df['diff'] = df['check_out_date'] - df['check_in_date']\n",
  169. "\n",
  170. "# Drop bookings for more than 10 days.\n",
  171. "\n",
  172. "dfclean = df[(df['diff'].dt.days < 10)]"
  173. ]
  174. },
  175. {
  176. "cell_type": "code",
  177. "execution_count": 4,
  178. "metadata": {
  179. "collapsed": true
  180. },
  181. "outputs": [],
  182. "source": [
  183. "# Split dataset for each month of check_in\n",
  184. "\n",
  185. "for year in dfclean.check_in_date.apply(lambda x: x.year).unique():\n",
  186. " for month in dfclean.check_in_date.apply(lambda x: x.month).unique():\n",
  187. " view = dfclean[dfclean.check_in_date.apply(lambda x: x.month == month and x.year==year)]\n",
  188. " if view.size:\n",
  189. " view.to_csv('/home/valentina/Documents/Months/{}_{:0>2}.csv'.format(year, month))"
  190. ]
  191. }
  192. ],
  193. "metadata": {
  194. "kernelspec": {
  195. "display_name": "Python 3",
  196. "language": "python",
  197. "name": "python3"
  198. },
  199. "language_info": {
  200. "codemirror_mode": {
  201. "name": "ipython",
  202. "version": 3
  203. },
  204. "file_extension": ".py",
  205. "mimetype": "text/x-python",
  206. "name": "python",
  207. "nbconvert_exporter": "python",
  208. "pygments_lexer": "ipython3",
  209. "version": "3.5.2"
  210. }
  211. },
  212. "nbformat": 4,
  213. "nbformat_minor": 2
  214. }
Add Comment
Please, Sign In to add comment