Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style>\n",
- " .dataframe thead tr:only-child th {\n",
- " text-align: right;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: left;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>property_id</th>\n",
- " <th>ad_latitude</th>\n",
- " <th>ad_longitude</th>\n",
- " <th>create_date</th>\n",
- " <th>check_in_date</th>\n",
- " <th>check_out_date</th>\n",
- " <th>country_code</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>1246</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2012-03-07 13:26:38</td>\n",
- " <td>2012-08-20</td>\n",
- " <td>2012-08-25</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>1246</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2012-03-07 13:35:03</td>\n",
- " <td>2012-08-27</td>\n",
- " <td>2012-08-30</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>1237</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2012-03-07 13:36:33</td>\n",
- " <td>2012-08-27</td>\n",
- " <td>2012-08-30</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>1237</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2012-03-07 13:38:26</td>\n",
- " <td>2012-03-19</td>\n",
- " <td>2012-03-22</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>1246</td>\n",
- " <td>NaN</td>\n",
- " <td>NaN</td>\n",
- " <td>2012-03-08 11:06:00</td>\n",
- " <td>2012-08-20</td>\n",
- " <td>2012-08-24</td>\n",
- " <td>NaN</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " property_id ad_latitude ad_longitude create_date check_in_date \\\n",
- "0 1246 NaN NaN 2012-03-07 13:26:38 2012-08-20 \n",
- "1 1246 NaN NaN 2012-03-07 13:35:03 2012-08-27 \n",
- "2 1237 NaN NaN 2012-03-07 13:36:33 2012-08-27 \n",
- "3 1237 NaN NaN 2012-03-07 13:38:26 2012-03-19 \n",
- "4 1246 NaN NaN 2012-03-08 11:06:00 2012-08-20 \n",
- "\n",
- " check_out_date country_code \n",
- "0 2012-08-25 NaN \n",
- "1 2012-08-30 NaN \n",
- "2 2012-08-30 NaN \n",
- "3 2012-03-22 NaN \n",
- "4 2012-08-24 NaN "
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import pandas as pd\n",
- "\n",
- "# Path of the bookings CSV that the rest of the notebook works on.\n",
- "BOOKINGS_CSV = '/home/valentina/Documents/Please/please3.csv'\n",
- "\n",
- "# Only check_in_date is parsed as a datetime while loading; the other\n",
- "# date columns are converted in a later cell.\n",
- "df = pd.read_csv(BOOKINGS_CSV, parse_dates=['check_in_date'])\n",
- "\n",
- "# Preview the first rows.\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "property_id int64\n",
- "ad_latitude float64\n",
- "ad_longitude float64\n",
- "create_date object\n",
- "check_in_date datetime64[ns]\n",
- "check_out_date object\n",
- "country_code object\n",
- "dtype: object"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.dtypes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import datetime\n",
- "\n",
- "# Parse the two date columns that were loaded as plain strings.\n",
- "\n",
- "for date_column in ('check_out_date', 'create_date'):\n",
- "    df[date_column] = pd.to_datetime(df[date_column])\n",
- "\n",
- "# Length of each stay as a timedelta (check-out minus check-in).\n",
- "\n",
- "df['diff'] = df['check_out_date'].sub(df['check_in_date'])\n",
- "\n",
- "# Keep only bookings whose stay is shorter than 10 whole days\n",
- "# (a stay of exactly 10 days is dropped as well).\n",
- "\n",
- "stay_days = df['diff'].dt.days\n",
- "dfclean = df[stay_days.lt(10)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Split dataset for each month of check_in\n",
- "\n",
- "for year in dfclean.check_in_date.apply(lambda x: x.year).unique():\n",
- " for month in dfclean.check_in_date.apply(lambda x: x.month).unique():\n",
- " view = dfclean[dfclean.check_in_date.apply(lambda x: x.month == month and x.year==year)]\n",
- " if view.size:\n",
- " view.to_csv('/home/valentina/Documents/Months/{}_{:0>2}.csv'.format(year, month))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment