Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import cudf as gd\n",
- "import pandas as pd\n",
- "import time"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(200000, 202)\n",
- "CPU times: user 252 ms, sys: 252 ms, total: 504 ms\n",
- "Wall time: 509 ms\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "# Read train.csv on the GPU with cuDF. The file's first row is skipped and\n",
- "# replaced by the explicit column names and dtypes built below.\n",
- "PATH = '../input'\n",
- "cols = ['ID_code', 'target']\n",
- "cols += [f'var_{i}' for i in range(200)]\n",
- "# NOTE(review): ID_code is forced to int32 here; confirm the raw column is\n",
- "# numeric, otherwise the parsed values are not meaningful identifiers.\n",
- "dtypes = ['int32'] * 2 + ['float32'] * 200\n",
- "train_gd = gd.read_csv(\n",
- "    f'{PATH}/train.csv',\n",
- "    names=cols,\n",
- "    dtype=dtypes,\n",
- "    skiprows=1,\n",
- ")\n",
- "print(train_gd.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(200000, 202)\n",
- "CPU times: user 5.08 s, sys: 404 ms, total: 5.48 s\n",
- "Wall time: 5.48 s\n"
- ]
- }
- ],
- "source": [
- "%%time\n",
- "# CPU baseline: read the same CSV with pandas using its default header and\n",
- "# dtype handling (no names/dtype arguments are passed).\n",
- "train_pd = pd.read_csv(f'{PATH}/train.csv')\n",
- "print(train_pd.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>ID_code</th>\n",
- " <th>target</th>\n",
- " <th>var_0</th>\n",
- " <th>var_1</th>\n",
- " <th>var_2</th>\n",
- " <th>var_3</th>\n",
- " <th>var_4</th>\n",
- " <th>var_5</th>\n",
- " <th>var_6</th>\n",
- " <th>var_7</th>\n",
- " <th>...</th>\n",
- " <th>var_190</th>\n",
- " <th>var_191</th>\n",
- " <th>var_192</th>\n",
- " <th>var_193</th>\n",
- " <th>var_194</th>\n",
- " <th>var_195</th>\n",
- " <th>var_196</th>\n",
- " <th>var_197</th>\n",
- " <th>var_198</th>\n",
- " <th>var_199</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>75153670</td>\n",
- " <td>0</td>\n",
- " <td>8.925500</td>\n",
- " <td>-6.7863</td>\n",
- " <td>11.908100</td>\n",
- " <td>5.0930</td>\n",
- " <td>11.460700</td>\n",
- " <td>-9.2834</td>\n",
- " <td>5.1187</td>\n",
- " <td>18.626602</td>\n",
- " <td>...</td>\n",
- " <td>4.4354</td>\n",
- " <td>3.964200</td>\n",
- " <td>3.1364</td>\n",
- " <td>1.691000</td>\n",
- " <td>18.522701</td>\n",
- " <td>-2.3978</td>\n",
- " <td>7.8784</td>\n",
- " <td>8.5635</td>\n",
- " <td>12.780300</td>\n",
- " <td>-1.091400</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>75153671</td>\n",
- " <td>0</td>\n",
- " <td>11.500600</td>\n",
- " <td>-4.1473</td>\n",
- " <td>13.858801</td>\n",
- " <td>5.3890</td>\n",
- " <td>12.362201</td>\n",
- " <td>7.0433</td>\n",
- " <td>5.6208</td>\n",
- " <td>16.533800</td>\n",
- " <td>...</td>\n",
- " <td>7.6421</td>\n",
- " <td>7.721400</td>\n",
- " <td>2.5837</td>\n",
- " <td>10.951600</td>\n",
- " <td>15.430499</td>\n",
- " <td>2.0339</td>\n",
- " <td>8.1267</td>\n",
- " <td>8.7889</td>\n",
- " <td>18.355999</td>\n",
- " <td>1.951800</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>75153672</td>\n",
- " <td>0</td>\n",
- " <td>8.609301</td>\n",
- " <td>-2.7457</td>\n",
- " <td>12.080500</td>\n",
- " <td>7.8928</td>\n",
- " <td>10.582500</td>\n",
- " <td>-9.0837</td>\n",
- " <td>6.9427</td>\n",
- " <td>14.615500</td>\n",
- " <td>...</td>\n",
- " <td>2.9057</td>\n",
- " <td>9.790500</td>\n",
- " <td>1.6704</td>\n",
- " <td>1.685800</td>\n",
- " <td>21.604200</td>\n",
- " <td>3.1417</td>\n",
- " <td>-6.5213</td>\n",
- " <td>8.2675</td>\n",
- " <td>14.722200</td>\n",
- " <td>0.396500</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>75153673</td>\n",
- " <td>0</td>\n",
- " <td>11.060400</td>\n",
- " <td>-2.1518</td>\n",
- " <td>8.952200</td>\n",
- " <td>7.1957</td>\n",
- " <td>12.584599</td>\n",
- " <td>-1.8361</td>\n",
- " <td>5.8428</td>\n",
- " <td>14.925000</td>\n",
- " <td>...</td>\n",
- " <td>4.4666</td>\n",
- " <td>4.743299</td>\n",
- " <td>0.7178</td>\n",
- " <td>1.421400</td>\n",
- " <td>23.034700</td>\n",
- " <td>-1.2706</td>\n",
- " <td>-2.9275</td>\n",
- " <td>10.2922</td>\n",
- " <td>17.969700</td>\n",
- " <td>-8.999599</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>75153674</td>\n",
- " <td>0</td>\n",
- " <td>9.836900</td>\n",
- " <td>-1.4834</td>\n",
- " <td>12.874599</td>\n",
- " <td>6.6375</td>\n",
- " <td>12.277200</td>\n",
- " <td>2.4486</td>\n",
- " <td>5.9405</td>\n",
- " <td>19.251400</td>\n",
- " <td>...</td>\n",
- " <td>-1.4905</td>\n",
- " <td>9.521400</td>\n",
- " <td>-0.1508</td>\n",
- " <td>9.194201</td>\n",
- " <td>13.287600</td>\n",
- " <td>-1.5121</td>\n",
- " <td>3.9267</td>\n",
- " <td>9.5031</td>\n",
- " <td>17.997400</td>\n",
- " <td>-8.810400</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>5 rows × 202 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " ID_code target var_0 var_1 var_2 var_3 var_4 var_5 \\\n",
- "0 75153670 0 8.925500 -6.7863 11.908100 5.0930 11.460700 -9.2834 \n",
- "1 75153671 0 11.500600 -4.1473 13.858801 5.3890 12.362201 7.0433 \n",
- "2 75153672 0 8.609301 -2.7457 12.080500 7.8928 10.582500 -9.0837 \n",
- "3 75153673 0 11.060400 -2.1518 8.952200 7.1957 12.584599 -1.8361 \n",
- "4 75153674 0 9.836900 -1.4834 12.874599 6.6375 12.277200 2.4486 \n",
- "\n",
- " var_6 var_7 ... var_190 var_191 var_192 var_193 var_194 \\\n",
- "0 5.1187 18.626602 ... 4.4354 3.964200 3.1364 1.691000 18.522701 \n",
- "1 5.6208 16.533800 ... 7.6421 7.721400 2.5837 10.951600 15.430499 \n",
- "2 6.9427 14.615500 ... 2.9057 9.790500 1.6704 1.685800 21.604200 \n",
- "3 5.8428 14.925000 ... 4.4666 4.743299 0.7178 1.421400 23.034700 \n",
- "4 5.9405 19.251400 ... -1.4905 9.521400 -0.1508 9.194201 13.287600 \n",
- "\n",
- " var_195 var_196 var_197 var_198 var_199 \n",
- "0 -2.3978 7.8784 8.5635 12.780300 -1.091400 \n",
- "1 2.0339 8.1267 8.7889 18.355999 1.951800 \n",
- "2 3.1417 -6.5213 8.2675 14.722200 0.396500 \n",
- "3 -1.2706 -2.9275 10.2922 17.969700 -8.999599 \n",
- "4 -1.5121 3.9267 9.5031 17.997400 -8.810400 \n",
- "\n",
- "[5 rows x 202 columns]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Preview the first five rows of the cuDF frame, converted to pandas for rich display.\n",
- "train_gd.head(5).to_pandas()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement