Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import h5py\n",
- "import dask.dataframe as dd\n",
- "import dask.array as da"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 115,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div><strong>Dask DataFrame Structure:</strong></div>\n",
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>rx</th>\n",
- " <th>tx</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>npartitions=32</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>2017-08-07 13:07:18.261913088</th>\n",
- " <td>float64</td>\n",
- " <td>float64</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-08 07:10:18.283332096</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-31 05:05:18.282693888</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-31 22:35:18.225156096</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>\n",
- "<div>Dask Name: set_index, 419 tasks</div>"
- ],
- "text/plain": [
- "Dask DataFrame Structure:\n",
- " rx tx\n",
- "npartitions=32 \n",
- "2017-08-07 13:07:18.261913088 float64 float64\n",
- "2017-08-08 07:10:18.283332096 ... ...\n",
- "... ... ...\n",
- "2017-08-31 05:05:18.282693888 ... ...\n",
- "2017-08-31 22:35:18.225156096 ... ...\n",
- "Dask Name: set_index, 419 tasks"
- ]
- },
- "execution_count": 115,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Monthly CML archive to read. The file is opened read-only and must stay\n",
- "# open: the dask arrays built below only reference the HDF5 datasets and\n",
- "# read from them when the graph is computed.\n",
- "H5_PATH = '/pd/data/regclim_data/raw/cml/ericsson_tn_monthly_2017_2018/cmls_2017_08.h5'\n",
- "h5_reader = h5py.File(H5_PATH, mode='r')\n",
- "\n",
- "# list() keeps the key collections indexable even where keys() returns a\n",
- "# view instead of a list (e.g. on Python 3).\n",
- "cml_ids = list(h5_reader['/'].keys())\n",
- "channels = list(h5_reader['/'][cml_ids[0]].keys())\n",
- "\n",
- "# Link to data in HDF5 file (first channel of the first CML only)\n",
- "channel_group = h5_reader['/'][cml_ids[0]][channels[0]]\n",
- "rx = channel_group['rx']\n",
- "tx = channel_group['tx']\n",
- "time = channel_group['time']\n",
- "\n",
- "# Concatenate into DaskDataframe: one column per dataset, chunked the same\n",
- "# way the datasets are chunked on disk.\n",
- "# NOTE(review): assumes the datasets are stored chunked (`.chunks` is not\n",
- "# None) -- confirm for other files.\n",
- "ddf = dd.from_dask_array(\n",
- "    da.stack(\n",
- "        [\n",
- "            da.from_array(rx, chunks=rx.chunks),\n",
- "            da.from_array(tx, chunks=tx.chunks),\n",
- "            da.from_array(time, chunks=time.chunks),\n",
- "        ],\n",
- "        axis=1,\n",
- "    ),\n",
- "    columns=['rx', 'tx', 'time']\n",
- ")\n",
- "\n",
- "# Cast to correct time representation and set time as index.\n",
- "# The stored values are scaled by 1e9 and interpreted as datetime64[ns];\n",
- "# sorted=True tells dask the column is already ordered.\n",
- "ddf['time'] = (ddf['time'] * 1e9).astype('M8[ns]')\n",
- "ddf = ddf.set_index('time', sorted=True)\n",
- "ddf"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 111,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Derived column: transmit level minus receive level.\n",
- "ddf['txrx'] = ddf['tx'] - ddf['rx']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 112,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div><strong>Dask DataFrame Structure:</strong></div>\n",
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>rx</th>\n",
- " <th>tx</th>\n",
- " <th>txrx</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>npartitions=32</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>2017-08-07 13:07:18.261913088</th>\n",
- " <td>float64</td>\n",
- " <td>float64</td>\n",
- " <td>float64</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-08 07:10:18.283332096</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>...</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-31 05:05:18.282693888</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-31 22:35:18.225156096</th>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " <td>...</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>\n",
- "<div>Dask Name: assign, 547 tasks</div>"
- ],
- "text/plain": [
- "Dask DataFrame Structure:\n",
- " rx tx txrx\n",
- "npartitions=32 \n",
- "2017-08-07 13:07:18.261913088 float64 float64 float64\n",
- "2017-08-08 07:10:18.283332096 ... ... ...\n",
- "... ... ... ...\n",
- "2017-08-31 05:05:18.282693888 ... ... ...\n",
- "2017-08-31 22:35:18.225156096 ... ... ...\n",
- "Dask Name: assign, 547 tasks"
- ]
- },
- "execution_count": 112,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ddf"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 113,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>rx</th>\n",
- " <th>tx</th>\n",
- " <th>txrx</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>time</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>2017-08-07 13:07:18.261913088</th>\n",
- " <td>-39.8</td>\n",
- " <td>16.0</td>\n",
- " <td>55.8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-07 13:08:18.294920960</th>\n",
- " <td>-40.1</td>\n",
- " <td>16.0</td>\n",
- " <td>56.1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-07 13:09:18.280340992</th>\n",
- " <td>-40.1</td>\n",
- " <td>16.0</td>\n",
- " <td>56.1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-07 13:10:18.248403968</th>\n",
- " <td>-39.8</td>\n",
- " <td>16.0</td>\n",
- " <td>55.8</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2017-08-07 13:11:18.248102912</th>\n",
- " <td>-39.8</td>\n",
- " <td>16.0</td>\n",
- " <td>55.8</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " rx tx txrx\n",
- "time \n",
- "2017-08-07 13:07:18.261913088 -39.8 16.0 55.8\n",
- "2017-08-07 13:08:18.294920960 -40.1 16.0 56.1\n",
- "2017-08-07 13:09:18.280340992 -40.1 16.0 56.1\n",
- "2017-08-07 13:10:18.248403968 -39.8 16.0 55.8\n",
- "2017-08-07 13:11:18.248102912 -39.8 16.0 55.8"
- ]
- },
- "execution_count": 113,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Preview the first rows. head() only evaluates the leading partition,\n",
- "# whereas compute().head() would load every partition into memory first.\n",
- "ddf.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.15"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment