Guest User

Untitled

a guest
Oct 22nd, 2018
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.88 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import h5py\n",
  10. "import dask.dataframe as dd\n",
  11. "import dask.array as da"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 115,
  17. "metadata": {},
  18. "outputs": [
  19. {
  20. "data": {
  21. "text/html": [
  22. "<div><strong>Dask DataFrame Structure:</strong></div>\n",
  23. "<div>\n",
  24. "<style scoped>\n",
  25. " .dataframe tbody tr th:only-of-type {\n",
  26. " vertical-align: middle;\n",
  27. " }\n",
  28. "\n",
  29. " .dataframe tbody tr th {\n",
  30. " vertical-align: top;\n",
  31. " }\n",
  32. "\n",
  33. " .dataframe thead th {\n",
  34. " text-align: right;\n",
  35. " }\n",
  36. "</style>\n",
  37. "<table border=\"1\" class=\"dataframe\">\n",
  38. " <thead>\n",
  39. " <tr style=\"text-align: right;\">\n",
  40. " <th></th>\n",
  41. " <th>rx</th>\n",
  42. " <th>tx</th>\n",
  43. " </tr>\n",
  44. " <tr>\n",
  45. " <th>npartitions=32</th>\n",
  46. " <th></th>\n",
  47. " <th></th>\n",
  48. " </tr>\n",
  49. " </thead>\n",
  50. " <tbody>\n",
  51. " <tr>\n",
  52. " <th>2017-08-07 13:07:18.261913088</th>\n",
  53. " <td>float64</td>\n",
  54. " <td>float64</td>\n",
  55. " </tr>\n",
  56. " <tr>\n",
  57. " <th>2017-08-08 07:10:18.283332096</th>\n",
  58. " <td>...</td>\n",
  59. " <td>...</td>\n",
  60. " </tr>\n",
  61. " <tr>\n",
  62. " <th>...</th>\n",
  63. " <td>...</td>\n",
  64. " <td>...</td>\n",
  65. " </tr>\n",
  66. " <tr>\n",
  67. " <th>2017-08-31 05:05:18.282693888</th>\n",
  68. " <td>...</td>\n",
  69. " <td>...</td>\n",
  70. " </tr>\n",
  71. " <tr>\n",
  72. " <th>2017-08-31 22:35:18.225156096</th>\n",
  73. " <td>...</td>\n",
  74. " <td>...</td>\n",
  75. " </tr>\n",
  76. " </tbody>\n",
  77. "</table>\n",
  78. "</div>\n",
  79. "<div>Dask Name: set_index, 419 tasks</div>"
  80. ],
  81. "text/plain": [
  82. "Dask DataFrame Structure:\n",
  83. " rx tx\n",
  84. "npartitions=32 \n",
  85. "2017-08-07 13:07:18.261913088 float64 float64\n",
  86. "2017-08-08 07:10:18.283332096 ... ...\n",
  87. "... ... ...\n",
  88. "2017-08-31 05:05:18.282693888 ... ...\n",
  89. "2017-08-31 22:35:18.225156096 ... ...\n",
  90. "Dask Name: set_index, 419 tasks"
  91. ]
  92. },
  93. "execution_count": 115,
  94. "metadata": {},
  95. "output_type": "execute_result"
  96. }
  97. ],
  98. "source": [
  99. "h5_reader = h5py.File('/pd/data/regclim_data/raw/cml/ericsson_tn_monthly_2017_2018/cmls_2017_08.h5', \n",
  100. " mode='r')\n",
  101. "cml_ids = h5_reader['/'].keys()\n",
  102. "channels = h5_reader['/'][cml_ids[0]].keys()\n",
  103. "\n",
  104. "# Link to data in HDF5 file\n",
  105. "rx = h5_reader['/'][cml_ids[0]][channels[0]]['rx']\n",
  106. "tx = h5_reader['/'][cml_ids[0]][channels[0]]['tx']\n",
  107. "time = h5_reader['/'][cml_ids[0]][channels[0]]['time']\n",
  108. "\n",
  109. "# Concatenate into DaskDataframe\n",
  110. "ddf = dd.from_dask_array(\n",
  111. " da.stack([\n",
  112. " da.from_array(rx, chunks=rx.chunks),\n",
  113. " da.from_array(tx, chunks=tx.chunks),\n",
  114. " da.from_array(time, chunks=time.chunks)], \n",
  115. " axis=1,\n",
  116. " ),\n",
  117. " columns=['rx', 'tx', 'time']\n",
  118. ")\n",
  119. "\n",
  120. "# Cast to correct time representation and set time as index\n",
  121. "ddf.time = (ddf.time * 1e9).astype('M8[ns]')\n",
  122. "ddf = ddf.set_index('time', sorted=True)\n",
  123. "ddf"
  124. ]
  125. },
  126. {
  127. "cell_type": "code",
  128. "execution_count": 111,
  129. "metadata": {},
  130. "outputs": [],
  131. "source": [
  132. "ddf['txrx'] = ddf.tx - ddf.rx"
  133. ]
  134. },
  135. {
  136. "cell_type": "code",
  137. "execution_count": 112,
  138. "metadata": {},
  139. "outputs": [
  140. {
  141. "data": {
  142. "text/html": [
  143. "<div><strong>Dask DataFrame Structure:</strong></div>\n",
  144. "<div>\n",
  145. "<style scoped>\n",
  146. " .dataframe tbody tr th:only-of-type {\n",
  147. " vertical-align: middle;\n",
  148. " }\n",
  149. "\n",
  150. " .dataframe tbody tr th {\n",
  151. " vertical-align: top;\n",
  152. " }\n",
  153. "\n",
  154. " .dataframe thead th {\n",
  155. " text-align: right;\n",
  156. " }\n",
  157. "</style>\n",
  158. "<table border=\"1\" class=\"dataframe\">\n",
  159. " <thead>\n",
  160. " <tr style=\"text-align: right;\">\n",
  161. " <th></th>\n",
  162. " <th>rx</th>\n",
  163. " <th>tx</th>\n",
  164. " <th>txrx</th>\n",
  165. " </tr>\n",
  166. " <tr>\n",
  167. " <th>npartitions=32</th>\n",
  168. " <th></th>\n",
  169. " <th></th>\n",
  170. " <th></th>\n",
  171. " </tr>\n",
  172. " </thead>\n",
  173. " <tbody>\n",
  174. " <tr>\n",
  175. " <th>2017-08-07 13:07:18.261913088</th>\n",
  176. " <td>float64</td>\n",
  177. " <td>float64</td>\n",
  178. " <td>float64</td>\n",
  179. " </tr>\n",
  180. " <tr>\n",
  181. " <th>2017-08-08 07:10:18.283332096</th>\n",
  182. " <td>...</td>\n",
  183. " <td>...</td>\n",
  184. " <td>...</td>\n",
  185. " </tr>\n",
  186. " <tr>\n",
  187. " <th>...</th>\n",
  188. " <td>...</td>\n",
  189. " <td>...</td>\n",
  190. " <td>...</td>\n",
  191. " </tr>\n",
  192. " <tr>\n",
  193. " <th>2017-08-31 05:05:18.282693888</th>\n",
  194. " <td>...</td>\n",
  195. " <td>...</td>\n",
  196. " <td>...</td>\n",
  197. " </tr>\n",
  198. " <tr>\n",
  199. " <th>2017-08-31 22:35:18.225156096</th>\n",
  200. " <td>...</td>\n",
  201. " <td>...</td>\n",
  202. " <td>...</td>\n",
  203. " </tr>\n",
  204. " </tbody>\n",
  205. "</table>\n",
  206. "</div>\n",
  207. "<div>Dask Name: assign, 547 tasks</div>"
  208. ],
  209. "text/plain": [
  210. "Dask DataFrame Structure:\n",
  211. " rx tx txrx\n",
  212. "npartitions=32 \n",
  213. "2017-08-07 13:07:18.261913088 float64 float64 float64\n",
  214. "2017-08-08 07:10:18.283332096 ... ... ...\n",
  215. "... ... ... ...\n",
  216. "2017-08-31 05:05:18.282693888 ... ... ...\n",
  217. "2017-08-31 22:35:18.225156096 ... ... ...\n",
  218. "Dask Name: assign, 547 tasks"
  219. ]
  220. },
  221. "execution_count": 112,
  222. "metadata": {},
  223. "output_type": "execute_result"
  224. }
  225. ],
  226. "source": [
  227. "ddf"
  228. ]
  229. },
  230. {
  231. "cell_type": "code",
  232. "execution_count": 113,
  233. "metadata": {},
  234. "outputs": [
  235. {
  236. "data": {
  237. "text/html": [
  238. "<div>\n",
  239. "<style scoped>\n",
  240. " .dataframe tbody tr th:only-of-type {\n",
  241. " vertical-align: middle;\n",
  242. " }\n",
  243. "\n",
  244. " .dataframe tbody tr th {\n",
  245. " vertical-align: top;\n",
  246. " }\n",
  247. "\n",
  248. " .dataframe thead th {\n",
  249. " text-align: right;\n",
  250. " }\n",
  251. "</style>\n",
  252. "<table border=\"1\" class=\"dataframe\">\n",
  253. " <thead>\n",
  254. " <tr style=\"text-align: right;\">\n",
  255. " <th></th>\n",
  256. " <th>rx</th>\n",
  257. " <th>tx</th>\n",
  258. " <th>txrx</th>\n",
  259. " </tr>\n",
  260. " <tr>\n",
  261. " <th>time</th>\n",
  262. " <th></th>\n",
  263. " <th></th>\n",
  264. " <th></th>\n",
  265. " </tr>\n",
  266. " </thead>\n",
  267. " <tbody>\n",
  268. " <tr>\n",
  269. " <th>2017-08-07 13:07:18.261913088</th>\n",
  270. " <td>-39.8</td>\n",
  271. " <td>16.0</td>\n",
  272. " <td>55.8</td>\n",
  273. " </tr>\n",
  274. " <tr>\n",
  275. " <th>2017-08-07 13:08:18.294920960</th>\n",
  276. " <td>-40.1</td>\n",
  277. " <td>16.0</td>\n",
  278. " <td>56.1</td>\n",
  279. " </tr>\n",
  280. " <tr>\n",
  281. " <th>2017-08-07 13:09:18.280340992</th>\n",
  282. " <td>-40.1</td>\n",
  283. " <td>16.0</td>\n",
  284. " <td>56.1</td>\n",
  285. " </tr>\n",
  286. " <tr>\n",
  287. " <th>2017-08-07 13:10:18.248403968</th>\n",
  288. " <td>-39.8</td>\n",
  289. " <td>16.0</td>\n",
  290. " <td>55.8</td>\n",
  291. " </tr>\n",
  292. " <tr>\n",
  293. " <th>2017-08-07 13:11:18.248102912</th>\n",
  294. " <td>-39.8</td>\n",
  295. " <td>16.0</td>\n",
  296. " <td>55.8</td>\n",
  297. " </tr>\n",
  298. " </tbody>\n",
  299. "</table>\n",
  300. "</div>"
  301. ],
  302. "text/plain": [
  303. " rx tx txrx\n",
  304. "time \n",
  305. "2017-08-07 13:07:18.261913088 -39.8 16.0 55.8\n",
  306. "2017-08-07 13:08:18.294920960 -40.1 16.0 56.1\n",
  307. "2017-08-07 13:09:18.280340992 -40.1 16.0 56.1\n",
  308. "2017-08-07 13:10:18.248403968 -39.8 16.0 55.8\n",
  309. "2017-08-07 13:11:18.248102912 -39.8 16.0 55.8"
  310. ]
  311. },
  312. "execution_count": 113,
  313. "metadata": {},
  314. "output_type": "execute_result"
  315. }
  316. ],
  317. "source": [
  318. "ddf.compute().head()"
  319. ]
  320. },
  321. {
  322. "cell_type": "code",
  323. "execution_count": null,
  324. "metadata": {},
  325. "outputs": [],
  326. "source": [
  327. "|"
  328. ]
  329. }
  330. ],
  331. "metadata": {
  332. "kernelspec": {
  333. "display_name": "Python 2",
  334. "language": "python",
  335. "name": "python2"
  336. },
  337. "language_info": {
  338. "codemirror_mode": {
  339. "name": "ipython",
  340. "version": 2
  341. },
  342. "file_extension": ".py",
  343. "mimetype": "text/x-python",
  344. "name": "python",
  345. "nbconvert_exporter": "python",
  346. "pygments_lexer": "ipython2",
  347. "version": "2.7.15"
  348. }
  349. },
  350. "nbformat": 4,
  351. "nbformat_minor": 2
  352. }
Add Comment
Please, Sign In to add comment