SHARE
TWEET

Untitled

a guest Oct 16th, 2019 75 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. {
  2.  "cells": [
  3.   {
  4.    "cell_type": "code",
  5.    "execution_count": 28,
  6.    "metadata": {},
  7.    "outputs": [],
  8.    "source": [
  9.     "import dask.array as da\n",
  10.     "import numpy as np\n",
  11.     "import dask\n",
  12.     "import xarray as xr\n",
  13.     "import math\n",
  14.     "import pandas as pd"
  15.    ]
  16.   },
  17.   {
  18.    "cell_type": "code",
  19.    "execution_count": 29,
  20.    "metadata": {},
  21.    "outputs": [],
  22.    "source": [
  23.     "def climatology(ds):\n",
  24.     "    \"\"\"Average `ds` over time separately for each season.\"\"\"\n",
  25.     "    return ds.groupby(\"time.season\").mean(dim=\"time\")\n",
  26.     "def anomaly(ds):\n",
  27.     "    \"\"\"Subtract each season's climatology from `ds`.\"\"\"\n",
  28.     "    by_season = ds.groupby(\"time.season\")\n",
  29.     "    return by_season - climatology(ds)"
  30.    ]
  31.   },
  32.   {
  33.    "cell_type": "code",
  34.    "execution_count": 33,
  35.    "metadata": {},
  36.    "outputs": [],
  37.    "source": [
  38.     "dask.config.set({\"array.chunk-size\": '256MB'})  # target chunk size used by chunks='auto'\n",
  39.     "timesteps=20834  # number of daily time steps\n",
  40.     "lat=320  # number of latitude points\n",
  41.     "lon=384  # number of longitude points\n",
  42.     "random_data = da.random.RandomState(0).standard_normal((timesteps, lon, lat), chunks='auto')  # shape was an undefined name; built here in dims order (time, lon, lat)\n",
  43.     "lats = xr.DataArray(np.linspace(start=-90, stop=90, num=lat), dims=[\"lat\"])\n",
  44.     "lons = xr.DataArray(np.linspace(start=-180, stop=180, num=lon), dims=[\"lon\"])\n",
  45.     "times = xr.DataArray(pd.date_range(start=\"1980-01-01\", freq=\"1D\", periods=timesteps), dims=[\"time\"])\n",
  46.     "ds = xr.DataArray(\n",
  47.     "        random_data,\n",
  48.     "        dims=[\"time\", \"lon\", \"lat\"],\n",
  49.     "        coords={\"time\": times, \"lon\": lons, \"lat\": lats},\n",
  50.     "        name=\"sst\",\n",
  51.     ").to_dataset()\n"
  52.    ]
  53.   },
  54.   {
  55.    "cell_type": "code",
  56.    "execution_count": 34,
  57.    "metadata": {},
  58.    "outputs": [
  59.     {
  60.      "data": {
  61.       "text/plain": [
  62.        "<xarray.Dataset>\n",
  63.        "Dimensions:  (lat: 320, lon: 384, time: 20834)\n",
  64.        "Coordinates:\n",
  65.        "  * time     (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
  66.        "  * lon      (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
  67.        "  * lat      (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
  68.        "Data variables:\n",
  69.        "    sst      (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(317, 192, 160)>"
  70.       ]
  71.      },
  72.      "execution_count": 34,
  73.      "metadata": {},
  74.      "output_type": "execute_result"
  75.     }
  76.    ],
  77.    "source": [
  78.     "ds"
  79.    ]
  80.   },
  81.   {
  82.    "cell_type": "markdown",
  83.    "metadata": {},
  84.    "source": [
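  85.     "When specifying the chunk size as 370MB in dask, the dask array is created with a chunk size of 187MB. Note: the configuration cell above sets array.chunk-size to 256MB, and the displayed chunks of shape (317, 192, 160) hold about 78MB of float64 data each, so the figures quoted here do not match this run."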
  86.    ]
  87.   },
  88.   {
  89.    "cell_type": "code",
  90.    "execution_count": 35,
  91.    "metadata": {},
  92.    "outputs": [
  93.     {
  94.      "data": {
  95.       "text/plain": [
  96.        "<xarray.Dataset>\n",
  97.        "Dimensions:  (lat: 320, lon: 384, time: 20834)\n",
  98.        "Coordinates:\n",
  99.        "  * lon      (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
  100.        "  * lat      (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
  101.        "  * time     (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
  102.        "    season   (time) <U3 'DJF' 'DJF' 'DJF' 'DJF' ... 'DJF' 'DJF' 'DJF' 'DJF'\n",
  103.        "Data variables:\n",
  104.        "    sst      (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(60, 192, 160)>"
  105.       ]
  106.      },
  107.      "execution_count": 35,
  108.      "metadata": {},
  109.      "output_type": "execute_result"
  110.     }
  111.    ],
  112.    "source": [
  113.     "anomaly(ds)"
  114.    ]
  115.   }
  116.  ],
  117.  "metadata": {
  118.   "kernelspec": {
  119.    "display_name": "pangeobench",
  120.    "language": "python",
  121.    "name": "pangeobench"
  122.   },
  123.   "language_info": {
  124.    "codemirror_mode": {
  125.     "name": "ipython",
  126.     "version": 3
  127.    },
  128.    "file_extension": ".py",
  129.    "mimetype": "text/x-python",
  130.    "name": "python",
  131.    "nbconvert_exporter": "python",
  132.    "pygments_lexer": "ipython3",
  133.    "version": "3.6.8"
  134.   }
  135.  },
  136.  "nbformat": 4,
  137.  "nbformat_minor": 2
  138. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top