Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [],
- "source": [
- "import dask.array as da\n",
- "import numpy as np\n",
- "import dask\n",
- "import xarray as xr\n",
- "import math\n",
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [],
- "source": [
- "def climatology(ds):\n",
- "    \"\"\"Return the per-season mean of ``ds``.\n",
- "\n",
- "    The input is grouped by the ``time.season`` accessor and each group is\n",
- "    averaged along the ``time`` dimension.\n",
- "    \"\"\"\n",
- "    return ds.groupby(\"time.season\").mean(dim=\"time\")\n",
- "def anomaly(ds):\n",
- "    \"\"\"Return ``ds`` with its seasonal climatology subtracted.\n",
- "\n",
- "    The data are grouped by ``time.season`` and the matching seasonal mean\n",
- "    from ``climatology(ds)`` is subtracted from each group.\n",
- "    \"\"\"\n",
- "    by_season = ds.groupby(\"time.season\")\n",
- "    return by_season - climatology(ds)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Target chunk size that dask uses when an array is created with chunks='auto'.\n",
- "dask.config.set({\"array.chunk-size\": '256MB'})\n",
- "timesteps=20834\n",
- "lat=320\n",
- "lon=384\n",
- "# `shape` was previously undefined (NameError on a fresh kernel).\n",
- "# Axis order must match the dims (time, lon, lat) passed to xr.DataArray below.\n",
- "shape = (timesteps, lon, lat)\n",
- "# Seeded random state so the synthetic data is reproducible.\n",
- "random_data = da.random.RandomState(0).standard_normal(shape, chunks='auto')\n",
- "lats = xr.DataArray(np.linspace(start=-90, stop=90, num=lat), dims=[\"lat\"])\n",
- "lons = xr.DataArray(np.linspace(start=-180, stop=180, num=lon), dims=[\"lon\"])\n",
- "times = xr.DataArray(pd.date_range(start=\"1980-01-01\", freq=\"1D\", periods=timesteps), dims=[\"time\"])\n",
- "ds = xr.DataArray(\n",
- "    random_data,\n",
- "    dims=[\"time\", \"lon\", \"lat\"],\n",
- "    coords={\"time\": times, \"lon\": lons, \"lat\": lats},\n",
- "    name=\"sst\",\n",
- ").to_dataset()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<xarray.Dataset>\n",
- "Dimensions: (lat: 320, lon: 384, time: 20834)\n",
- "Coordinates:\n",
- " * time (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
- " * lon (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
- " * lat (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
- "Data variables:\n",
- " sst (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(317, 192, 160)>"
- ]
- },
- "execution_count": 34,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ds"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "When the chunk size is specified as 370MB in dask, the resulting dask array is created with a chunk size of 187MB."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<xarray.Dataset>\n",
- "Dimensions: (lat: 320, lon: 384, time: 20834)\n",
- "Coordinates:\n",
- " * lon (lon) float64 -180.0 -179.1 -178.1 -177.2 ... 178.1 179.1 180.0\n",
- " * lat (lat) float64 -90.0 -89.44 -88.87 -88.31 ... 88.31 88.87 89.44 90.0\n",
- " * time (time) datetime64[ns] 1980-01-01 1980-01-02 ... 2037-01-14\n",
- " season (time) <U3 'DJF' 'DJF' 'DJF' 'DJF' ... 'DJF' 'DJF' 'DJF' 'DJF'\n",
- "Data variables:\n",
- " sst (time, lon, lat) float64 dask.array<shape=(20834, 384, 320), chunksize=(60, 192, 160)>"
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "anomaly(ds)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "pangeobench",
- "language": "python",
- "name": "pangeobench"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement