from dask.distributed import Client
import pandas as pd
import xarray as xr
import s3fs
import time

if __name__ == '__main__':
    # use however many processors are available on local machine
    client = Client()
    print(client)
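    # Client() with no arguments launches a local scheduler plus worker
    # processes; the printed repr reports the scheduler address and the
    # worker/core/memory totals it ended up with.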

    # Input: list of netcdf file names
    root = '/projects/water/nwm/data/forcing_short_range/'
    dates = pd.date_range(start='2018-03-02T00:00', end='2018-04-11T00:00', freq='H')
    uris = ['{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(root, a.strftime('%Y%m%d'), a.strftime('%H')) for a in dates]
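    # The first URI generated above, for example, expands to
    # /projects/water/nwm/data/forcing_short_range/20180302/nwm.t00z.short_range.forcing.f001.conus.nc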

    # Output: S3 Bucket
    f_zarr = 'rsignell/nwm/test04'
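    # S3Map below treats this as '<bucket>/<key prefix>', i.e. bucket
    # 'rsignell' with the Zarr store rooted at 'nwm/test04'.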

    # Read data using xarray for lazy evaluation and parallel execution
    ds = xr.open_mfdataset(uris, concat_dim='time')
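    # open_mfdataset only reads metadata at this point; each hourly file
    # becomes a dask chunk along the concatenated 'time' dimension, and the
    # actual reads happen in parallel when the dataset is written below.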

    # Drop these problematic, unimportant variables
    ds = ds.drop(['ProjectionCoordinateSystem', 'time_bounds'])

    # Write data using xarray.to_zarr
    fs = s3fs.S3FileSystem(anon=False)
    d = s3fs.S3Map(f_zarr, s3=fs)
    ds.to_zarr(store=d, mode='w')
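
A minimal read-back sketch, assuming the same bucket path and AWS credentials used above (the store name 'rsignell/nwm/test04' is taken from f_zarr):

# Hypothetical check that the Zarr store round-trips.
import s3fs
import xarray as xr

fs = s3fs.S3FileSystem(anon=False)
store = s3fs.S3Map('rsignell/nwm/test04', s3=fs)
ds_back = xr.open_zarr(store)   # lazy: variables come back as dask arrays
print(ds_back)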