Skip to content

Instantly share code, notes, and snippets.

@ellesmith88
Last active June 15, 2021 13:17
Show Gist options
  • Save ellesmith88/532e6395afe1b53567dd564b15696d0e to your computer and use it in GitHub Desktop.
Save ellesmith88/532e6395afe1b53567dd564b15696d0e to your computer and use it in GitHub Desktop.
Decadal fixes
import xarray as xr
import numpy as np
import re
from datetime import datetime
from roocs_utils.xarray_utils import xarray_utils as xu
def edit_var_attrs(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray Dataset
Change the attributes of a variable.
"""
var_id = operands.get("var_id")
attrs = operands.get("attrs")
for k, v in operands.get("attrs").items():
ds[var_id].attrs[k] = v
return ds
def edit_global_attrs(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray DataArray
Change the gloabl attributes.
"""
attrs = operands.get("attrs")
for k, v in operands.get("attrs").items():
ds.attrs[k] = v
return ds
def add_global_attrs_if_needed(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray Dataset
Add a global attribute if it doesn't already exist.
"""
attrs = operands.get("attrs")
for k, v in operands.get("attrs").items():
# check if the key already exists before setting it
if not ds.attrs.get(k, None):
ds.attrs[k] = v
return ds
def add_coord(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray DataArray
Add a coordinate.
"""
var_id = operands.get("var_id")
coords = operands.get("coords")
value = operands.get("value")
dtype = operands.get("dtype")
ds = ds.assign_coords({f"{var_id}": (coords, np.array(value, dtype=dtype))})
for k, v in operands.get("attrs").items():
ds[var_id].attrs[k] = v
for k, v in operands.get("encoding").items():
ds[var_id].encoding[k] = v
# update coordinates of main variable of dataset
main_var = xu.get_main_variable(ds)
main_var_coords = ds[main_var].encoding.get('coordinates', '')
main_var_coords += f" {var_id}"
ds[main_var].encoding['coordinates'] = main_var_coords
return ds
def add_scalar_coord(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray Dataset
Add a scalar coordinate.
"""
var_id = operands.get("var_id")
value = operands.get("value")
dtype = operands.get("dtype")
ds = ds.assign_coords({f"{var_id}": np.array(value, dtype=dtype)})
for k, v in operands.get("attrs").items():
ds[var_id].attrs[k] = v
if operands.get("encoding"):
for k, v in operands.get("encoding").items():
ds[var_id].encoding[k] = v
# update coordinates of main variable of dataset
main_var = xu.get_main_variable(ds)
main_var_coords = ds[main_var].encoding.get('coordinates', '')
main_var_coords += f" {var_id}"
ds[main_var].encoding['coordinates'] = main_var_coords
return ds
def add_data_var(ds, **operands):
"""
:param ds: Xarray DataSet
:param operands: sequence of arguments
:return: Xarray Dataset
Add a data variable.
"""
var_id = operands.get("var_id")
value = operands.get("value")
dtype = operands.get("dtype")
main_var = xu.get_main_variable(ds)
ds = ds.assign({f"{var_id}": np.array(value, dtype=dtype)})
for k, v in operands.get("attrs").items():
ds[var_id].attrs[k] = v
return ds
def remove_fill_values(ds):
"""
:param ds: Xarray Dataset
:return: Xarray Dataset
Remove _FillValue attribute that is added by xarray.
"""
main_var = xu.get_main_variable(ds)
for coord_id in ds[main_var].dims:
if ds.coords[coord_id].dims == (coord_id,):
ds[coord_id].encoding["_FillValue"] = None
return ds
def get_start_date(ds, default='1986-11-01T00:00:00'):
# get the start date
start_date = ds.attrs.get('startdate', None)
if not start_date:
start_date = default
else:
# attempt to get from startdate attribute - don't know if it will always be in sYYYYMM format?
regex = re.compile(r"^s(\d{4})(\d{2})$")
match = regex.match(start_date)
default = datetime.fromisoformat(default)
if match:
items = match.groups()
try:
start_date = datetime(int(items[0]), int(items[1]), default.day, default.hour, default.minute, default.second).isoformat()
except ValueError:
start_date = default.isoformat()
else:
start_date = default.isoformat()
return start_date
def get_lead_times(ds):
times = ds.time.values.astype('datetime64[ns]')
reftime = ds.reftime.values
lead_times = []
# calculate leadtime from reftime and valid times
for time in times:
td = time - reftime
days = td.astype('timedelta64[D]')
days.astype(int)
lead_times.append(days)
# td = times[0] - reftime
# days = td.astype('timedelta64[D]')
# days.astype(int)
# lead_times.append(days)
# # lead times in example data calculated by adding 30 days - this differs slightly from calculating them from the data
# # as in the commented out section above
# for count, time in enumerate(times[1:]):
# lead_times.append(lead_times[count] + 30)
return lead_times
def main():
fixed_by_xarray = xr.open_dataset("tas_Amon_EC-Earth3_dcppA-hindcast_s1986-r1i1p1f1_gr_198611-198710.nc", use_cftime=True, decode_timedelta=False)
fixed_provided = xr.open_dataset("tas_Amon_EC-Earth3_dcppA-hindcast_s198611-r1i1p1f1_gr_198611-198710.nc", use_cftime=True, decode_timedelta=False)
# check for startdate and sub_experiment_id
# commented out as these need to be added/changed anyway
# check_attrs_fix = {'attrs': {'startdate': 's198611', 'sub_experiment_id':'s198611'}}
# fixed_by_xarray = add_global_attrs_if_needed(fixed_by_xarray, **check_attrs_fix)
# change time long_name
time_long_name_fix = {"var_id": "time", "attrs": {"long_name": "valid_time"}}
fixed_by_xarray = edit_var_attrs(fixed_by_xarray, **time_long_name_fix)
# add/change gloabl attributes
global_attrs_fix = {"attrs":
{"forcing_description": "Free text describing the forcings",
"physics_description": "Free text describing the physics method",
"initialization_description":"Free text describing the initialization method",
"startdate": "s198611",
"sub_experiment_id":"s198611"}}
fixed_by_xarray = edit_global_attrs(fixed_by_xarray, **global_attrs_fix)
# get the start date to use for reftime
start_date = get_start_date(fixed_by_xarray)
# add reftime variable
reftime_fix = {'var_id': 'reftime',
'value': start_date,
'dtype': 'datetime64[ns]',
'attrs': {'long_name': 'Start date of the forecast', 'standard_name': 'forecast_reference_time'},
'encoding': {'dtype': 'int32', 'units':'days since 1850-01-01', 'calendar':'gregorian'}}
fixed_by_xarray = add_scalar_coord(fixed_by_xarray, **reftime_fix)
# calculate the lead times
lead_times = get_lead_times(fixed_by_xarray)
# add leadtime variable
leadtime_fix = {'var_id': 'leadtime',
'value': lead_times,
'coords': ['time'],
'dtype': 'timedelta64[D]',
'attrs': {'long_name': 'Time elapsed since the start of the forecast', 'standard_name': 'forecast_period'},
'encoding': {'dtype': 'double'}}
fixed_by_xarray = add_coord(fixed_by_xarray, **leadtime_fix)
# add realization data variable - this was in the fixed file but not the original file - so I added it in as well
# unsure if needed
data_var_fix = {'var_id': 'realization',
'value': '1',
'dtype': 'int32',
'attrs': {'long_name': 'realization', 'comment': 'For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes'}}
fixed_by_xarray = add_data_var(fixed_by_xarray, **data_var_fix)
fixed_by_xarray = remove_fill_values(fixed_by_xarray)
# print(fixed_by_xarray)
# print(fixed_provided)
fixed_provided.to_netcdf("fixed_provided.nc")
fixed_by_xarray.to_netcdf("after_fixes.nc")
if __name__ == "__main__":
main()
@ellesmith88
Copy link
Author

Dictionary items for fixes should be provided, even if there are no values e.g.

fix = 
{'var_id': 'leadtime', 
'value': lead_times,       
'coords': ('time'),       
'dtype': 'timedelta64[D]',       
'attrs': {'long_name': 'Time elapsed since the start of the forecast', 'standard_name': 'forecast_period'},       
'encoding': {}}

note that encoding is present and is an empty dictionary.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment