-
-
Save ellesmith88/532e6395afe1b53567dd564b15696d0e to your computer and use it in GitHub Desktop.
Decadal fixes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xarray as xr | |
import numpy as np | |
import re | |
from datetime import datetime | |
from roocs_utils.xarray_utils import xarray_utils as xu | |
def edit_var_attrs(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray Dataset | |
Change the attributes of a variable. | |
""" | |
var_id = operands.get("var_id") | |
attrs = operands.get("attrs") | |
for k, v in operands.get("attrs").items(): | |
ds[var_id].attrs[k] = v | |
return ds | |
def edit_global_attrs(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray DataArray | |
Change the gloabl attributes. | |
""" | |
attrs = operands.get("attrs") | |
for k, v in operands.get("attrs").items(): | |
ds.attrs[k] = v | |
return ds | |
def add_global_attrs_if_needed(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray Dataset | |
Add a global attribute if it doesn't already exist. | |
""" | |
attrs = operands.get("attrs") | |
for k, v in operands.get("attrs").items(): | |
# check if the key already exists before setting it | |
if not ds.attrs.get(k, None): | |
ds.attrs[k] = v | |
return ds | |
def add_coord(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray DataArray | |
Add a coordinate. | |
""" | |
var_id = operands.get("var_id") | |
coords = operands.get("coords") | |
value = operands.get("value") | |
dtype = operands.get("dtype") | |
ds = ds.assign_coords({f"{var_id}": (coords, np.array(value, dtype=dtype))}) | |
for k, v in operands.get("attrs").items(): | |
ds[var_id].attrs[k] = v | |
for k, v in operands.get("encoding").items(): | |
ds[var_id].encoding[k] = v | |
# update coordinates of main variable of dataset | |
main_var = xu.get_main_variable(ds) | |
main_var_coords = ds[main_var].encoding.get('coordinates', '') | |
main_var_coords += f" {var_id}" | |
ds[main_var].encoding['coordinates'] = main_var_coords | |
return ds | |
def add_scalar_coord(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray Dataset | |
Add a scalar coordinate. | |
""" | |
var_id = operands.get("var_id") | |
value = operands.get("value") | |
dtype = operands.get("dtype") | |
ds = ds.assign_coords({f"{var_id}": np.array(value, dtype=dtype)}) | |
for k, v in operands.get("attrs").items(): | |
ds[var_id].attrs[k] = v | |
if operands.get("encoding"): | |
for k, v in operands.get("encoding").items(): | |
ds[var_id].encoding[k] = v | |
# update coordinates of main variable of dataset | |
main_var = xu.get_main_variable(ds) | |
main_var_coords = ds[main_var].encoding.get('coordinates', '') | |
main_var_coords += f" {var_id}" | |
ds[main_var].encoding['coordinates'] = main_var_coords | |
return ds | |
def add_data_var(ds, **operands): | |
""" | |
:param ds: Xarray DataSet | |
:param operands: sequence of arguments | |
:return: Xarray Dataset | |
Add a data variable. | |
""" | |
var_id = operands.get("var_id") | |
value = operands.get("value") | |
dtype = operands.get("dtype") | |
main_var = xu.get_main_variable(ds) | |
ds = ds.assign({f"{var_id}": np.array(value, dtype=dtype)}) | |
for k, v in operands.get("attrs").items(): | |
ds[var_id].attrs[k] = v | |
return ds | |
def remove_fill_values(ds): | |
""" | |
:param ds: Xarray Dataset | |
:return: Xarray Dataset | |
Remove _FillValue attribute that is added by xarray. | |
""" | |
main_var = xu.get_main_variable(ds) | |
for coord_id in ds[main_var].dims: | |
if ds.coords[coord_id].dims == (coord_id,): | |
ds[coord_id].encoding["_FillValue"] = None | |
return ds | |
def get_start_date(ds, default='1986-11-01T00:00:00'): | |
# get the start date | |
start_date = ds.attrs.get('startdate', None) | |
if not start_date: | |
start_date = default | |
else: | |
# attempt to get from startdate attribute - don't know if it will always be in sYYYYMM format? | |
regex = re.compile(r"^s(\d{4})(\d{2})$") | |
match = regex.match(start_date) | |
default = datetime.fromisoformat(default) | |
if match: | |
items = match.groups() | |
try: | |
start_date = datetime(int(items[0]), int(items[1]), default.day, default.hour, default.minute, default.second).isoformat() | |
except ValueError: | |
start_date = default.isoformat() | |
else: | |
start_date = default.isoformat() | |
return start_date | |
def get_lead_times(ds): | |
times = ds.time.values.astype('datetime64[ns]') | |
reftime = ds.reftime.values | |
lead_times = [] | |
# calculate leadtime from reftime and valid times | |
for time in times: | |
td = time - reftime | |
days = td.astype('timedelta64[D]') | |
days.astype(int) | |
lead_times.append(days) | |
# td = times[0] - reftime | |
# days = td.astype('timedelta64[D]') | |
# days.astype(int) | |
# lead_times.append(days) | |
# # lead times in example data calculated by adding 30 days - this differs slightly from calculating them from the data | |
# # as in the commented out section above | |
# for count, time in enumerate(times[1:]): | |
# lead_times.append(lead_times[count] + 30) | |
return lead_times | |
def main(): | |
fixed_by_xarray = xr.open_dataset("tas_Amon_EC-Earth3_dcppA-hindcast_s1986-r1i1p1f1_gr_198611-198710.nc", use_cftime=True, decode_timedelta=False) | |
fixed_provided = xr.open_dataset("tas_Amon_EC-Earth3_dcppA-hindcast_s198611-r1i1p1f1_gr_198611-198710.nc", use_cftime=True, decode_timedelta=False) | |
# check for startdate and sub_experiment_id | |
# commented out as these need to be added/changed anyway | |
# check_attrs_fix = {'attrs': {'startdate': 's198611', 'sub_experiment_id':'s198611'}} | |
# fixed_by_xarray = add_global_attrs_if_needed(fixed_by_xarray, **check_attrs_fix) | |
# change time long_name | |
time_long_name_fix = {"var_id": "time", "attrs": {"long_name": "valid_time"}} | |
fixed_by_xarray = edit_var_attrs(fixed_by_xarray, **time_long_name_fix) | |
# add/change gloabl attributes | |
global_attrs_fix = {"attrs": | |
{"forcing_description": "Free text describing the forcings", | |
"physics_description": "Free text describing the physics method", | |
"initialization_description":"Free text describing the initialization method", | |
"startdate": "s198611", | |
"sub_experiment_id":"s198611"}} | |
fixed_by_xarray = edit_global_attrs(fixed_by_xarray, **global_attrs_fix) | |
# get the start date to use for reftime | |
start_date = get_start_date(fixed_by_xarray) | |
# add reftime variable | |
reftime_fix = {'var_id': 'reftime', | |
'value': start_date, | |
'dtype': 'datetime64[ns]', | |
'attrs': {'long_name': 'Start date of the forecast', 'standard_name': 'forecast_reference_time'}, | |
'encoding': {'dtype': 'int32', 'units':'days since 1850-01-01', 'calendar':'gregorian'}} | |
fixed_by_xarray = add_scalar_coord(fixed_by_xarray, **reftime_fix) | |
# calculate the lead times | |
lead_times = get_lead_times(fixed_by_xarray) | |
# add leadtime variable | |
leadtime_fix = {'var_id': 'leadtime', | |
'value': lead_times, | |
'coords': ['time'], | |
'dtype': 'timedelta64[D]', | |
'attrs': {'long_name': 'Time elapsed since the start of the forecast', 'standard_name': 'forecast_period'}, | |
'encoding': {'dtype': 'double'}} | |
fixed_by_xarray = add_coord(fixed_by_xarray, **leadtime_fix) | |
# add realization data variable - this was in the fixed file but not the original file - so I added it in as well | |
# unsure if needed | |
data_var_fix = {'var_id': 'realization', | |
'value': '1', | |
'dtype': 'int32', | |
'attrs': {'long_name': 'realization', 'comment': 'For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes'}} | |
fixed_by_xarray = add_data_var(fixed_by_xarray, **data_var_fix) | |
fixed_by_xarray = remove_fill_values(fixed_by_xarray) | |
# print(fixed_by_xarray) | |
# print(fixed_provided) | |
fixed_provided.to_netcdf("fixed_provided.nc") | |
fixed_by_xarray.to_netcdf("after_fixes.nc") | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dictionary items for fixes should be provided, even if there are no values e.g.
note that encoding is present and is an empty dictionary.