Source code for ILAMB.ilamblib

from scipy.interpolate import NearestNDInterpolator
from .constants import mid_months,bnd_months
from .Regions import Regions
from netCDF4 import Dataset
from datetime import datetime
from cf_units import Unit
from copy import deepcopy
from mpi4py import MPI
import numpy as np
import logging,re,os
import cftime as cf
from pkg_resources import parse_version, get_distribution

logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank)

class VarNotInFile(Exception):
    def __str__(self): return "VarNotInFile"

class VarNotMonthly(Exception):
    def __str__(self): return "VarNotMonthly"

class VarNotInModel(Exception):
    def __str__(self): return "VarNotInModel"

class VarsNotComparable(Exception):
    def __str__(self): return "VarNotComparable"

class VarNotOnTimeScale(Exception):
    def __str__(self): return "VarNotOnTimeScale"

class UnknownUnit(Exception):
    def __str__(self): return "UnknownUnit"

class AreasNotInModel(Exception):
    def __str__(self): return "AreasNotInModel"

class MisplacedData(Exception):
    def __str__(self): return "MisplacedData"

class NotTemporalVariable(Exception):
    def __str__(self): return "NotTemporalVariable"

class NotSpatialVariable(Exception):
    def __str__(self): return "NotSpatialVariable"

class UnitConversionError(Exception):
    def __str__(self): return "UnitConversionError"

class AnalysisError(Exception):
    def __str__(self): return "AnalysisError"

class NotLayeredVariable(Exception):
    def __str__(self): return "NotLayeredVariable"

class NotDatasiteVariable(Exception):
    def __str__(self): return "NotDatasiteVariable"

class MonotonicityError(Exception):
    def __str__(self): return "MonotonicityError"

def FixDumbUnits(unit):
    r"""Try to fix the dumb units people insist on using.

    Parameters
    ----------
    unit : str
        the trial unit

    Returns
    -------
    unit : str
        the fixed unit
    """
    # Various synonyms for 1
    if unit.lower().strip() in ["unitless",
                                "n/a",
                                "none"]: unit = "1"
    # Remove the C which so often is used to mean carbon but actually means coulomb
    tokens = re.findall(r"[\w']+", unit)
    for i,token in enumerate(tokens):
        if token.endswith("C"):
            try:
                if Unit(token[:-1]).is_convertible(Unit("g")):
                    unit = unit.replace(token,token[:-1])
                elif (i > 0) and Unit(tokens[i-1]).is_convertible(Unit("g")):
                    unit = unit.replace(" C","")
            except:
                pass
    # ... and Nitrogen
    for i,token in enumerate(tokens):
        if token.endswith("N"):
            try:
                if Unit(token[:-1]).is_convertible(Unit("g")):
                    unit = unit.replace(token,token[:-1])
                elif (i > 0) and Unit(tokens[i-1]).is_convertible(Unit("g")):
                    unit = unit.replace(" N","")
            except:
                pass
    return unit
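
# Illustrative examples (not part of the original module): FixDumbUnits
# rewrites unit strings that cf_units cannot parse. A trailing "C"
# (carbon) or "N" (nitrogen) on a mass unit is stripped, and synonyms
# for dimensionless become "1":
#
#   >>> FixDumbUnits("kgC m-2 s-1")
#   'kg m-2 s-1'
#   >>> FixDumbUnits("unitless")
#   '1'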

def GenerateDistinctColors(N,saturation=0.67,value=0.67):
    r"""Generates a series of distinct colors.

    Computes N distinct colors using HSV color space, holding the
    saturation and value levels constant and linearly varying the
    hue. Colors are returned as RGB tuples.

    Parameters
    ----------
    N : int
        number of distinct colors to generate
    saturation : float, optional
        argument of HSV color space
    value : float, optional
        argument of HSV color space

    Returns
    -------
    RGB_tuples : list
        list of N distinct RGB tuples
    """
    from colorsys import hsv_to_rgb
    HSV_tuples = [(x/float(N), saturation, value) for x in range(N)]
    RGB_tuples = list(map(lambda x: hsv_to_rgb(*x), HSV_tuples))
    return RGB_tuples

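# Illustrative example (not part of the original module): four distinct
# colors, evenly spaced in hue at the default saturation and value; each
# entry is an (r,g,b) tuple with components in [0,1]:
#
#   >>> colors = GenerateDistinctColors(4)
#   >>> len(colors)
#   4
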
def GuessAlpha(t):
    """Guess at what point in the time interval the given time is defined.

    Although part of the CF standard, many datasets do not have the
    bounds specified on the time variable. This is complicated by the
    many conventions that groups have regarding at what point in the
    time interval the time is reported. In the absence of given
    bounds, we guess here at what convention was used.

    Parameters
    ----------
    t : netCDF4.Variable
        the dataset or array representing the times of the intervals

    Returns
    -------
    alpha : float
        estimated location in the time interval where the given time
        is defined
    """
    if t.size == 1: return 0.5
    t0 = cf.num2date(t[0],t.units,calendar=t.calendar)
    t1 = cf.num2date(t[1],t.units,calendar=t.calendar)
    dt = (t1-t0).total_seconds()/3600/24
    if np.allclose(dt,365.,atol=10):
        # annual: assumes that the first time entry represents the
        # beginning of a decade
        y0 = cf.date2num(cf.datetime(10*(t0.year//10),1,1),t.units,calendar=t.calendar)
        alpha = np.round((t[0]-y0)/dt,1).clip(0,1)
    elif np.allclose(dt,30,atol=4):
        # monthly: assumes that the first time entry represents the
        # beginning of a year
        m0 = cf.date2num(cf.datetime(t0.year,1,1),t.units,calendar=t.calendar)
        alpha = np.round((t[0]-m0)/dt,1).clip(0,1)
    elif dt < 0.9:
        # sub-daily: assumes that the first time entry represents the
        # beginning of a day
        h0 = cf.date2num(cf.datetime(t0.year,t0.month,t0.day),t.units,calendar=t.calendar)
        alpha = np.round((t[0]-h0)/dt,1)
    else:
        msg = "GuessAlpha for dt = %f [d] not implemented yet" % (dt)
        raise ValueError(msg)
    return alpha

def CreateTimeBounds(t,alpha=0.5):
    """Create time bounds from a time array.

    Parameters
    ----------
    t : netCDF4.Variable or numpy.ndarray
        the dataset or array representing the times of the intervals
    alpha : float
        the fraction of the interval to use to set the bounds

    Returns
    -------
    tb : numpy.ndarray
        the bounds of the given time array, of shape = (t.size,2)

    Notes
    -----
    Using alpha = 0.5 will create bounds in the middle of each time
    point. An alpha = 0 will use the given time as the beginning of
    the interval and alpha = 1 will use the given time as the end of
    the interval.
    """
    if ((alpha < 0) or (alpha > 1)):
        msg = "Alpha out of bounds, should be (0 <= %g <= 1)" % alpha
        raise ValueError(msg)
    if t.size == 1: return np.asarray([[t[0],t[0]]])
    dt = np.diff(t)
    dt = np.hstack([dt[0],dt,dt[-1]])
    tb = np.asarray([t -     alpha *dt[:-1],
                     t + (1.-alpha)*dt[+1:]]).T
    return tb

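# Illustrative example (not part of the original module): monthly-like
# times at days 15, 45, and 75 with the default alpha = 0.5 produce
# bounds centered on each time value:
#
#   >>> CreateTimeBounds(np.asarray([15.,45.,75.]))
#   array([[ 0., 30.],
#          [30., 60.],
#          [60., 90.]])
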
def ConvertCalendar(t,units,calendar):
    """Express the given datetime object as a number in the given units and calendar."""
    return cf.date2num(cf.datetime(t.year,t.month,t.day,t.hour,t.minute,t.second),
                       units,calendar=calendar)

def GetTime(var,t0=None,tf=None,convert_calendar=True,ignore_time_array=True):
    """Extract the time and time bounds of a netCDF4 variable, converted to
    the ILAMB default calendar and datum."""
    # New method of handling time does not like my biggest/smallest
    # time convention
    if t0 is not None:
        if np.allclose(t0,-1e20): t0 = None
    if tf is not None:
        if np.allclose(tf,+1e20): tf = None

    # Get parent dataset/group
    dset  = var.group()
    vname = "%s:%s" % (dset.filepath(),var.name)
    CB    = None

    # What is the time dimension called in the dataset/variable?
    time_name = [name for name in var.dimensions if "time" in name.lower()]
    if len(time_name) == 0:
        return None,None,None,None,None,None
    elif len(time_name) > 1:
        msg = "Ambiguous 'time' dimension in the variable %s, one of these [%s]" % (vname,",".join(time_name))
        raise IOError(msg)
    time_name = time_name[0]
    t = dset.variables[time_name]

    # Check for units on time
    if "units" not in t.ncattrs():
        msg = "No units given in the time variable in %s:%s" % (dset.filepath(),time_name)
        raise ValueError(msg)
    if "calendar" not in t.ncattrs():
        msg = "No calendar given in the time variable in %s:%s" % (dset.filepath(),time_name)
        raise ValueError(msg)

    # If no time bounds are given, we create them
    time_bnds_name = t.bounds if "bounds" in t.ncattrs() else None
    if time_bnds_name is not None:
        if time_bnds_name not in dset.variables.keys():
            msg = "Time bounds specified in %s as %s, but not a variable in the dataset, found these [%s]" % (time_name,time_bnds_name,dset.variables.keys())
            raise IOError(msg)
        tb = dset.variables[time_bnds_name][...]
    else:
        tb = CreateTimeBounds(t,alpha=GuessAlpha(t))
    if "climatology" in t.ncattrs():
        clim_name = t.climatology
        if clim_name not in dset.variables.keys():
            msg = "Climatology bounds specified in %s as %s, but not a variable in the dataset, found these [%s]" % (time_name,clim_name,dset.variables.keys())
            raise IOError(msg)
        CB = dset.variables[clim_name][...]
        if not np.allclose(CB.shape,[12,2]):
            msg = "ILAMB only supports annual cycle style climatologies"
            raise IOError(msg)
        CB = np.round(CB[0,:]/365.+1850.)

    # Convert the input beginning/ending time to the current calendar/datum
    if t0 is not None:
        t0 = cf.num2date(t0,units="days since 1850-1-1 00:00:00",calendar="noleap")
        t0 = ConvertCalendar(t0,t.units,t.calendar)
        if (t0 > tb[-1,1]): return None,None,None,None,None,None
    if tf is not None:
        tf = cf.num2date(tf,units="days since 1850-1-1 00:00:00",calendar="noleap")
        tf = ConvertCalendar(tf,t.units,t.calendar)
        if (tf < tb[0,0]): return None,None,None,None,None,None

    # Subset by the desired initial and final times
    dt    = np.diff(tb,axis=1)[:,0]
    begin = 0
    end   = t.size-1
    if t0 is not None:
        begin = np.where(t0>(tb[:,0]-0.01*dt))[0]
        begin = begin[-1] if begin.size > 0 else 0
    if tf is not None:
        end = np.where(tf<(tb[:,1]+0.01*dt))[0]
        end = end[0] if end.size > 0 else t.size-1
    T  = np.asarray(t [begin:(end+1)])
    TB = np.asarray(tb[begin:(end+1)])
    if ignore_time_array: T = TB.mean(axis=1)

    # Are the time intervals consecutive?
    if not np.allclose(TB[1:,0],TB[:-1,1]):
        msg = "Time intervals defined in %s:%s are not continuous" % (dset.filepath(),time_bnds_name)
        raise ValueError(msg)

    # Do the times lie in the bounds?
    TF = (T >= TB[:,0])*(T <= TB[:,1])
    if not TF.all():
        index = ["%d" % i for i in np.where(TF==False)[0]]
        msg = "Times at indices [%s] do not fall inside of the corresponding bounds in %s:%s and %s" % (",".join(index),dset.filepath(),time_name,time_bnds_name)
        raise ValueError(msg)

    # Convert time array to ILAMB default calendar / datum
    try:
        datum_shift = (cf.num2date(0,"days since 1850-1-1 00:00:00",calendar=t.calendar)-
                       cf.num2date(0,t.units                       ,calendar=t.calendar)).total_seconds()
    except:
        msg = "Error in computing the datum: t.units = %s, t.calendar = %s" % (t.units,t.calendar)
        raise ValueError(msg)
    if ((abs(datum_shift) > 60) or (convert_calendar and t.calendar != "noleap")):
        T  = cf.num2date(T ,units=t.units,calendar=t.calendar)
        TB = cf.num2date(TB,units=t.units,calendar=t.calendar)
        for index,x in np.ndenumerate(T):
            T [index] = ConvertCalendar(x,"days since 1850-1-1 00:00:00","noleap" if convert_calendar else t.calendar)
        for index,x in np.ndenumerate(TB):
            TB[index] = ConvertCalendar(x,"days since 1850-1-1 00:00:00","noleap" if convert_calendar else t.calendar)
    cal = "noleap" if convert_calendar else t.calendar
    return T.astype(float),TB.astype(float),CB,begin,end,cal

def CellAreas(lat,lon,lat_bnds=None,lon_bnds=None):
    """Given arrays of latitude and longitude, return cell areas in square meters.

    Parameters
    ----------
    lat : numpy.ndarray
        a 1D array of latitudes which represent cell centroids
    lon : numpy.ndarray
        a 1D array of longitudes which represent cell centroids
    lat_bnds : numpy.ndarray, optional
        a 2D array of latitude bounds of shape = (lat.size,2)
    lon_bnds : numpy.ndarray, optional
        a 2D array of longitude bounds of shape = (lon.size,2)

    Returns
    -------
    areas : numpy.ndarray
        a 2D array of cell areas in [m2]
    """
    from .constants import earth_rad
    if (lat_bnds is not None and lon_bnds is not None):
        return earth_rad**2*np.outer((np.sin(lat_bnds[:,1]*np.pi/180.)-
                                      np.sin(lat_bnds[:,0]*np.pi/180.)),
                                     (lon_bnds[:,1]-lon_bnds[:,0])*np.pi/180.)
    x = np.zeros(lon.size+1)
    x[1:-1] = 0.5*(lon[1:]+lon[:-1])
    x[ 0]   = lon[ 0]-0.5*(lon[ 1]-lon[ 0])
    x[-1]   = lon[-1]+0.5*(lon[-1]-lon[-2])
    if(x.max() > 181): x -= 180
    x  = x.clip(-180,180)
    x *= np.pi/180.

    y = np.zeros(lat.size+1)
    y[1:-1] = 0.5*(lat[1:]+lat[:-1])
    y[ 0]   = lat[ 0]-0.5*(lat[ 1]-lat[ 0])
    y[-1]   = lat[-1]+0.5*(lat[-1]-lat[-2])
    y  = y.clip(-90,90)
    y *= np.pi/180.

    dx    = earth_rad*(x[1:]-x[:-1])
    dy    = earth_rad*(np.sin(y[1:])-np.sin(y[:-1]))
    areas = np.outer(dx,dy).T
    return areas

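# Illustrative check (not part of the original module): a 1 degree cell
# on the equator is roughly 111 km x 111 km, so its area should be on
# the order of 1.2e10 m2. With explicit bounds the centroid arguments
# are unused and may be None:
#
#   >>> A = CellAreas(None,None,
#   ...               lat_bnds=np.asarray([[-0.5,0.5]]),
#   ...               lon_bnds=np.asarray([[-0.5,0.5]]))
#   >>> A.shape          # (1, 1), with A[0,0] approximately 1.2e10
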
def GlobalLatLonGrid(res,**keywords):
    r"""Generates a latitude/longitude grid at a desired resolution

    Computes 1D arrays of latitude and longitude values which
    correspond to cell interfaces and centroids at a given resolution.

    Parameters
    ----------
    res : float
        the desired resolution of the grid in degrees
    from_zero : bool, optional
        sets longitude convention { True:(0,360), False:(-180,180) }
    res_lat : float, optional
        the desired latitude resolution, defaults to res
    res_lon : float, optional
        the desired longitude resolution, defaults to res

    Returns
    -------
    lat_bnd : numpy.ndarray
        a 1D array of latitudes which represent cell interfaces
    lon_bnd : numpy.ndarray
        a 1D array of longitudes which represent cell interfaces
    lat : numpy.ndarray
        a 1D array of latitudes which represent cell centroids
    lon : numpy.ndarray
        a 1D array of longitudes which represent cell centroids
    """
    from_zero = keywords.get("from_zero",False)
    res_lat   = keywords.get("res_lat"  ,res)
    res_lon   = keywords.get("res_lon"  ,res)
    nlon    = int(360./res_lon)+1
    nlat    = int(180./res_lat)+1
    lon_bnd = np.linspace(-180,180,nlon)
    if from_zero: lon_bnd += 180
    lat_bnd = np.linspace(-90,90,nlat)
    lat     = 0.5*(lat_bnd[1:]+lat_bnd[:-1])
    lon     = 0.5*(lon_bnd[1:]+lon_bnd[:-1])
    return lat_bnd,lon_bnd,lat,lon

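# Illustrative example (not part of the original module): a 2 degree
# global grid has 91 latitude interfaces, 181 longitude interfaces, and
# 90 x 180 cell centroids:
#
#   >>> lat_bnd,lon_bnd,lat,lon = GlobalLatLonGrid(2.0)
#   >>> lat_bnd.size,lon_bnd.size,lat.size,lon.size
#   (91, 181, 90, 180)
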
def NearestNeighborInterpolation(lat1,lon1,data1,lat2,lon2):
    r"""Interpolates globally gridded data at another resolution

    Parameters
    ----------
    lat1 : numpy.ndarray
        a 1D array of latitudes of cell centroids corresponding to the
        source data
    lon1 : numpy.ndarray
        a 1D array of longitudes of cell centroids corresponding to the
        source data
    data1 : numpy.ndarray
        an array of data to be interpolated of shape = (lat1.size,lon1.size,...)
    lat2 : numpy.ndarray
        a 1D array of latitudes of cell centroids corresponding to the
        target resolution
    lon2 : numpy.ndarray
        a 1D array of longitudes of cell centroids corresponding to the
        target resolution

    Returns
    -------
    data2 : numpy.ndarray
        an array of interpolated data of shape = (lat2.size,lon2.size,...)
    """
    rows  = np.apply_along_axis(np.argmin,1,np.abs(lat2[:,np.newaxis]-lat1))
    cols  = np.apply_along_axis(np.argmin,1,np.abs(lon2[:,np.newaxis]-lon1))
    data2 = data1[np.ix_(rows,cols)]
    return data2

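# Illustrative example (not part of the original module): a coarse 2 x 2
# field is mapped to a finer grid by repeating the value of the nearest
# source centroid:
#
#   >>> data1 = np.asarray([[1.,2.],[3.,4.]])
#   >>> NearestNeighborInterpolation(np.asarray([-45.,45.]),np.asarray([-90.,90.]),
#   ...                              data1,
#   ...                              np.asarray([-60.,-20.,20.,60.]),
#   ...                              np.asarray([-120.,-40.,40.,120.]))
#   array([[1., 1., 2., 2.],
#          [1., 1., 2., 2.],
#          [3., 3., 4., 4.],
#          [3., 3., 4., 4.]])
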
def TrueError(lat1_bnd,lon1_bnd,lat1,lon1,data1,lat2_bnd,lon2_bnd,lat2,lon2,data2):
    r"""Computes the pointwise difference between two sets of gridded data

    To obtain the pointwise error we populate a list of common cell
    interfaces and then interpolate both input arrays to the composite
    grid resolution using nearest-neighbor interpolation.

    Parameters
    ----------
    lat1_bnd, lon1_bnd, lat1, lon1 : numpy.ndarray
        1D arrays corresponding to the latitude/longitudes of the cell
        interfaces/centroids
    data1 : numpy.ndarray
        an array of data to be interpolated of shape = (lat1.size,lon1.size,...)
    lat2_bnd, lon2_bnd, lat2, lon2 : numpy.ndarray
        1D arrays corresponding to the latitude/longitudes of the cell
        interfaces/centroids
    data2 : numpy.ndarray
        an array of data to be interpolated of shape = (lat2.size,lon2.size,...)

    Returns
    -------
    lat_bnd, lon_bnd, lat, lon : numpy.ndarray
        1D arrays corresponding to the latitude/longitudes of the cell
        interfaces/centroids of the resulting error
    error : numpy array
        an array of the pointwise error of shape = (lat.size,lon.size,...)
    """
    # combine limits, sort and remove duplicates
    lat_bnd = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd)
    lon_bnd = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd)

    # need centroids of new grid for nearest-neighbor interpolation
    lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1])
    lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1])

    # interpolate datasets at new grid
    d1 = NearestNeighborInterpolation(lat1,lon1,data1,lat,lon)
    d2 = NearestNeighborInterpolation(lat2,lon2,data2,lat,lon)

    # relative to the first grid/data
    error = d2-d1
    return lat_bnd,lon_bnd,lat,lon,error

def SympifyWithArgsUnits(expression,args,units):
    """Uses symbolic algebra to determine the final unit of an expression.

    Parameters
    ----------
    expression : str
        the expression whose units you wish to simplify
    args : dict
        a dictionary of numpy arrays whose keys are the variables
        written in the input expression
    units : dict
        a dictionary of strings representing units whose keys are the
        variables written in the input expression
    """
    from sympy import sympify,postorder_traversal
    expression = sympify(expression)

    # try to convert all arguments to same units if possible, it
    # catches most use cases
    keys = list(args.keys())
    for i,key0 in enumerate(keys):
        for key in keys[(i+1):]:
            try:
                Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True)
                units[key] = units[key0]
            except:
                pass

    for expr in postorder_traversal(expression):
        ekey = str(expr)
        if expr.is_Add:
            # if there are scalars in the expression, these will not
            # be in the units dictionary. Add them and give them an
            # implicit unit of 1
            keys = [str(arg) for arg in expr.args]
            for key in keys:
                if key not in units: units[key] = "1"
            # if we are adding, all arguments must have the same unit.
            key0 = keys[0]
            for key in keys:
                Unit(units[key]).convert(np.ones(1),Unit(units[key0]))
                units[key] = units[key0]
            units[ekey] = "%s" % (units[key0])
        elif expr.is_Pow:
            # if raising to a power, just create the new unit
            keys = [str(arg) for arg in expr.args]
            units[ekey] = "(%s)%s" % (units[keys[0]],keys[1])
        elif expr.is_Mul:
            # just create the new unit
            keys = [str(arg) for arg in expr.args]
            units[ekey] = " ".join(["(%s)" % units[key] for key in keys if key in units])
    return sympify(str(expression),locals=args),units[ekey]

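# Illustrative example (not part of the original module): adding two
# fields with compatible units converts the second argument in place
# (here b becomes 1000 [g]) and reports the unit of the sum:
#
#   >>> args  = {"a": np.ones(3), "b": np.ones(3)}
#   >>> units = {"a": "g", "b": "kg"}
#   >>> result,unit = SympifyWithArgsUnits("a+b",args,units)
#   >>> unit
#   'g'
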
def ComputeIndexingArrays(lat2d,lon2d,lat,lon):
    """Compute indexing arrays which map the cells of a regular lat/lon
    grid to the nearest cells of a curvilinear (2D lat/lon) grid.

    Parameters
    ----------
    lat2d : numpy.ndarray
        a 2D array of latitudes of cell centroids on the source grid
    lon2d : numpy.ndarray
        a 2D array of longitudes of cell centroids on the source grid
    lat : numpy.ndarray
        a 1D array of latitudes of cell centroids
    lon : numpy.ndarray
        a 1D array of longitudes of cell centroids
    """
    # Prepare the interpolator
    points = np.asarray([lat2d.flatten(),lon2d.flatten()]).T
    values = np.asarray([(np.arange(lat2d.shape[0])[:,np.newaxis]*np.ones  (lat2d.shape[1])).flatten(),
                         (np.ones  (lat2d.shape[0])[:,np.newaxis]*np.arange(lat2d.shape[1])).flatten()]).T
    fcn = NearestNDInterpolator(points,values)
    LAT,LON = np.meshgrid(lat,lon,indexing='ij')
    gmap = fcn(LAT.flatten(),LON.flatten()).astype(int)
    return gmap[:,0].reshape(LAT.shape),gmap[:,1].reshape(LAT.shape)

def ExtendAnnualCycle(time,cycle_data,cycle_time):
    ind = np.abs((time[:,np.newaxis] % 365)-(cycle_time % 365)).argmin(axis=1)
    assert (ind.max() < 12)*(ind.min() >= 0)
    return cycle_data[ind]

def _shiftDatum(t,datum,calendar):
    return cf.date2num(cf.num2date(t[...],datum,calendar=calendar),
                       "days since 1850-1-1",
                       calendar=calendar)

def _removeLeapDay(t,v,datum=None,calendar=None,t0=None,tf=None):
    """Shifts the datum and removes leap day if present.

    Parameters
    ----------
    t : netCDF4._netCDF4.Variable
        the variable representing time in any calendar with any datum
    v : netCDF4._netCDF4.Variable
        the variable representing the data
    datum : str, optional
        the datum to which we will shift the variable
    t0 : float, optional
        the initial time in 'days since 1850-1-1'
    tf : float, optional
        the final time in 'days since 1850-1-1'
    """
    datum0 = "days since 1850-1-1"
    if calendar is None:
        if "calendar" in t.ncattrs(): calendar = t.calendar
    if datum is None:
        if "units" in t.ncattrs(): datum = t.units

    # shift their datum to our datum
    tdata = _shiftDatum(t,datum,calendar)
    if tdata.ndim > 1: tdata = tdata[:,0]

    # convert the time array to datetime objects in the native calendar
    T = cf.num2date(tdata,"days since 1850-1-1",calendar=calendar)

    # find a boolean array which is true where time values are on leap day
    leap_day = np.asarray([1 if (x.month == 2 and x.day == 29) else 0 for x in T],dtype=bool)

    # then we need to shift the times by the times we will remove
    dt      = np.hstack([0,np.diff(tdata)])    # assumes that the time is at the beginning of the interval
    tdata  -= (leap_day*dt).cumsum()           # shift by removed time
    t_index = np.where(leap_day==False)[0]     # indices that we keep
    tdata   = tdata[t_index]                   # remove leap day

    # finally we need to shift by the number of leap days since our new datum
    tdata -= cf.date2num(T[0],"days since 1850-1-1",calendar) - cf.date2num(T[0],"days since 1850-1-1","noleap")

    # where do we slice the array
    begin = 0; end = t.size
    if t0 is not None: begin = max(tdata.searchsorted(t0)-1,begin)
    if tf is not None: end   = min(tdata.searchsorted(tf)+1,end)
    tdata = tdata[begin:end]
    vdata = v[t_index,...][begin:end,...]  # not as memory efficient as it could be
    return tdata,vdata

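# Illustrative example (not part of the original module): the
# ExtendAnnualCycle function above tiles a 12-value annual cycle along
# an arbitrary monthly time array by matching each time, modulo 365, to
# the nearest mid-month time:
#
#   >>> cycle = np.arange(12,dtype=float)    # one value per month
#   >>> t     = mid_months + 365.            # the following year
#   >>> ExtendAnnualCycle(t,cycle,mid_months)
#   array([ 0.,  1.,  2., ..., 11.])
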
def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=None,convert_calendar=True):
    """Extracts data from a netCDF4 datafile for use in a Variable object.

    Intended to be used inside of the Variable constructor. Some of
    the return arguments will be None depending on the contents of the
    netCDF4 file.

    Parameters
    ----------
    filename : str
        Name of the netCDF4 file from which to extract a variable
    variable_name : str
        Name of the variable to extract from the netCDF4 file
    alternate_vars : list of str, optional
        A list of possible alternate variable names to find
    t0 : float, optional
        If temporal, specifying the initial time can reduce memory
        usage and speed up access time.
    tf : float, optional
        If temporal, specifying the final time can reduce memory
        usage and speed up access time.

    Returns
    -------
    data : numpy.ma.masked_array
        The array which contains the data which constitutes the variable
    unit : str
        The unit of the input data
    name : str
        The name of the variable, will be how it is saved in an output
        netCDF4 file
    time : numpy.ndarray
        A 1D array of times in days since 1850-01-01 00:00:00
    time_bnds : numpy.ndarray
        A 1D array of time bounds in days since 1850-01-01 00:00:00
    lat : numpy.ndarray
        A 1D array of latitudes of cell centroids
    lon : numpy.ndarray
        A 1D array of longitudes of cell centroids
    area : numpy.ndarray
        A 2D array of the cell areas
    ndata : int
        Number of data sites this data represents
    depth_bnds : numpy.ndarray
        A 1D array of the depth boundaries of each cell
    """
    try:
        dset = Dataset(filename,mode="r")
        if parse_version(get_distribution("netCDF4").version) >= parse_version("1.4.1"):
            dset.set_always_mask(False)
        if group is None:
            grp = dset
        else:
            grp = dset.groups[group]
    except RuntimeError:
        raise RuntimeError("Unable to open the file: %s" % filename)

    found = False
    if variable_name in grp.variables.keys():
        found = True
        var = grp.variables[variable_name]
    else:
        while alternate_vars.count(None) > 0:
            alternate_vars.pop(alternate_vars.index(None))
        for var_name in alternate_vars:
            if var_name in grp.variables.keys():
                found = True
                var = grp.variables[var_name]
    if found == False:
        alternate_vars.insert(0,variable_name)
        raise RuntimeError("Unable to find [%s] in the file: %s" % (",".join(alternate_vars),filename))

    # Copy attributes into a dictionary
    attr = { attr: var.getncattr(attr) for attr in var.ncattrs() }

    # Check on dimensions
    time_name = [name for name in var.dimensions if "time" in name.lower()]
    lat_name  = [name for name in var.dimensions if "lat"  in name.lower()]
    lon_name  = [name for name in var.dimensions if "lon"  in name.lower()]
    data_name = [name for name in var.dimensions if name.lower() in ["data","lndgrid","gridcell"]]
    missed    = [name for name in var.dimensions if name not in (time_name +
                                                                 lat_name  +
                                                                 lon_name  +
                                                                 data_name)]

    # Lat/lon might be indexing arrays, find their shape
    shp = None
    if (len(lat_name) == 0 and len(lon_name) == 0 and len(missed) >= 2 and len(data_name) == 0):
        # remove these dimensions from the missed variables
        i,j = var.dimensions[-2],var.dimensions[-1]
        if i in missed: missed.pop(missed.index(i))
        if j in missed: missed.pop(missed.index(j))
        i = grp.variables[i]
        j = grp.variables[j]
        if (np.issubdtype(i.dtype,np.integer) and np.issubdtype(j.dtype,np.integer)):
            shp = [len(i),len(j)]

    # Lat/lon might just be sizes
    if (len(lat_name) == 1 and len(lon_name) == 1):
        if not (lat_name[0] in grp.variables and lon_name[0] in grp.variables):
            shp = [len(grp.dimensions[lat_name[0]]),len(grp.dimensions[lon_name[0]])]

    # If these were sizes, then we need to find the correct 2D lat/lon arrays
    if shp is not None:
        # We want to remove any false positives we might find. I don't
        # want to consider variables which are 'bounds' or dimensions
        # of others, nor those that don't have the correct shape.
        bnds = [grp.variables[v].bounds for v in grp.variables if "bounds" in grp.variables[v].ncattrs()]
        dims = [v for v in grp.variables if (v in grp.dimensions)]
        poss = [v for v in grp.variables if (v not in dims and
                                             v not in bnds and
                                             (np.allclose(shp,grp.variables[v].shape) if len(shp) == len(grp.variables[v].shape) else False))]
        lat_name = [name for name in poss if "lat" in name.lower()]
        lon_name = [name for name in poss if "lon" in name.lower()]
        # If still ambiguous, look inside the variable attributes for
        # the presence of the variable name to give further preference.
        attrs = [str(var.getncattr(attr)) for attr in var.ncattrs()]
        if len(lat_name) == 0:
            raise ValueError("Unable to find values for the latitude dimension in %s" % (filename))
        if len(lat_name) > 1:
            tmp_name = [name for name in lat_name if np.any([name in attr for attr in attrs])]
            if len(tmp_name) > 0: lat_name = tmp_name
        if len(lon_name) == 0:
            raise ValueError("Unable to find values for the longitude dimension in %s" % (filename))
        if len(lon_name) > 1:
            tmp_name = [name for name in lon_name if np.any([name in attr for attr in attrs])]
            if len(tmp_name) > 0: lon_name = tmp_name

    # Lat dimension
    if len(lat_name) == 1:
        lat_name     = lat_name[0]
        lat_bnd_name = grp.variables[lat_name].bounds if (lat_name in grp.variables and
                                                          "bounds" in grp.variables[lat_name].ncattrs()) else None
        if lat_bnd_name not in grp.variables: lat_bnd_name = None
    elif len(lat_name) >= 1:
        raise ValueError("Ambiguous choice of values for the latitude dimension [%s] in %s" % (",".join(lat_name),filename))
    else:
        lat_name     = None
        lat_bnd_name = None

    # Lon dimension
    if len(lon_name) == 1:
        lon_name     = lon_name[0]
        lon_bnd_name = grp.variables[lon_name].bounds if (lon_name in grp.variables and
                                                          "bounds" in grp.variables[lon_name].ncattrs()) else None
        if lon_bnd_name not in grp.variables: lon_bnd_name = None
    elif len(lon_name) >= 1:
        raise ValueError("Ambiguous choice of values for the longitude dimension [%s] in %s" % (",".join(lon_name),filename))
    else:
        lon_name     = None
        lon_bnd_name = None

    # Data dimension
    if len(data_name) == 1:
        data_name = data_name[0]
    elif len(data_name) >= 1:
        raise ValueError("Ambiguous choice of values for the data dimension [%s] in %s" % (",".join(data_name),filename))
    else:
        data_name = None

    # The layered dimension is whatever is leftover since its name
    # could be many things
    if len(missed) == 1:
        depth_name     = missed[0]
        depth_bnd_name = grp.variables[depth_name].bounds if (depth_name in grp.variables and
                                                              "bounds" in grp.variables[depth_name].ncattrs()) else None
        if depth_bnd_name not in grp.variables: depth_bnd_name = None
    elif len(missed) >= 1:
        raise ValueError("Ambiguous choice of values for the layered dimension [%s] in %s" % (",".join(missed),filename))
    else:
        depth_name     = None
        depth_bnd_name = None

    # Based on present values, get dimensions and bounds
    t     = None; t_bnd     = None
    lat   = None; lat_bnd   = None
    lon   = None; lon_bnd   = None
    depth = None; depth_bnd = None
    data  = None; cbounds   = None
    t,t_bnd,cbounds,begin,end,calendar = GetTime(var,t0=t0,tf=tf,convert_calendar=convert_calendar)

    # Are there uncertainties?
    v_bnd = None
    if "bounds" in var.ncattrs():
        v_bnd = var.bounds
        v_bnd = grp.variables[v_bnd] if v_bnd in grp.variables.keys() else None
    if begin is None:
        v = var[...]
        if v_bnd: v_bnd = v_bnd[...]
    else:
        v = var[begin:(end+1),...]
        if v_bnd: v_bnd = v_bnd[begin:(end+1),...]
    if lat_name     is not None: lat     = grp.variables[lat_name]    [...]
    if lat_bnd_name is not None: lat_bnd = grp.variables[lat_bnd_name][...]
    if lon_name     is not None: lon     = grp.variables[lon_name]    [...]
    if lon_bnd_name is not None: lon_bnd = grp.variables[lon_bnd_name][...]
    if depth_name is not None:
        dunit = None
        if "units" in grp.variables[depth_name].ncattrs(): dunit = grp.variables[depth_name].units
        depth = grp.variables[depth_name][...]
        if depth_bnd_name is not None:
            depth_bnd = grp.variables[depth_bnd_name][...]
        if dunit is not None:
            if not (Unit(dunit).is_convertible(Unit("m")) or Unit(dunit).is_convertible(Unit("Pa"))):
                raise ValueError("Units [%s] not compatible with [m,Pa] of the layered dimension [%s] in %s" % (dunit,depth_name,filename))
            if Unit(dunit).is_convertible(Unit("m")):
                depth = Unit(dunit).convert(depth,Unit("m"),inplace=True)
                if depth_bnd is not None:
                    depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True)
            if Unit(dunit).is_convertible(Unit("Pa")):
                # FIX: converts the pressure to meters, but assumes it
                # is coming from the atmosphere. This will be
                # problematic for ocean quantities.
                depth = Unit(dunit).convert(depth,Unit("Pa"),inplace=True)
                Pb = 101325.; Tb = 273.15; R = 8.3144598; g = 9.80665; M = 0.0289644
                depth = -np.log(depth/Pb)*R*Tb/M/g
                if depth_bnd is not None:
                    depth_bnd = Unit(dunit).convert(depth_bnd,Unit("Pa"),inplace=True)
                    depth_bnd = -np.log(depth_bnd/Pb)*R*Tb/M/g

    if data_name is not None:
        data = len(grp.dimensions[data_name])
        # if we have data sites, there may be lat/lon/depth data to
        # come along with them although not a dimension of the variable
        lat_name = []; lon_name = []
        for key in grp.variables.keys():
            if "lat"      in key: lat_name.append(key)
            if "lon"      in key: lon_name.append(key)
            if "altitude" in key: depth_name = key
        if len(lat_name) > 1:
            lat_name = [n for n in lat_name if grp.variables[n].dimensions[0] in var.dimensions]
        if len(lon_name) > 1:
            lon_name = [n for n in lon_name if grp.variables[n].dimensions[0] in var.dimensions]
        if len(lat_name) > 0: lat = grp.variables[lat_name[0]][...]
        if len(lon_name) > 0: lon = grp.variables[lon_name[0]][...]
        if depth_name is not None: depth = grp.variables[depth_name][...]
        if lat is not None:
            if lat.size != data: lat = None
        if lon is not None:
            if lon.size != data: lon = None
        if depth is not None:
            if depth.size != data: depth = None

    # If lat and lon are 2D, then we will need to interpolate things
    if lat is not None and lon is not None:
        if lat.ndim == 2 and lon.ndim == 2:
            assert lat.shape == lon.shape

            # Create the grid
            res      = 1.0
            lat_bnds = np.arange(round(lat.min(),0),
                                 round(lat.max(),0)+res/2.,res)
            lon_bnds = np.arange(round(lon.min(),0),
                                 round(lon.max(),0)+res/2.,res)
            lats = 0.5*(lat_bnds[:-1]+lat_bnds[1:])
            lons = 0.5*(lon_bnds[:-1]+lon_bnds[1:])
            ilat,ilon = ComputeIndexingArrays(lat,lon,lats,lons)
            r = np.sqrt( (lat[ilat,ilon]-lats[:,np.newaxis])**2 +
                         (lon[ilat,ilon]-lons[np.newaxis,:])**2 )
            v = v[...,ilat,ilon]
            v = np.ma.masked_array(v,mask=v.mask+(r>2*res))
            lat = lats
            lon = lons
            lat_bnd = np.zeros((lat.size,2))
            lat_bnd[:,0] = lat_bnds[:-1]
            lat_bnd[:,1] = lat_bnds[+1:]
            lon_bnd = np.zeros((lon.size,2))
            lon_bnd[:,0] = lon_bnds[:-1]
            lon_bnd[:,1] = lon_bnds[+1:]

    # handle incorrect or absent masking of arrays
    if type(v) != type(np.ma.empty(1)):
        mask = np.zeros(v.shape,dtype=int)
        if "_FillValue"    in var.ncattrs(): mask += (np.abs(v-var._FillValue   )<1e-12)
        if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12)
        v = np.ma.masked_array(v,mask=mask,copy=False)

    if "units" in var.ncattrs():
        units = FixDumbUnits(var.units)
    else:
        units = "1"
    dset.close()

    return v,v_bnd,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data,calendar,attr

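# Illustrative usage (not part of the original module; the file path and
# variable names are hypothetical): FromNetCDF4 returns a 16-tuple of
# the data and all associated dimension information, much of which may
# be None depending on the file contents:
#
#   >>> (v,v_bnd,unit,name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,
#   ...  depth,depth_bnd,cbounds,ndata,calendar,attr) = FromNetCDF4(
#   ...     "gpp.nc","gpp",alternate_vars=["GPP"])
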
def Score(var,normalizer):
    """Remaps a normalized variable to the interval [0,1].

    Parameters
    ----------
    var : ILAMB.Variable.Variable
        The variable to normalize, usually represents an error of some sort
    normalizer : ILAMB.Variable.Variable
        The variable by which we normalize
    """
    from .Variable import Variable
    name = var.name.replace("bias","bias_score")
    name =     name.replace("diff","diff_score")
    name =     name.replace("rmse","rmse_score")
    name =     name.replace("iav" ,"iav_score")
    np.seterr(over='ignore',under='ignore')
    data = np.exp(-np.abs(var.data/normalizer.data))
    data[data<1e-16] = 0.
    np.seterr(over='raise',under='raise')
    return Variable(name  = name,
                    data  = data,
                    unit  = "1",
                    ndata = var.ndata,
                    lat   = var.lat, lat_bnds = var.lat_bnds,
                    lon   = var.lon, lon_bnds = var.lon_bnds,
                    area  = var.area)

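# Worked example (not part of the original module): the score maps a
# normalized error e to exp(-|e|), so a zero error scores 1 and an error
# equal in magnitude to the normalizer scores exp(-1):
#
#   >>> float(np.exp(-1.0))   # score when |var| == |normalizer|
#   0.36787944117144233
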
def ComposeSpatialGrids(var1,var2):
    """Creates a grid which conforms the boundaries of both variables.

    This routine takes the union of the latitude and longitude cell
    boundaries of both grids and returns a new set of latitudes and
    longitudes which represent cell centers of the new grid.

    Parameters
    ----------
    var1,var2 : ILAMB.Variable.Variable
        The two variables for which we wish to find a common grid

    Returns
    -------
    lat : numpy.ndarray
        a 1D array of latitudes of cell centroids
    lon : numpy.ndarray
        a 1D array of longitudes of cell centroids
    """
    if not var1.spatial: raise NotSpatialVariable()
    if not var2.spatial: raise NotSpatialVariable()
    def _make_bnds(x):
        bnds       = np.zeros(x.size+1)
        bnds[1:-1] = 0.5*(x[1:]+x[:-1])
        bnds[ 0]   = max(x[ 0]-0.5*(x[ 1]-x[ 0]),-180)
        bnds[-1]   = min(x[-1]+0.5*(x[-1]-x[-2]),+180)
        return bnds
    lat1_bnd = _make_bnds(var1.lat)
    lon1_bnd = _make_bnds(var1.lon)
    lat2_bnd = _make_bnds(var2.lat)
    lon2_bnd = _make_bnds(var2.lon)
    lat_bnd  = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd)
    lon_bnd  = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd)
    lat      = 0.5*(lat_bnd[1:]+lat_bnd[:-1])
    lon      = 0.5*(lon_bnd[1:]+lon_bnd[:-1])
    return lat,lon

def ScoreSeasonalCycle(phase_shift):
    """Computes the seasonal cycle score from the phase shift.

    Possibly remove this function as we do not compute other score
    components via an ilamblib function.
    """
    from .Variable import Variable
    return Variable(data  = (1+np.cos(np.abs(phase_shift.data)/365*2*np.pi))*0.5,
                    unit  = "1",
                    name  = phase_shift.name.replace("phase_shift","phase_shift_score"),
                    ndata = phase_shift.ndata,
                    lat   = phase_shift.lat, lat_bnds = phase_shift.lat_bnds,
                    lon   = phase_shift.lon, lon_bnds = phase_shift.lon_bnds,
                    area  = phase_shift.area)

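# Worked example (not part of the original module): the score is
# (1 + cos(2*pi*shift/365))/2, so a phase shift of 0 days scores 1, a
# quarter-year shift of 91.25 days scores 0.5, and a half-year shift of
# 182.5 days scores 0.
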
def _composeGrids(v1,v2):
    lat_bnds = np.unique(np.hstack([v1.lat_bnds.flatten(),v2.lat_bnds.flatten()]))
    lon_bnds = np.unique(np.hstack([v1.lon_bnds.flatten(),v2.lon_bnds.flatten()]))
    lat_bnds = lat_bnds[(lat_bnds>=- 90)*(lat_bnds<=+ 90)]
    lon_bnds = lon_bnds[(lon_bnds>=-180)*(lon_bnds<=+180)]
    lat_bnds = np.vstack([lat_bnds[:-1],lat_bnds[+1:]]).T
    lon_bnds = np.vstack([lon_bnds[:-1],lon_bnds[+1:]]).T
    lat      = lat_bnds.mean(axis=1)
    lon      = lon_bnds.mean(axis=1)
    return lat,lon,lat_bnds,lon_bnds

def AnalysisMeanStateSites(ref,com,**keywords):
    """Perform a mean state analysis.

    This mean state analysis examines the model mean state in space
    and time. We compute the mean variable value over the time period
    at each spatial cell or data site as appropriate, as well as the
    bias and RMSE relative to the observational variable. We will
    output maps of the period mean values and bias. For each spatial
    cell or data site we also estimate the phase of the variable by
    finding the mean time of year when the maximum occurs and the
    phase shift by computing the difference in phase with respect to
    the observational variable. In the spatial dimension, we compute a
    spatial mean for each of the desired regions and an average annual
    cycle.

    Parameters
    ----------
    ref : ILAMB.Variable.Variable
        the observational (reference) variable
    com : ILAMB.Variable.Variable
        the model (comparison) variable
    regions : list of str, optional
        the regions over which to apply the analysis
    dataset : netCDF4.Dataset, optional
        an open dataset in write mode for caching the results of the
        analysis which pertain to the model
    benchmark_dataset : netCDF4.Dataset, optional
        an open dataset in write mode for caching the results of the
        analysis which pertain to the observations
    space_mean : bool, optional
        disable to compute sums of the variable over space instead of
        mean values
    table_unit : str, optional
        the unit to use when displaying output in tables on the HTML page
    plot_unit : str, optional
        the unit to use when displaying output on plots on the HTML page
    """
    from .Variable import Variable
    regions           = keywords.get("regions"          ,["global"])
    dataset           = keywords.get("dataset"          ,None)
    benchmark_dataset = keywords.get("benchmark_dataset",None)
    space_mean        = keywords.get("space_mean"       ,True)
    table_unit        = keywords.get("table_unit"       ,None)
    plot_unit         = keywords.get("plot_unit"        ,None)
    mass_weighting    = keywords.get("mass_weighting"   ,False)
    skip_rmse         = keywords.get("skip_rmse"        ,False)
    skip_iav          = keywords.get("skip_iav"         ,True )
    skip_cycle        = keywords.get("skip_cycle"       ,False)
    ILAMBregions      = Regions()
    normalizer        = None

    # Convert str types to booleans
    if type(skip_rmse ) == type(""): skip_rmse  = (skip_rmse .lower() == "true")
    if type(skip_iav  ) == type(""): skip_iav   = (skip_iav  .lower() == "true")
    if type(skip_cycle) == type(""): skip_cycle = (skip_cycle.lower() == "true")

    # Only study the annual cycle if it makes sense
    if not ref.monthly:    skip_cycle = True
    if ref.time.size < 12: skip_cycle = True
    if skip_rmse:          skip_iav   = True

    # We find the mean values over the time period on the original
    # grid/datasites of each dataset
    ref_timeint = ref.integrateInTime(mean=True)
    com_timeint = com.integrateInTime(mean=True)
    REF         = ref
    COM         = com
    REF_timeint = ref_timeint
    COM_timeint = com_timeint
    if mass_weighting: normalizer = REF_timeint.data

    # Compute the bias, RMSE, and RMS maps using the interpolated
    # quantities
    bias = REF_timeint.bias(COM_timeint)
    cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit,
                    data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask),
                    time = REF.time, time_bnds = REF.time_bnds,
                    lat  = REF.lat , lat_bnds  = REF.lat_bnds,
                    lon  = REF.lon , lon_bnds  = REF.lon_bnds,
                    area = REF.area, ndata     = REF.ndata)
    crms           = cREF.rms()
    bias_score_map = Score(bias,crms)
    if not skip_rmse:
        cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit,
                        data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask),
                        time = COM.time, time_bnds = COM.time_bnds,
                        lat  = COM.lat , lat_bnds  = COM.lat_bnds,
                        lon  = COM.lon , lon_bnds  = COM.lon_bnds,
                        area = COM.area, ndata     = COM.ndata)
        rmse           = REF.rmse( COM)
        crmse          = cREF.rmse(cCOM)
        rmse_score_map = Score(crmse,crms)
    if not skip_iav:
        ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit,
                           data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask),
                           time = ref.time, time_bnds = ref.time_bnds,
                           lat  = ref.lat , lat_bnds  = ref.lat_bnds,
                           lon  = ref.lon , lon_bnds  = ref.lon_bnds,
                           area = ref.area, ndata     = ref.ndata).rms()
        com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit,
                           data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask),
                           time = com.time, time_bnds = com.time_bnds,
                           lat  = com.lat , lat_bnds  = com.lat_bnds,
                           lon  = com.lon , lon_bnds  = com.lon_bnds,
                           area = com.area, ndata     = com.ndata).rms()
        REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit,
                           data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask),
                           time = REF.time, time_bnds = REF.time_bnds,
                           lat  = REF.lat , lat_bnds  = REF.lat_bnds,
                           lon  = REF.lon , lon_bnds  = REF.lon_bnds,
                           area = REF.area, ndata     = REF.ndata).rms()
        COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit,
                           data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask),
                           time = COM.time, time_bnds = COM.time_bnds,
                           lat  = COM.lat , lat_bnds  = COM.lat_bnds,
                           lon  = COM.lon , lon_bnds  = COM.lon_bnds,
                           area = COM.area, ndata     = COM.ndata).rms()
        iav_score_map = Score(Variable(name = "diff %s" % REF.name, unit = REF.unit,
                                       data = (COM_iav.data-REF_iav.data),
                                       lat  = REF.lat , lat_bnds = REF.lat_bnds,
                                       lon  = REF.lon , lon_bnds = REF.lon_bnds,
                                       area = REF.area, ndata    = REF.ndata),
                              REF_iav)

    # The phase shift comes from the interpolated quantities
    if not skip_cycle:
        ref_cycle       = REF.annualCycle()
        com_cycle       = COM.annualCycle()
        ref_maxt_map    = ref_cycle.timeOfExtrema(etype="max")
        com_maxt_map    = com_cycle.timeOfExtrema(etype="max")
        shift_map       = ref_maxt_map.phaseShift(com_maxt_map)
        shift_score_map = ScoreSeasonalCycle(shift_map)
        shift_map.data /= 30.; shift_map.unit = "months"

    # Scalars
    ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {}
    com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {}
    bias_val  = {}; bias_score = {}; rmse_val = {}; rmse_score = {}
    space_std = {}; space_cor  = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {}
    ref_union_mean = {}; ref_comp_mean = {}
    com_union_mean = {}; com_comp_mean = {}
    for region in regions:
        ref_period_mean[region] = ref_timeint   .siteStats(region=region)
        ref_spaceint   [region] = ref           .siteStats(region=region)
        com_period_mean[region] = com_timeint   .siteStats(region=region)
        com_spaceint   [region] = com           .siteStats(region=region)
        bias_val       [region] = bias          .siteStats(region=region)
        bias_score     [region] = bias_score_map.siteStats(region=region,weight=normalizer)
        if not skip_cycle:
            ref_mean_cycle[region] = ref_cycle.siteStats(region=region)
            ref_dtcycle   [region] = deepcopy(ref_mean_cycle[region])
            ref_dtcycle   [region].data -= ref_mean_cycle[region].data.mean()
            com_mean_cycle[region] = com_cycle.siteStats(region=region)
            com_dtcycle   [region] = deepcopy(com_mean_cycle[region])
            com_dtcycle   [region].data -= com_mean_cycle[region].data.mean()
            shift         [region] = shift_map      .siteStats(region=region,intabs=True)
            shift_score   [region] = shift_score_map.siteStats(region=region,weight=normalizer)
        if not skip_rmse:
            rmse_val  [region] = rmse          .siteStats(region=region)
            rmse_score[region] = rmse_score_map.siteStats(region=region,weight=normalizer)
        if not skip_iav:
            iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer)

        ref_period_mean[region].name = "Period Mean (original grids) %s" % (region)
        ref_spaceint   [region].name = "spaceint_of_%s_over_%s" % (ref.name,region)
        com_period_mean[region].name = "Period Mean (original grids) %s" % (region)
        com_spaceint   [region].name = "spaceint_of_%s_over_%s" % (ref.name,region)
        bias_val       [region].name = "Bias %s"       % (region)
        bias_score     [region].name = "Bias Score %s" % (region)
        if not skip_rmse:
            rmse_val  [region].name = "RMSE %s"       % (region)
            rmse_score[region].name = "RMSE Score %s" % (region)
        if not skip_iav:
            iav_score [region].name = "Interannual Variability Score %s" % (region)
        if not skip_cycle:
            ref_mean_cycle[region].name = "cycle_of_%s_over_%s"   % (ref.name,region)
            ref_dtcycle   [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region)
            com_mean_cycle[region].name = "cycle_of_%s_over_%s"   % (ref.name,region)
            com_dtcycle   [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region)
            shift         [region].name = "Phase Shift %s"          % (region)
            shift_score   [region].name = "Seasonal Cycle Score %s" % (region)

    # Unit conversions
    def _convert(var,unit):
        if type(var) == type({}):
            for key in var.keys(): var[key].convert(unit)
        else:
            var.convert(unit)
    if table_unit is not None:
        for var in [ref_period_mean,com_period_mean,ref_union_mean,com_union_mean,ref_comp_mean,com_comp_mean]:
            _convert(var,table_unit)
    if plot_unit is not None:
        plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val]
        if not skip_rmse:  plot_vars += [rmse,rmse_val]
        if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle]
        if not skip_iav:   plot_vars += [com_iav]
        for var in plot_vars: _convert(var,plot_unit)

    # Rename and optionally dump out information to netCDF4 files
    com_timeint   .name = "timeint_of_%s"       % ref.name
    bias          .name = "bias_map_of_%s"      % ref.name
    bias_score_map.name = "biasscore_map_of_%s" % ref.name
    out_vars = [com_period_mean,
                ref_union_mean,
                com_union_mean,
                ref_comp_mean,
                com_comp_mean,
                com_timeint,
                com_mean_cycle,
                com_dtcycle,
                bias,
                bias_score_map,
                bias_val,
                bias_score,
                shift,
                shift_score]
    if com_spaceint[list(com_spaceint.keys())[0]].data.size > 1: out_vars.append(com_spaceint)
    if not skip_cycle:
        com_maxt_map   .name = "phase_map_of_%s"      % ref.name
        shift_map      .name = "shift_map_of_%s"      % ref.name
        shift_score_map.name = "shiftscore_map_of_%s" % ref.name
        out_vars.append(com_maxt_map)
        out_vars.append(shift_map)
        out_vars.append(shift_score_map)
    if not skip_rmse:
        rmse          .name = "rmse_map_of_%s"      % ref.name
        rmse_score_map.name = "rmsescore_map_of_%s" % ref.name
        out_vars.append(rmse)
        out_vars.append(rmse_score_map)
        out_vars.append(rmse_val)
        out_vars.append(rmse_score)
    if not skip_iav:
        com_iav      .name = "iav_map_of_%s"      % ref.name
        iav_score_map.name = "iavscore_map_of_%s" % ref.name
        out_vars.append(com_iav)
        out_vars.append(iav_score_map)
        out_vars.append(iav_score)
    if dataset is not None:
        for var in out_vars:
            if type(var) == type({}):
                for key in var.keys(): var[key].toNetCDF4(dataset,group="MeanState")
            else:
                var.toNetCDF4(dataset,group="MeanState")
        for key in sd_score.keys():
            sd_score[key].toNetCDF4(dataset,group="MeanState",
                                    attributes={"std":space_std[key].data,
                                                "R"  :space_cor[key].data})

    # Rename and optionally dump out information to netCDF4 files
    out_vars = [ref_period_mean,ref_timeint]
    if ref_spaceint[list(ref_spaceint.keys())[0]].data.size > 1: out_vars.append(ref_spaceint)
    ref_timeint.name = "timeint_of_%s" % ref.name
    if not skip_cycle:
        ref_maxt_map.name = "phase_map_of_%s" % ref.name
        out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle]
    if not skip_iav:
        ref_iav.name = "iav_map_of_%s" % ref.name
        out_vars.append(ref_iav)
    if benchmark_dataset is not None:
        for var in out_vars:
            if type(var) == type({}):
                for key in var.keys(): var[key].toNetCDF4(benchmark_dataset,group="MeanState")
            else:
                var.toNetCDF4(benchmark_dataset,group="MeanState")
    return

def AnalysisMeanStateSpace(ref,com,**keywords):
    """Perform a mean state analysis.

    This mean state analysis examines the model mean state in space
    and time. We compute the mean variable value over the time period
    at each spatial cell or data site as appropriate, as well as the
    bias and RMSE relative to the observational variable. We will
    output maps of the period mean values and bias. For each spatial
    cell or data site we also estimate the phase of the variable by
    finding the mean time of year when the maximum occurs and the
    phase shift by computing the difference in phase with respect to
    the observational variable. In the spatial dimension, we compute a
    spatial mean for each of the desired regions and an average annual
    cycle.

    Parameters
    ----------
    ref : ILAMB.Variable.Variable
        the observational (reference) variable
    com : ILAMB.Variable.Variable
        the model (comparison) variable
    regions : list of str, optional
        the regions over which to apply the analysis
    dataset : netCDF4.Dataset, optional
        an open dataset in write mode for caching the results of the
        analysis which pertain to the model
    benchmark_dataset : netCDF4.Dataset, optional
        an open dataset in write mode for caching the results of the
        analysis which pertain to the observations
    space_mean : bool, optional
        disable to compute sums of the variable over space instead of
        mean values
    table_unit : str, optional
        the unit to use when displaying output in tables on the HTML page
    plot_unit : str, optional
        the unit to use when displaying output on plots on the HTML page
    """
    from .Variable import Variable
    regions           = keywords.get("regions"          ,["global"])
    dataset           = keywords.get("dataset"          ,None)
    benchmark_dataset = keywords.get("benchmark_dataset",None)
    space_mean        = keywords.get("space_mean"       ,True)
    table_unit        = keywords.get("table_unit"       ,None)
    plot_unit         = keywords.get("plot_unit"        ,None)
    mass_weighting    = keywords.get("mass_weighting"   ,False)
    skip_rmse         = keywords.get("skip_rmse"        ,False)
    skip_iav          = keywords.get("skip_iav"         ,True )
    skip_cycle        = keywords.get("skip_cycle"       ,False)
    ref_timeint       = keywords.get("ref_timeint"      ,None)
    com_timeint       = keywords.get("com_timeint"      ,None)
    rmse_score_basis  = keywords.get("rmse_score_basis" ,"cycle")
    ILAMBregions      = Regions()
    spatial           = ref.spatial

    # Convert str types to booleans
    if type(skip_rmse ) == type(""): skip_rmse  = (skip_rmse .lower() == "true")
    if type(skip_iav  ) == type(""): skip_iav   = (skip_iav  .lower() == "true")
    if type(skip_cycle) == type(""): skip_cycle = (skip_cycle.lower() == "true")

    # Check if we need to skip parts of the analysis
    if not ref.monthly:    skip_cycle = True
if ref.time.size < 12: skip_cycle = True if ref.time.size == 1: skip_rmse = True if skip_rmse : skip_iav = True name = ref.name # Interpolate both reference and comparison to a grid composed of # their cell breaks ref.convert(plot_unit) com.convert(plot_unit) lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) unit = REF.unit area = REF.area ndata = REF.ndata # Find the mean values over the time period if ref_timeint is None: ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) else: ref_timeint.convert(plot_unit) REF_timeint = ref_timeint.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) if com_timeint is None: com_timeint = com.integrateInTime(mean=True).convert(plot_unit) COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) else: com_timeint.convert(plot_unit) COM_timeint = com_timeint.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) normalizer = REF_timeint.data if mass_weighting else None # Report period mean values over all possible representations of # land ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) if benchmark_dataset is not None: ref_timeint.name = "timeint_of_%s" % name ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") for region in regions: # reference period mean on original grid ref_period_mean = ref_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) ref_period_mean.name = "Period Mean (original grids) %s" % region ref_period_mean.toNetCDF4(benchmark_dataset,group="MeanState") if dataset is not None: com_timeint.name = "timeint_of_%s" % name com_timeint.toNetCDF4(dataset,group="MeanState") for region in regions: # reference period mean on intersection of land ref_union_mean = Variable(name = "REF_and_com", unit = REF_timeint.unit, data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) ref_union_mean.name = "Benchmark Period Mean (intersection) %s" % region ref_union_mean.toNetCDF4(dataset,group="MeanState") # reference period mean on complement of land ref_comp_mean = Variable(name = "REF_not_com", unit = REF_timeint.unit, data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) ref_comp_mean.name = "Benchmark Period Mean (complement) %s" % region ref_comp_mean.toNetCDF4(dataset,group="MeanState") # comparison period mean on original grid com_period_mean = com_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) com_period_mean.name = "Period Mean (original grids) %s" % region com_period_mean.toNetCDF4(dataset,group="MeanState") # comparison period mean on intersection of land com_union_mean = Variable(name = "ref_and_COM", unit = COM_timeint.unit, data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = 
COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit)
            com_union_mean.name = "Model Period Mean (intersection) %s" % region
            com_union_mean.toNetCDF4(dataset,group="MeanState")

            # comparison period mean on complement of land
            com_comp_mean = Variable(name = "COM_not_ref", unit = COM_timeint.unit,
                                     data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)),
                                     lat  = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds,
                                     area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit)
            com_comp_mean.name = "Model Period Mean (complement) %s" % region
            com_comp_mean.toNetCDF4(dataset,group="MeanState")

    # Now that we are done reporting on the intersection / complement,
    # set all masks to the intersection
    REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False)
    COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False)
    REF_timeint.data.mask = (ref_and_com==False)
    COM_timeint.data.mask = (ref_and_com==False)
    if mass_weighting: normalizer.mask = (ref_and_com==False)

    # Spatial Distribution: scalars and scores
    if dataset is not None:
        for region in regions:
            space_std,space_cor,sd_score = REF_timeint.spatialDistribution(COM_timeint,region=region)
            sd_score.name = "Spatial Distribution Score %s" % region
            sd_score.toNetCDF4(dataset,group="MeanState",
                               attributes={"std":space_std.data,
                                           "R"  :space_cor.data})

    # Cycle: maps, scalars, and scores
    if not skip_cycle:
        ref_cycle         = REF.annualCycle()
        ref_maxt_map      = ref_cycle.timeOfExtrema(etype="max")
        ref_maxt_map.name = "phase_map_of_%s" % name
        com_cycle         = COM.annualCycle()
        com_maxt_map      = com_cycle.timeOfExtrema(etype="max")
        com_maxt_map.name = "phase_map_of_%s" % name
        shift_map         = ref_maxt_map.phaseShift(com_maxt_map)
        shift_map.name    = "shift_map_of_%s" % name
        shift_score_map   = ScoreSeasonalCycle(shift_map)
        shift_score_map.name = "shiftscore_map_of_%s" % name
        shift_map.data   /= 30.; shift_map.unit = "months"
        if benchmark_dataset is not None:
            ref_maxt_map.toNetCDF4(benchmark_dataset,group="MeanState")
            for region in regions:
                ref_mean_cycle = ref_cycle.integrateInSpace(region=region,mean=True)
                ref_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region)
                ref_mean_cycle.toNetCDF4(benchmark_dataset,group="MeanState")
                ref_dtcycle = deepcopy(ref_mean_cycle)
                ref_dtcycle.data -= ref_mean_cycle.data.mean()
                ref_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region)
                ref_dtcycle.toNetCDF4(benchmark_dataset,group="MeanState")
        if dataset is not None:
            com_maxt_map   .toNetCDF4(dataset,group="MeanState")
            shift_map      .toNetCDF4(dataset,group="MeanState")
            shift_score_map.toNetCDF4(dataset,group="MeanState")
            for region in regions:
                com_mean_cycle = com_cycle.integrateInSpace(region=region,mean=True)
                com_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region)
                com_mean_cycle.toNetCDF4(dataset,group="MeanState")
                com_dtcycle = deepcopy(com_mean_cycle)
                com_dtcycle.data -= com_mean_cycle.data.mean()
                com_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region)
                com_dtcycle.toNetCDF4(dataset,group="MeanState")
                shift       = shift_map.integrateInSpace(region=region,mean=True,intabs=True)
                shift_score = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
                shift      .name = "Phase Shift %s" % region
                shift      .toNetCDF4(dataset,group="MeanState")
                shift_score.name = "Seasonal Cycle Score %s" % region
                shift_score.toNetCDF4(dataset,group="MeanState")
        del shift_map,shift_score_map

    # IAV: maps, scalars, scores
    if not skip_iav:
        REF_iav = Variable(data      = np.ma.masked_array(REF.data-ExtendAnnualCycle(REF.time,ref_cycle.data,ref_cycle.time),mask=REF.data.mask),
                           unit      = unit,
                           time      = REF.time, time_bnds = REF.time_bnds,
                           lat       = lat, lat_bnds = lat_bnds,
                           lon       = lon, lon_bnds = lon_bnds,
                           area      = REF.area, ndata = REF.ndata).rms()
        COM_iav = Variable(data      = np.ma.masked_array(COM.data-ExtendAnnualCycle(COM.time,com_cycle.data,com_cycle.time),mask=COM.data.mask),
                           unit      = unit,
                           time      = COM.time, time_bnds = COM.time_bnds,
                           lat       = lat, lat_bnds = lat_bnds,
                           lon       = lon, lon_bnds = lon_bnds,
                           area      = COM.area, ndata = COM.ndata).rms()
        iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit,
                                       data = (COM_iav.data-REF_iav.data),
                                       lat  = lat, lat_bnds = lat_bnds,
                                       lon  = lon, lon_bnds = lon_bnds,
                                       area = area, ndata = ndata),
                              REF_iav)
        if benchmark_dataset is not None:
            REF_iav.name = "iav_map_of_%s" % name
            REF_iav.toNetCDF4(benchmark_dataset,group="MeanState")
        if dataset is not None:
            COM_iav.name = "iav_map_of_%s" % name
            COM_iav.toNetCDF4(dataset,group="MeanState")
            iav_score_map.name = "iavscore_map_of_%s" % name
            iav_score_map.toNetCDF4(dataset,group="MeanState")
            for region in regions:
                iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
                iav_score.name = "Interannual Variability Score %s" % region
                iav_score.toNetCDF4(dataset,group="MeanState")
        del ref_cycle,com_cycle,REF_iav,COM_iav,iav_score_map

    # Bias: maps, scalars, and scores
    bias = REF_timeint.bias(COM_timeint).convert(plot_unit)
    cREF = Variable(name = "centralized %s" % name, unit = REF.unit,
                    data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask),
                    time = REF.time, time_bnds = REF.time_bnds, ndata = REF.ndata,
                    lat  = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF.area).convert(plot_unit)
    REF_std = cREF.rms()
    if skip_rmse: del cREF
    bias_score_map = Score(bias,REF_std if REF.time.size > 1 else REF_timeint)
    bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask
    if dataset is not None:
        bias.name = "bias_map_of_%s" % name
        bias.toNetCDF4(dataset,group="MeanState")
        bias_score_map.name = "biasscore_map_of_%s" % name
        bias_score_map.toNetCDF4(dataset,group="MeanState")
        for region in regions:
            bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit)
            bias_val.name = "Bias %s" % region
            bias_val.toNetCDF4(dataset,group="MeanState")
            bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
            bias_score.name = "Bias Score %s" % region
            bias_score.toNetCDF4(dataset,group="MeanState")
    del bias,bias_score_map

    # Spatial mean: plots
    if REF.time.size > 1:
        if benchmark_dataset is not None:
            for region in regions:
                ref_spaceint = REF.integrateInSpace(region=region,mean=True)
                ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region)
                ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState")
        if dataset is not None:
            for region in regions:
                com_spaceint = COM.integrateInSpace(region=region,mean=True)
                com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region)
                com_spaceint.toNetCDF4(dataset,group="MeanState")

    # RMSE: maps, scalars, and scores
    if (not skip_rmse) and (rmse_score_basis == "series"):
        rmse = REF.rmse(COM).convert(plot_unit)
        del REF
        cCOM = Variable(name = "centralized %s" % name, unit = unit,
                        data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask),
                        time = COM.time, time_bnds = COM.time_bnds,
                        lat  = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds,
                        area = COM.area, ndata = COM.ndata).convert(plot_unit)
        try:
            import psutil
            comm    = MPI.COMM_WORLD
            rank    = comm.Get_rank()
            pname   = MPI.Get_processor_name()
            process = psutil.Process(os.getpid())
            used    = process.memory_info().rss*1e-9
            msg     = "[%d][%s] Process peak memory %.2f [Gb]" % (rank,pname,used)
            logger.info(msg)
        except:
            pass
        del COM
        crmse = cREF.rmse(cCOM).convert(plot_unit)
        del cREF,cCOM
        rmse_score_map = Score(crmse,REF_std)
        if dataset is not None:
            rmse.name = "rmse_map_of_%s" % name
            rmse.toNetCDF4(dataset,group="MeanState")
            rmse_score_map.name = "rmsescore_map_of_%s" % name
            rmse_score_map.toNetCDF4(dataset,group="MeanState")
            for region in regions:
                rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit)
                rmse_val.name = "RMSE %s" % region
                rmse_val.toNetCDF4(dataset,group="MeanState")
                rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
                rmse_score.name = "RMSE Score %s" % region
                rmse_score.toNetCDF4(dataset,group="MeanState")
        del rmse,crmse,rmse_score_map

    # RMSE based on annual cycle
    if (not skip_rmse) and (rmse_score_basis == "cycle"):
        ref_cycle   = REF.annualCycle()
        ref_dtcycle = deepcopy(ref_cycle)
        com_cycle   = COM.annualCycle()
        com_dtcycle = deepcopy(com_cycle)
        with np.errstate(under='ignore'):
            ref_dtcycle.data -= ref_cycle.data.mean(axis=0)
            com_dtcycle.data -= com_cycle.data.mean(axis=0)
        del REF,COM,cREF
        rmse  = ref_cycle.rmse(com_cycle).convert(plot_unit)
        crmse = ref_dtcycle.rmse(com_dtcycle).convert(plot_unit)
        rmse_score_map = Score(crmse,REF_std)
        if dataset is not None:
            rmse.name = "rmse_map_of_%s" % name
            rmse.toNetCDF4(dataset,group="MeanState")
            rmse_score_map.name = "rmsescore_map_of_%s" % name
            rmse_score_map.toNetCDF4(dataset,group="MeanState")
            for region in regions:
                rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit)
                rmse_val.name = "RMSE %s" % region
                rmse_val.toNetCDF4(dataset,group="MeanState")
                rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer)
                rmse_score.name = "RMSE Score %s" % region
                rmse_score.toNetCDF4(dataset,group="MeanState")
        del rmse,crmse,rmse_score_map

    return
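# A hedged, self-contained sketch of the centralized RMSE scored above: the
# time mean is removed from both series before differencing, so the RMSE score
# reflects pattern and phase agreement while the bias is scored separately.
# The 1-D arrays below are synthetic stand-ins for REF/COM, not ILAMB data.
#
#     import numpy as np
#     ref   = np.sin(np.linspace(0.,4.*np.pi,48))
#     com   = 0.8*ref + 0.1
#     rmse  = np.sqrt(((ref-com)**2).mean())       # mapped/reported quantity
#     cref  = ref - ref.mean()
#     ccom  = com - com.mean()
#     crmse = np.sqrt(((cref-ccom)**2).mean())     # quantity fed to Score()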
[docs]def ClipTime(v,t0,tf):
    """Remove time from a variable based on input bounds.

    Parameters
    ----------
    v : ILAMB.Variable.Variable
        the variable to trim
    t0,tf : float
        the times at which to trim

    Returns
    -------
    vtrim : ILAMB.Variable.Variable
        the trimmed variable
    """
    begin = np.argmin(np.abs(v.time_bnds[:,0]-t0))
    end   = np.argmin(np.abs(v.time_bnds[:,1]-tf))
    if np.abs(v.time_bnds[begin,0]-t0) > 1e-6:
        while v.time_bnds[begin,0] > t0:
            begin -= 1
            if begin <= 0:
                begin = 0
                break
    if np.abs(v.time_bnds[end,1]-tf) > 1e-6:
        while v.time_bnds[end,1] < tf:
            end += 1
            if end >= v.time.size-1:
                end = v.time.size-1
                break
    v.time      = v.time     [begin:(end+1)    ]
    v.time_bnds = v.time_bnds[begin:(end+1),...]
    v.data      = v.data     [begin:(end+1),...]
    if v.data_bnds is not None:
        v.data_bnds = v.data_bnds[begin:(end+1),...]
    return v
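# A minimal sketch of exercising ClipTime on its own; the monthly times and
# bounds below are hypothetical, not taken from ILAMB data, and assume the
# Variable constructor accepts a pure time series as used elsewhere in ILAMB.
#
#     import numpy as np
#     from ILAMB.Variable import Variable
#     t_bnds = np.asarray([[0.,31.],[31.,59.],[59.,90.],[90.,120.]]) # days
#     v = Variable(data      = np.ma.masked_array(np.arange(4,dtype=float)),
#                  unit      = "1", name = "demo",
#                  time      = t_bnds.mean(axis=1),
#                  time_bnds = t_bnds)
#     v = ClipTime(v,t0=31.,tf=90.)  # keeps the two intervals covering [31,90]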
[docs]def MakeComparable(ref,com,**keywords):
    r"""Make two variables comparable.

    Given a reference variable and a comparison variable, make the two
    variables comparable or raise an exception explaining why they are
    not.

    Parameters
    ----------
    ref : ILAMB.Variable.Variable
        the reference variable object
    com : ILAMB.Variable.Variable
        the comparison variable object
    clip_ref : bool, optional
        enable in order to clip the reference variable time using the
        limits of the comparison variable (default is False)
    mask_ref : bool, optional
        enable in order to mask the reference variable using an
        interpolation of the comparison variable (default is False)
    eps : float, optional
        used to determine how close you can be to a specific time
        (expressed in days since 1-1-1850) and still be considered the
        same time (default is 30 minutes)
    window : float, optional
        specify to extend the averaging intervals (in days) when a
        variable must be coarsened (default is 0)

    Returns
    -------
    ref : ILAMB.Variable.Variable
        the modified reference variable object
    com : ILAMB.Variable.Variable
        the modified comparison variable object
    """
    # Process keywords
    clip_ref         = keywords.get("clip_ref"        ,False)
    mask_ref         = keywords.get("mask_ref"        ,False)
    eps              = keywords.get("eps"             ,30./60./24.)
    window           = keywords.get("window"          ,0.)
    extents          = keywords.get("extents"         ,np.asarray([[-90.,+90.],[-180.,+180.]]))
    logstring        = keywords.get("logstring"       ,"")
    prune_sites      = keywords.get("prune_sites"     ,False)
    site_atol        = keywords.get("site_atol"       ,0.25)
    allow_diff_times = keywords.get("allow_diff_times",False)

    # If one variable is temporal, then they both must be
    if ref.temporal != com.temporal:
        msg  = "%s Datasets are not uniformly temporal: " % logstring
        msg += "reference = %s, comparison = %s" % (ref.temporal,com.temporal)
        logger.debug(msg)
        raise VarsNotComparable()

    # If the reference is spatial, the comparison must be
    if ref.spatial and not com.spatial:
        ref  = ref.extractDatasites(com.lat,com.lon)
        msg  = "%s The reference dataset is spatial but the comparison is site-based. " % logstring
        msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata
        logger.info(msg)

    # If the reference is layered, the comparison must be
    if ref.layered and not com.layered:
        if ref.depth.size == 1:
            com.layered    = True
            com.depth      = ref.depth
            com.depth_bnds = ref.depth_bnds
            shp    = list(com.data.shape)
            insert = 0
            if com.temporal: insert = 1
            shp.insert(insert,1)
            com.data = com.data.reshape(shp)
        else:
            msg  = "%s Datasets are not uniformly layered: " % logstring
            msg += "reference = %s, comparison = %s" % (ref.layered,com.layered)
            logger.debug(msg)
            raise NotLayeredVariable()

    # If the reference represents observation sites, extract them from
    # the comparison
    if ref.ndata is not None and com.spatial:
        com = com.extractDatasites(ref.lat,ref.lon)

    # If both variables represent observation sites, make sure you
    # have the same number of sites and that they represent the same
    # location. Note this is after the above extraction so at this
    # point the ndata field of both variables should be equal.
    if (prune_sites) and (ref.ndata is not None) and (com.ndata is not None):
        deps = 1.0
        # prune the reference
        r    = np.sqrt((ref.lat[:,np.newaxis]-com.lat)**2+(ref.lon[:,np.newaxis]-com.lon)**2)
        rind = r.argmin(axis=0)
        rind = rind[np.where(r[rind,range(com.ndata)]<deps)]
        ref.lat = ref.lat[rind]; ref.lon = ref.lon[rind]; ref.data = ref.data[...,rind]
        msg  = "%s Pruned %d sites from the reference and " % (logstring,ref.ndata-ref.lat.size)
        ref.ndata = ref.lat.size
        # prune the comparison
        r    = np.sqrt((com.lat[:,np.newaxis]-ref.lat)**2+(com.lon[:,np.newaxis]-ref.lon)**2)
        rind = r.argmin(axis=0)
        rind = rind[np.where(r[rind,range(ref.ndata)]<deps)]
        com.lat = com.lat[rind]; com.lon = com.lon[rind]; com.data = com.data[...,rind]
        msg += "%d sites from the comparison." % (com.ndata-com.lat.size)
        com.ndata = com.lat.size
        logger.info(msg)
    else:
        if ref.ndata != com.ndata:
            msg  = "%s One or both datasets are understood as site data but differ in number of sites: " % logstring
            msg += "reference = %d, comparison = %d" % (ref.ndata,com.ndata)
            logger.debug(msg)
            raise VarsNotComparable()
    if ref.ndata is not None:
        if not (np.allclose(ref.lat,com.lat,atol=site_atol) or np.allclose(ref.lon,com.lon,atol=site_atol)):
            msg  = "%s Datasets represent sites, but the locations are different: " % logstring
            msg += "maximum difference lat = %.2f, lon = %.2f" % (np.abs((ref.lat-com.lat)).max(),
                                                                  np.abs((ref.lon-com.lon)).max())
            logger.debug(msg)
            raise VarsNotComparable()

    # If the datasets are both spatial, ensure that both represent the
    # same spatial area and trim the datasets if not.
    if ref.spatial and com.spatial:

        lat_bnds = (max(ref.lat_bnds[ 0,0],com.lat_bnds[ 0,0],extents[0,0]),
                    min(ref.lat_bnds[-1,1],com.lat_bnds[-1,1],extents[0,1]))
        lon_bnds = (max(ref.lon_bnds[ 0,0],com.lon_bnds[ 0,0],extents[1,0]),
                    min(ref.lon_bnds[-1,1],com.lon_bnds[-1,1],extents[1,1]))

        # Clip reference
        diff = np.abs([ref.lat_bnds[[0,-1],[0,1]]-lat_bnds,
                       ref.lon_bnds[[0,-1],[0,1]]-lon_bnds])
        if diff.max() >= 5.:
            shp0 = np.asarray(np.copy(ref.data.shape),dtype=int)
            ref.trim(lat=[lat_bnds[0] if diff[0,0] >= 5. else  -90.,
                          lat_bnds[1] if diff[0,1] >= 5. else  +90],
                     lon=[lon_bnds[0] if diff[1,0] >= 5. else -180.,
                          lon_bnds[1] if diff[1,1] >= 5. else +180])
            shp  = np.asarray(np.copy(ref.data.shape),dtype=int)
            msg  = "%s Spatial data was clipped from the reference: " % logstring
            msg += " before: %s" % (shp0)
            msg += " after: %s"  % (shp )
            logger.info(msg)

        # Clip comparison
        diff = np.abs([com.lat_bnds[[0,-1],[0,1]]-lat_bnds,
                       com.lon_bnds[[0,-1],[0,1]]-lon_bnds])
        if diff.max() >= 5.:
            shp0 = np.asarray(np.copy(com.data.shape),dtype=int)
            com.trim(lat=[lat_bnds[0] if diff[0,0] >= 5. else  -90.,
                          lat_bnds[1] if diff[0,1] >= 5. else  +90],
                     lon=[lon_bnds[0] if diff[1,0] >= 5. else -180.,
                          lon_bnds[1] if diff[1,1] >= 5. else +180])
            shp  = np.asarray(np.copy(com.data.shape),dtype=int)
            msg  = "%s Spatial data was clipped from the comparison: " % logstring
            msg += " before: %s" % (shp0)
            msg += " after: %s"  % (shp )
            logger.info(msg)

    if ref.temporal:

        # If the reference time scale is significantly larger than the
        # comparison, coarsen the comparison
        if np.log10(ref.dt/com.dt) > 0.5:
            com = com.coarsenInTime(ref.time_bnds,window=window)

        # Time bounds of the reference dataset
        t0 = ref.time_bnds[ 0,0]
        tf = ref.time_bnds[-1,1]

        # Find the comparison time range which fully encompasses the reference
        com = ClipTime(com,t0,tf)

        if clip_ref:

            # We will clip the reference dataset too
            t0  = max(t0,com.time_bnds[ 0,0])
            tf  = min(tf,com.time_bnds[-1,1])
            ref = ref.trim(t=[t0,tf])

        # Check that we now are on the same time intervals
        if not allow_diff_times:
            if ref.time.size != com.time.size:
                # Special case - it frequently works out that we are 1
                # time interval off for some reason. For now, we will
                # detect this and push a fix.
                if ref.time.size == (com.time.size+1):
                    if(np.abs(com.time[0]-ref.time[1])/(ref.time_bnds[0,1]-ref.time_bnds[0,0])<1e-2):
                        ref.time      = ref.time     [1:]
                        ref.time_bnds = ref.time_bnds[1:]
                        ref.data      = ref.data     [1:]
                else:
                    msg  = "%s Datasets have differing numbers of time intervals: " % logstring
                    msg += "reference = %d, comparison = %d" % (ref.time.size,com.time.size)
                    logger.debug(msg)
                    raise VarsNotComparable()
            if not np.allclose(ref.time_bnds,com.time_bnds,atol=0.75*ref.dt):
                msg = "%s Datasets are defined at different times" % logstring
                logger.debug(msg)
                raise VarsNotComparable()

    if ref.layered:

        # Try to resolve if the layers from the two quantities are
        # different
        if ref.depth.size == com.depth.size == 1:
            ref = ref.integrateInDepth(mean = True)
            com = com.integrateInDepth(mean = True)
        elif ref.depth.size != com.depth.size:
            # Compute the mean values from the comparison over the
            # layer breaks of the reference.
            if ref.depth.size == 1 and com.depth.size > 1:
                com = com.integrateInDepth(z0=ref.depth_bnds[ 0,0],
                                           zf=ref.depth_bnds[-1,1],
                                           mean = True)
                ref = ref.integrateInDepth(mean = True) # just removing the depth dimension
        else:
            if not np.allclose(ref.depth,com.depth):
                msg = "%s Datasets have a different layering scheme" % logstring
                logger.debug(msg)
                raise VarsNotComparable()

    # Convert the comparison to the units of the reference
    com = com.convert(ref.unit)

    return ref,com
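# A hedged usage sketch for MakeComparable: given a reference and comparison
# Variable on different grids or periods, a call like the one below trims both
# to a common footing before analysis. The keyword values and the "[h2o]" log
# prefix are illustrative, not taken from an ILAMB configuration.
#
#     ref,com = MakeComparable(ref,com,
#                              clip_ref  = True,   # trim ref to com's time span
#                              extents   = np.asarray([[-90.,90.],[-180.,180.]]),
#                              logstring = "[h2o]")
#     # on success the two share time intervals and com carries ref's units;
#     # otherwise VarsNotComparable (or NotLayeredVariable) is raised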
[docs]def CombineVariables(V):
    """Combines a list of variables into a single variable.

    This routine is intended to be used to merge variables when
    separate moments in time are scattered over several files.

    Parameters
    ----------
    V : list of ILAMB.Variable.Variable
        a list of variables to merge into a single variable

    Returns
    -------
    v : ILAMB.Variable.Variable
        the merged variable
    """
    from .Variable import Variable

    # checks on data
    assert type(V) == type([])
    if len(V) == 1: return V[0]
    for v in V: assert v.temporal

    # Put list in order by initial time
    V.sort(key=lambda v: v.time[0])

    # Check the beginning and ends times for monotonicity
    nV  = len(V)
    t0  = np.zeros(nV)
    tf  = np.zeros(nV)
    nt  = np.zeros(nV,dtype=int)
    ind = [0]
    for i,v in enumerate(V):
        t0[i] = v.time[ 0]
        tf[i] = v.time[-1]
        nt[i] = v.time.size
        ind.append(nt[:(i+1)].sum())

    # Checks on monotonicity
    if (((t0[1:]-t0[:-1]).min() < 0) or
        ((tf[1:]-tf[:-1]).min() < 0) or
        ((t0[1:]-tf[:-1]).min() < 0)):
        msg = "[MonotonicityError]"
        for i in range(nV):
            err = ""
            if i > 0:
                err += "" if t0[i] > tf[i-1] else "*"
                err += "" if t0[i] > t0[i-1] else "*"
            if i < (nV-1):
                err += "" if tf[i+1] > t0[i] else "*"
                err += "" if tf[i+1] > tf[i] else "*"
            msg += "\n  %2d: t = [%.3f, %.3f] %2s %s" % (i,t0[i],tf[i],err,V[i].filename)
        logger.debug(msg)
        raise MonotonicityError()

    # Assemble the data
    shp       = (nt.sum(),)+V[0].data.shape[1:]
    time      = np.zeros(shp[0])
    time_bnds = np.zeros((shp[0],2))
    data      = np.zeros(shp)
    mask      = np.zeros(shp,dtype=bool)
    for i,v in enumerate(V):
        time     [ind[i]:ind[i+1]]     = v.time
        time_bnds[ind[i]:ind[i+1],...] = v.time_bnds
        data     [ind[i]:ind[i+1],...] = v.data
        mask     [ind[i]:ind[i+1],...] = v.data.mask

    # If assembled from single slice files and no time bounds were
    # provided, they will not be reflective of true bounds here. If
    # any dt's are 0, make time_bounds none and recompute in the
    # constructor.
    if np.any((time_bnds[:,1]-time_bnds[:,0])<1e-12): time_bnds = None
    v = V[0]
    v = Variable(data       = np.ma.masked_array(data,mask=mask),
                 unit       = v.unit,
                 name       = v.name,
                 time       = time,
                 time_bnds  = time_bnds,
                 depth      = v.depth,
                 depth_bnds = v.depth_bnds,
                 lat        = v.lat,
                 lat_bnds   = v.lat_bnds,
                 lon        = v.lon,
                 lon_bnds   = v.lon_bnds,
                 area       = v.area,
                 ndata      = v.ndata)
    v.attr = V[0].attr
    return v
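# A hedged sketch: CombineVariables is typically fed per-file Variables read
# from a dataset split across files, e.g. one file per year. The filenames
# below are hypothetical, and this assumes the Variable constructor's
# filename/variable_name read path as used elsewhere in ILAMB.
#
#     from ILAMB.Variable import Variable
#     V = [Variable(filename = f, variable_name = "gpp")
#          for f in ["gpp_2000.nc","gpp_2001.nc","gpp_2002.nc"]]
#     gpp = CombineVariables(V)   # single Variable, time-sorted and merged
#     # files whose time ranges overlap or interleave raise MonotonicityError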
def ConvertBoundsTypes(x):
    """Convert between (n,2) cell-bounds arrays and (n+1,) interface arrays."""
    y = None
    if x.ndim == 2:
        y = np.zeros(x.shape[0]+1)
        y[:-1] = x[ :, 0]
        y[ -1] = x[-1,-1]
    if x.ndim == 1:
        y = np.zeros((x.shape[0]-1,2))
        y[:,0] = x[:-1]
        y[:,1] = x[+1:]
    return y

def LandLinInterMissingValues(mdata):
    """Fill masked values with the mean of the surrounding 3x3 stencil of land cells."""
    # Treat cells which are unmasked at every time as land
    land = np.any(mdata.mask,axis=0)==False
    data = np.ma.masked_array(mdata)
    data.data[data.mask] = 0.
    data.fill_value      = 0.
    data = data.data
    land = land.astype(int)
    # Accumulate data and land-cell counts over the 8 neighbors of each
    # interior cell; the center cell seeds both sums
    smooth = data*land[np.newaxis,...]
    suml   = np.copy(land)
    smooth[:,1:-1,1:-1] += data[:, :-2, :-2]*land[np.newaxis, :-2, :-2]
    suml  [  1:-1,1:-1] +=                   land[            :-2, :-2]
    smooth[:,1:-1,1:-1] += data[:, :-2,1:-1]*land[np.newaxis, :-2,1:-1]
    suml  [  1:-1,1:-1] +=                   land[            :-2,1:-1]
    smooth[:,1:-1,1:-1] += data[:, :-2, +2:]*land[np.newaxis, :-2, +2:]
    suml  [  1:-1,1:-1] +=                   land[            :-2, +2:]
    smooth[:,1:-1,1:-1] += data[:,1:-1, :-2]*land[np.newaxis,1:-1, :-2]
    suml  [  1:-1,1:-1] +=                   land[           1:-1, :-2]
    smooth[:,1:-1,1:-1] += data[:,1:-1, +2:]*land[np.newaxis,1:-1, +2:]
    suml  [  1:-1,1:-1] +=                   land[           1:-1, +2:]
    smooth[:,1:-1,1:-1] += data[:, +2:, :-2]*land[np.newaxis, +2:, :-2]
    suml  [  1:-1,1:-1] +=                   land[            +2:, :-2]
    smooth[:,1:-1,1:-1] += data[:, +2:,1:-1]*land[np.newaxis, +2:,1:-1]
    suml  [  1:-1,1:-1] +=                   land[            +2:,1:-1]
    smooth[:,1:-1,1:-1] += data[:, +2:, +2:]*land[np.newaxis, +2:, +2:]
    suml  [  1:-1,1:-1] +=                   land[            +2:, +2:]
    smooth /= suml.clip(1)
    # Use the smoothed value only where the input was masked
    smooth  = (mdata.mask==True)*smooth + (mdata.mask==False)*mdata.data
    return smooth

class FileContextManager():
    """Context manager for the analysis result files, removing partial output on failure."""

    def __init__(self,master,mod_results,obs_results):
        self.master      = master
        self.mod_results = mod_results
        self.obs_results = obs_results
        self.mod_dset    = None
        self.obs_dset    = None

    def __enter__(self):
        # Open the model results file on entering; the master rank also
        # opens the observation results file
        self.mod_dset = Dataset(self.mod_results,mode="w")
        if self.master: self.obs_dset = Dataset(self.obs_results,mode="w")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always close the file(s) on exit
        self.mod_dset.close()
        if self.master: self.obs_dset.close()
        # If an exception occurred, also remove the model results file
        if exc_type is not None:
            os.system("rm -f %s" % self.mod_results)
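# A hedged usage sketch for FileContextManager: open per-model and (on the
# master rank) observation result files, with cleanup of partial model output
# if the analysis raises. The paths and attribute values are hypothetical.
#
#     master = (MPI.COMM_WORLD.rank == 0)
#     with FileContextManager(master,"model_results.nc","obs_results.nc") as fcm:
#         fcm.mod_dset.setncattr("name","SomeModel")   # write analysis output here
#         if master: fcm.obs_dset.setncattr("name","Benchmark")
#     # an exception inside the block removes model_results.nc before re-raising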