Skip to content

input4mips_validation.xarray_helpers.variables#

input4mips_validation.xarray_helpers.variables #

Helpers for determining what is and isn't a variable

XRVariableHelper #

Helper for processing the variables in an xarray object

This is required because the rules for handling bounds variables compared to data variables are not uniformly applied in xarray.

Source code in src/input4mips_validation/xarray_helpers/variables.py
@define
class XRVariableHelper:
    """
    Helper for processing the variables in an xarray object

    This is required because the rules for handling bounds variables
    compared to data variables are not uniformly applied in xarray.
    """

    bounds_coord_indicators: tuple[str, ...] = ("bounds", "bnds")
    """
    Strings that show that the variable represents bounds

    If any of these strings is found in the variable's name,
    it is assumed that the variable is a bounds variable.
    """

    climatology_bounds_coord_indicators: tuple[str, ...] = ("climatology",)
    """
    Strings that show that the variable represents climatology bounds

    If any of these strings is found in the variable's name,
    it is assumed that the variable is a climatology bounds variable.
    """

    def get_ds_bounds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the bounds variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Bounds variables in the dataset.
        """
        return tuple(
            str(v)
            for v in ds.data_vars
            if any(bci in str(v) for bci in self.bounds_coord_indicators)
        )

    def get_ds_climatology_bounds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the climatology bounds variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Climatology bounds variables in the dataset.
        """
        return tuple(
            str(v)
            for v in ds.data_vars
            if any(
                clim_i in str(v) for clim_i in self.climatology_bounds_coord_indicators
            )
        )

    def get_ds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Variables in the dataset, excluding bounds, climatology
            and non-number variables.
        """
        non_variables = (
            *self.get_ds_bounds_variables(ds),
            *self.get_ds_climatology_bounds_variables(ds),
            *(
                k
                for k, v in ds.data_vars.items()
                if not np.issubdtype(v.dtype, np.number)
            ),
        )

        return tuple(str(v) for v in ds.data_vars if v not in non_variables)

bounds_coord_indicators: tuple[str, ...] = ('bounds', 'bnds') class-attribute instance-attribute #

Strings that show that the variable represents bounds

If any of these strings is found in the variable's name, it is assumed that the variable is a bounds variable.

climatology_bounds_coord_indicators: tuple[str, ...] = ('climatology') class-attribute instance-attribute #

Strings that show that the variable represents climatology bounds

If any of these strings is found in the variable's name, it is assumed that the variable is a climatology bounds variable.

get_ds_bounds_variables(ds) #

Get the bounds variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Bounds variables in the dataset.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_bounds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the bounds variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Bounds variables in the dataset.
    """
    return tuple(
        str(v)
        for v in ds.data_vars
        if any(bci in str(v) for bci in self.bounds_coord_indicators)
    )

get_ds_climatology_bounds_variables(ds) #

Get the climatology bounds variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Climatology bounds variables in the dataset.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_climatology_bounds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the climatology bounds variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Climatology bounds variables in the dataset.
    """
    return tuple(
        str(v)
        for v in ds.data_vars
        if any(
            clim_i in str(v) for clim_i in self.climatology_bounds_coord_indicators
        )
    )

get_ds_variables(ds) #

Get the variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Variables in the dataset, excluding bounds, climatology and non-number variables.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Variables in the dataset, excluding bounds, climatology
        and non-number variables.
    """
    non_variables = (
        *self.get_ds_bounds_variables(ds),
        *self.get_ds_climatology_bounds_variables(ds),
        *(
            k
            for k, v in ds.data_vars.items()
            if not np.issubdtype(v.dtype, np.number)
        ),
    )

    return tuple(str(v) for v in ds.data_vars if v not in non_variables)

XRVariableProcessorLike #

Bases: Protocol

Interface for helpers for processing the variables in an xarray object

This is required because the rules for handling bounds variables compared to data variables are not uniformly applied in xarray. There are CF-rules, but these are not applied consistently in xarray hence we use this instead. We would use https://github.com/xarray-contrib/cf-xarray, but that just hard-codes the name "bounds", which doesn't work when the variable is saved as "bnds", which seems to be the case when we save with e.g. iris.

Source code in src/input4mips_validation/xarray_helpers/variables.py
class XRVariableProcessorLike(Protocol):
    """
    Interface for helpers for processing the variables in an xarray object

    This is required because the rules for handling bounds variables
    compared to data variables are not uniformly applied in xarray.
    There are CF-rules, but these are not applied consistently in xarray
    hence we use this instead.
    We would use https://github.com/xarray-contrib/cf-xarray,
    but that just hard-codes the name "bounds",
    which doesn't work when the variable is saved as "bnds",
    which seems to be the case when we save with e.g. iris.
    """

    def get_ds_bounds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the bounds variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Bounds variables in the dataset.
        """

    def get_ds_climatology_bounds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the climatology bounds variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Climatology bounds variables in the dataset.
        """

    def get_ds_variables(
        self,
        ds: xr.Dataset,
    ) -> tuple[str, ...]:
        """
        Get the variables in a dataset

        Parameters
        ----------
        ds
            Dataset to check

        Returns
        -------
        :
            Variables in the dataset, excluding bounds variables.
        """

get_ds_bounds_variables(ds) #

Get the bounds variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Bounds variables in the dataset.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_bounds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the bounds variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Bounds variables in the dataset.
    """

get_ds_climatology_bounds_variables(ds) #

Get the climatology bounds variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Climatology bounds variables in the dataset.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_climatology_bounds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the climatology bounds variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Climatology bounds variables in the dataset.
    """

get_ds_variables(ds) #

Get the variables in a dataset

Parameters:

Name Type Description Default
ds Dataset

Dataset to check

required

Returns:

Type Description
tuple[str, ...]

Variables in the dataset, excluding bounds variables.

Source code in src/input4mips_validation/xarray_helpers/variables.py
def get_ds_variables(
    self,
    ds: xr.Dataset,
) -> tuple[str, ...]:
    """
    Get the variables in a dataset

    Parameters
    ----------
    ds
        Dataset to check

    Returns
    -------
    :
        Variables in the dataset, excluding bounds variables.
    """