Skip to content

input4mips_validation.validation.datasets_to_write_to_disk#

input4mips_validation.validation.datasets_to_write_to_disk #

Validation of datasets that we are writing to disk

InvalidDatasetToWriteToDiskError #

Bases: ValueError

Raised when a dataset to write to disk does not pass all of the validation

Source code in src/input4mips_validation/validation/datasets_to_write_to_disk.py
class InvalidDatasetToWriteToDiskError(ValueError):
    """
    Raised when a dataset to write to disk does not pass all of the validation
    """

    def __init__(
        self,
        ds: xr.Dataset,
        error_container: list[tuple[str, Exception]],
    ) -> None:
        """
        Initialise the error

        Parameters
        ----------
        ds
            The dataset we tried to validate

        error_container
            The thing which was being done
            and the error which was caught
            while validating `ds`.
        """
        error_msgs: list[str] = []
        for error in error_container:
            process, exc = error
            formatted_exc = f"{type(exc).__name__}: {exc}"
            error_msgs.append(f"{process} failed. Exception: {formatted_exc}")

        error_msgs_str = "\n\n".join(error_msgs)

        error_msg = (
            f"Failed to validate {ds=}\n"
            "Caught error messages:\n\n"
            f"{error_msgs_str}"
        )

        super().__init__(error_msg)

__init__(ds, error_container) #

Initialise the error

Parameters:

Name Type Description Default
ds Dataset

The dataset we tried to validate

required
error_container list[tuple[str, Exception]]

The thing which was being done and the error which was caught while validating ds.

required
Source code in src/input4mips_validation/validation/datasets_to_write_to_disk.py
def __init__(
    self,
    ds: xr.Dataset,
    error_container: list[tuple[str, Exception]],
) -> None:
    """
    Initialise the error

    Parameters
    ----------
    ds
        The dataset we tried to validate

    error_container
        The thing which was being done
        and the error which was caught
        while validating `ds`.
    """
    error_msgs: list[str] = []
    for error in error_container:
        process, exc = error
        formatted_exc = f"{type(exc).__name__}: {exc}"
        error_msgs.append(f"{process} failed. Exception: {formatted_exc}")

    error_msgs_str = "\n\n".join(error_msgs)

    error_msg = (
        f"Failed to validate {ds=}\n"
        "Caught error messages:\n\n"
        f"{error_msgs_str}"
    )

    super().__init__(error_msg)

get_ds_to_write_to_disk_validation_result(ds, out_path, cvs, vrs=None, xr_variable_processor=XRVariableHelper(), frequency_metadata_keys=FrequencyMetadataKeys(), bounds_info=None, time_dimension='time') #

Get the result of validating a dataset that is going to be written to disk

Parameters:

Name Type Description Default
ds Dataset

Dataset to write to disk. May contain one or more variables.

required
out_path Path

Path in which to the dataset will be written

required
cvs Input4MIPsCVs

CVs to use to validate the dataset before writing

required
vrs Union[ValidationResultsStore, None]

The validation results store to use for the validation.

If not supplied, we instantiate a new ValidationResultsStore instance.

None
xr_variable_processor XRVariableProcessorLike

Helper to use for processing the variables in xarray objects.

XRVariableHelper()
frequency_metadata_keys FrequencyMetadataKeys

Metadata definitions for frequency information

FrequencyMetadataKeys()
bounds_info BoundsInfo | None

Metadata definitions for bounds handling

If None, this will be inferred from ds.

None
time_dimension str

The time dimension of the data

'time'

Returns:

Type Description
ValidationResultsStore

The validation results store.

Source code in src/input4mips_validation/validation/datasets_to_write_to_disk.py
def get_ds_to_write_to_disk_validation_result(  # noqa: PLR0913
    ds: xr.Dataset,
    out_path: Path,
    cvs: Input4MIPsCVs,
    vrs: Union[ValidationResultsStore, None] = None,
    xr_variable_processor: XRVariableProcessorLike = XRVariableHelper(),
    frequency_metadata_keys: FrequencyMetadataKeys = FrequencyMetadataKeys(),
    bounds_info: BoundsInfo | None = None,
    time_dimension: str = "time",
) -> ValidationResultsStore:
    """
    Get the result of validating a dataset that is going to be written to disk

    Parameters
    ----------
    ds
        Dataset to write to disk.
        May contain one or more variables.

    out_path
        Path in which to the dataset will be written

    cvs
        CVs to use to validate the dataset before writing

    vrs
        The validation results store to use for the validation.

        If not supplied, we instantiate a new
        [`ValidationResultsStore`][input4mips_validation.validation.error_catching.ValidationResultsStore]
        instance.

    xr_variable_processor
        Helper to use for processing the variables in xarray objects.

    frequency_metadata_keys
        Metadata definitions for frequency information

    bounds_info
        Metadata definitions for bounds handling

        If `None`, this will be inferred from `ds`.

    time_dimension
        The time dimension of the data

    Returns
    -------
    :
        The validation results store.
    """
    if vrs is None:
        vrs = ValidationResultsStore()

    if bounds_info is None:
        bounds_info = BoundsInfo.from_ds(ds=ds, time_dimension=time_dimension)

    # Metadata that can be validated standalone
    verification_standalone = (
        ("creation_date", validate_creation_date),
        ("tracking_id", validate_tracking_id),
    )
    verification_standalone_optional = (
        ("comment", validate_comment),
        # TODO: add verification of this to validate-tree.
        # That's where the real power would come in.
        ("external_variables", validate_external_variables),
    )

    # Metadata that depends on the data
    ds_variables = xr_variable_processor.get_ds_variables(
        ds=ds,
    )
    verification_based_on_data = (
        (
            # Can also be validated against CVs,
            # but the validate_frequency will only use CV values,
            # so would be duplication and isn't strictly needed right now
            # (although could be good to add).
            "frequency",
            partial(
                validate_frequency,
                ds=ds,
                time_dimension=time_dimension,
                frequency_metadata_keys=frequency_metadata_keys,
                bounds_info=bounds_info,
            ),
        ),
        (
            "variable_id",
            partial(
                validate_variable_id,
                ds_variables=ds_variables,
            ),
        ),
    )

    # Metadata that has to be consistent with the CVs,
    # but is not defined by the CVs
    verification_must_be_in_cvs = (
        (
            "activity_id",
            cvs.validate_activity_id,
        ),
    )

    for attribute, validation_function in (
        *verification_standalone,
        *verification_based_on_data,
        *verification_must_be_in_cvs,
    ):
        vrs.wrap(
            validate_attribute,
            func_description=f"Validate the {attribute!r} attribute",
        )(ds, attribute, validation_function)

    # Optional attributes
    for attribute_optional, validation_function_optional in (
        *verification_standalone_optional,
    ):
        if attribute_optional in ds.attrs:
            vrs.wrap(
                validate_attribute,
                func_description=f"Validate the {attribute_optional!r} attribute",
            )(ds, attribute_optional, validation_function_optional)

    # Metadata that is defined by the combination of other metadata and the CVs
    verification_defined_by_cvs_and_other_metadata = (
        (
            "contact",
            "source_id",
            cvs.validate_contact,
        ),
        (
            "source_version",
            "source_id",
            cvs.validate_source_version,
        ),
    )
    for attribute, attribute_dependent_on, validation_function_dependents in (
        *verification_defined_by_cvs_and_other_metadata,
    ):
        vrs.wrap(
            validate_attribute_that_depends_on_other_attribute,
            func_description=f"Validate the {attribute!r} attribute",
        )(ds, attribute, attribute_dependent_on, validation_function_dependents)

    return vrs

validate_attribute(ds, attribute, validation_function) #

Validate an attribute of the dataset

A convenience function so we get sensible error messages, even if the attribute isn't provided by the dataset.

Parameters:

Name Type Description Default
ds Dataset

Dataset to validate

required
attribute str

Attribute of ds to validate

required
validation_function Callable[[str], None]

Function to use to validate the value of attribute

required

Raises:

Type Description
MissingAttributeError

attribute is not in ds's attributes

Source code in src/input4mips_validation/validation/datasets_to_write_to_disk.py
def validate_attribute(
    ds: xr.Dataset, attribute: str, validation_function: Callable[[str], None]
) -> None:
    """
    Validate an attribute of the dataset

    A convenience function so we get sensible error messages,
    even if the attribute isn't provided by the dataset.

    Parameters
    ----------
    ds
        Dataset to validate

    attribute
        Attribute of `ds` to validate

    validation_function
        Function to use to validate the value of `attribute`

    Raises
    ------
    MissingAttributeError
        `attribute` is not in `ds`'s attributes
    """
    if attribute not in ds.attrs:
        raise MissingAttributeError(attribute)

    attribute_value = ds.attrs[attribute]
    validation_function(attribute_value)

validate_attribute_that_depends_on_other_attribute(ds, attribute, attribute_dependent_on, validation_function) #

Validate an attribute of the dataset that depends on another attribute's value

A convenience function so we get sensible error messages, even if one of the attributes isn't provided by the dataset.

Parameters:

Name Type Description Default
ds Dataset

Dataset to validate

required
attribute str

Attribute of ds to validate

required
attribute_dependent_on str

Attribute of ds that the value of attribute depends on

required
validation_function Callable[[str, str], None]

Function to use to validate the value of attribute, given the value of attribute_dependent_on

required

Raises:

Type Description
MissingAttributeError

attribute or attribute_dependent_on is not in ds's attributes

Source code in src/input4mips_validation/validation/datasets_to_write_to_disk.py
def validate_attribute_that_depends_on_other_attribute(
    ds: xr.Dataset,
    attribute: str,
    attribute_dependent_on: str,
    validation_function: Callable[[str, str], None],
) -> None:
    """
    Validate an attribute of the dataset that depends on another attribute's value

    A convenience function so we get sensible error messages,
    even if one of the attributes isn't provided by the dataset.

    Parameters
    ----------
    ds
        Dataset to validate

    attribute
        Attribute of `ds` to validate

    attribute_dependent_on
        Attribute of `ds` that the value of `attribute` depends on

    validation_function
        Function to use to validate the value of `attribute`,
        given the value of `attribute_dependent_on`

    Raises
    ------
    MissingAttributeError
        `attribute` or `attribute_dependent_on` is not in `ds`'s attributes
    """
    if attribute not in ds.attrs:
        raise MissingAttributeError(attribute)

    if attribute_dependent_on not in ds.attrs:
        raise MissingAttributeError(attribute_dependent_on)

    attribute_value = ds.attrs[attribute]
    attribute_dependent_on_value = ds.attrs[attribute_dependent_on]
    validation_function(attribute_value, attribute_dependent_on_value)