Skip to content

input4mips_validation.validation.file#

input4mips_validation.validation.file #

Validation of an individual file in isolation

get_validate_file_result(infile, cv_source=None, cvs=None, xr_variable_processor=XRVariableHelper(), frequency_metadata_keys=FrequencyMetadataKeys(), bounds_info=None, time_dimension='time', allow_cf_checker_warnings=False, vrs=None) #

Get the result of validating a file

This includes checks that the file can be loaded with standard libraries and passes metadata and data checks.

Parameters:

Name Type Description Default
infile Path | str

Path to the file to validate

required
cv_source str | None

Source from which to load the CVs

Only required if cvs is None.

For full details on options for loading CVs, see get_raw_cvs_loader.

None
cvs Input4MIPsCVs | None

CVs to use when validating the file.

If these are passed, then cv_source is ignored.

None
xr_variable_processor XRVariableProcessorLike

Helper to use for processing the variables in xarray objects.

XRVariableHelper()
frequency_metadata_keys FrequencyMetadataKeys

Metadata definitions for frequency information

FrequencyMetadataKeys()
bounds_info BoundsInfo | None

Metadata definitions for bounds handling

If None, this will be inferred from the file.

None
time_dimension str

The time dimension of the data

'time'
allow_cf_checker_warnings bool

Should warnings from the CF-checker be allowed?

In otherwise, is a file allowed to pass validation, even if there are warnings from the CF-checker?

False
vrs Union[ValidationResultsStore, None]

The validation results store to use for the validation.

If not supplied, we instantiate a new ValidationResultsStore instance.

None

Returns:

Type Description
ValidationResultsStore

The validation results store.

Source code in src/input4mips_validation/validation/file.py
def get_validate_file_result(  # noqa: PLR0913
    infile: Path | str,
    cv_source: str | None = None,
    cvs: Input4MIPsCVs | None = None,
    xr_variable_processor: XRVariableProcessorLike = XRVariableHelper(),
    frequency_metadata_keys: FrequencyMetadataKeys = FrequencyMetadataKeys(),
    bounds_info: BoundsInfo | None = None,
    time_dimension: str = "time",
    allow_cf_checker_warnings: bool = False,
    vrs: Union[ValidationResultsStore, None] = None,
) -> ValidationResultsStore:
    """
    Get the result of validating a file

    This includes checks that the file can be loaded with standard libraries
    and passes metadata and data checks.

    Parameters
    ----------
    infile
        Path to the file to validate

    cv_source
        Source from which to load the CVs

        Only required if `cvs` is `None`.

        For full details on options for loading CVs,
        see
        [`get_raw_cvs_loader`][input4mips_validation.cvs.loading_raw.get_raw_cvs_loader].

    cvs
        CVs to use when validating the file.

        If these are passed, then `cv_source` is ignored.

    xr_variable_processor
        Helper to use for processing the variables in xarray objects.

    frequency_metadata_keys
        Metadata definitions for frequency information

    bounds_info
        Metadata definitions for bounds handling

        If `None`, this will be inferred from the file.

    time_dimension
        The time dimension of the data

    allow_cf_checker_warnings
        Should warnings from the CF-checker be allowed?

        In otherwise, is a file allowed to pass validation,
        even if there are warnings from the CF-checker?

    vrs
        The validation results store to use for the validation.

        If not supplied, we instantiate a new
        [`ValidationResultsStore`][input4mips_validation.validation.error_catching.ValidationResultsStore]
        instance.

    Returns
    -------
    :
        The validation results store.
    """
    logger.log(
        LOG_LEVEL_INFO_FILE.name, f"Creating validation results for file: {infile}"
    )

    if vrs is None:
        logger.debug("Instantiating a new `ValidationResultsStore`")
        vrs = ValidationResultsStore()

    if cvs is None:
        # Load CVs, we need them for the following steps
        cvs = vrs.wrap(
            load_cvs,
            func_description="Load controlled vocabularies to use during validation",
        )(cv_source=cv_source).result

    elif cv_source is not None:
        logger.warning(
            "Ignoring provided value for `cv_source` (using provided cvs instead)."
        )

    # Basic loading - xarray
    ds_xr_open = vrs.wrap(
        xr.open_dataset, func_description="Open data with `xr.open_dataset`"
    )(infile, use_cftime=True).result
    # # The below actually loads the data into memory.
    # # This can be very slow, hence turn off for now.
    # # TODO: discuss whether we want to have actual data loading checks or not.
    # ds_xr_load = vrs.wrap(
    #     xr.load_dataset, func_description="Load data with `xr.load_dataset`"
    # )(infile)

    # Basic loading - iris
    cubes = vrs.wrap(iris.load, func_description="Load data with `iris.load`")(
        infile
    ).result
    if cubes is not None and len(cubes) == 1:
        vrs.wrap(iris.load_cube, func_description="Load data with `iris.load_cube`")(
            infile
        )

    if ds_xr_open is None:
        logger.error("Not running cf-checker, file wouldn't load with xarray")

    else:
        # CF-checker
        logger.log(
            LOG_LEVEL_INFO_INDIVIDUAL_CHECK.name,
            f"Using the cf-checker to check {infile}",
        )
        logger.debug(f"{allow_cf_checker_warnings=}")
        vrs.wrap(check_with_cf_checker, func_description="Check data with cf-checker")(
            infile, ds=ds_xr_open, no_raise_if_only_warnings=allow_cf_checker_warnings
        )

    if cvs is None:
        logger.error("Skipping checks of CV consistency because cvs loading failed")

    elif cubes is None:
        logger.error("Skipping checks of CV consistency because cubes loading failed")

    else:
        # TODO: check consistency with CVs

        # TODO: Check that the data, metadata and CVs are all consistent
        # Check that the filename and metadata are consistent
        # Checking of the directory and metadata conssitency
        # can only be done in validate_tree.

        ds_careful_load = ds_from_iris_cubes(
            cubes,
            xr_variable_processor=xr_variable_processor,
            raw_file=infile,
            time_dimension=time_dimension,
        )
        vrs = get_ds_to_write_to_disk_validation_result(
            ds=ds_careful_load,
            out_path=Path(infile),
            cvs=cvs,
            vrs=vrs,
            xr_variable_processor=xr_variable_processor,
            frequency_metadata_keys=frequency_metadata_keys,
            bounds_info=bounds_info,
            time_dimension=time_dimension,
        )

    logger.log(
        LOG_LEVEL_INFO_FILE.name, f"Created validation results for file: {infile}"
    )
    return vrs