Skip to content

input4mips_validation.cvs.loading_raw#

input4mips_validation.cvs.loading_raw #

Tools for loading the raw CVs

This allows us to access CVs defined locally as well as in remote sources, specifically on GitHub.

RawCVLoader #

Bases: Protocol

Loader of raw CV data

Source code in src/input4mips_validation/cvs/loading_raw.py
class RawCVLoader(Protocol):
    """Interface for objects that can load raw CV data"""

    def load_raw(self, filename: str) -> str:
        """
        Load the raw CV data stored under a given filename

        Parameters
        ----------
        filename
            Name of the file whose raw CV data should be loaded

        Returns
        -------
        :
            Raw CV data
        """
        ...

load_raw(filename) #

Load raw CV data

Parameters:

Name Type Description Default
filename str

Filename from which to load raw CV data

required

Returns:

Type Description
Raw CV data
Source code in src/input4mips_validation/cvs/loading_raw.py
def load_raw(self, filename: str) -> str:
    """
    Load the raw CV data stored under a given filename

    Parameters
    ----------
    filename
        Name of the file whose raw CV data should be loaded

    Returns
    -------
    :
        Raw CV data
    """
    ...

RawCVLoaderBaseURL #

Loader of raw CV data from some base URL

Uses pooch.retrieve to manage downloading and storage of files.

Source code in src/input4mips_validation/cvs/loading_raw.py
@frozen
class RawCVLoaderBaseURL:
    """
    Loader of raw CV data from some base URL

    Uses [pooch.retrieve][] to manage downloading and storage of files.
    """

    base_url: str = field(validator=attrs.validators.instance_of(str))
    """
    Base URL from which to load files

    The filename is simply appended to the end of the base URL
    to create the URL from which to request the file.
    """

    download_path: Path = HERE / "user_cvs"
    """
    Path in which to save downloaded files

    Passed to [pooch.retrieve][].

    Defaults to being inside the package so that downloaded files
    are removed when the package is removed.
    """

    force_download: bool = False
    """
    Whether to force a new download of the file if it already exists
    """

    @base_url.validator
    def ends_with_forward_slash(
        self, attribute: attr.Attribute[Any], value: str
    ) -> None:
        """
        Assert that the value ends with a forward slash

        Parameters
        ----------
        attribute
            Attribute being validated

        value
            Value to validate

        Raises
        ------
        ValueError
            `value` does not end with a forward slash
        """
        if value.endswith("/"):
            return

        msg = f"{attribute.name} must end with a '/', received: {value=!r}"
        raise ValueError(msg)

    def load_raw(
        self, filename: str, downloader: pooch.HTTPDownloader | None = None
    ) -> str:
        """
        Load raw CV data

        Parameters
        ----------
        filename
            Filename from which to load raw CV data

        downloader
            Downloader to use when retrieving data with pooch.

            If not supplied, `DEFAULT_DOWNLOADER` is used.

        Returns
        -------
        :
            Raw CV data
        """
        # The filename is appended directly to the base URL,
        # which is why `base_url` is validated to end with a "/".
        url = f"{self.base_url}{filename}"
        fname_pooch = pooch.utils.unique_file_name(url)

        if self.force_download:
            # Remove any previously downloaded copy before retrieving.
            # `missing_ok=True` avoids the race between checking that
            # the file exists and deleting it.
            (self.download_path / fname_pooch).unlink(missing_ok=True)

        if downloader is None:
            downloader = DEFAULT_DOWNLOADER

        local_file = pooch.retrieve(
            url=url,
            fname=fname_pooch,
            path=self.download_path,
            known_hash=None,
            downloader=downloader,
        )

        # NOTE(review): the file is decoded with the platform default encoding;
        # confirm whether an explicit encoding should be passed here.
        with open(Path(local_file)) as fh:
            return fh.read()

base_url: str = field(validator=attrs.validators.instance_of(str)) class-attribute instance-attribute #

Base URL from which to load files

The filename is simply appended to the end of the base URL to create the URL from which to request the file.

download_path: Path = HERE / 'user_cvs' class-attribute instance-attribute #

Path in which to save downloaded files

Passed to pooch.retrieve.

Defaults to being inside the package so that downloaded files are removed when the package is removed.

force_download: bool = False class-attribute instance-attribute #

Whether to force a new download of the file if it already exists

ends_with_forward_slash(attribute, value) #

Assert that the value ends with a forward slash

Source code in src/input4mips_validation/cvs/loading_raw.py
@base_url.validator
def ends_with_forward_slash(
    self, attribute: attr.Attribute[Any], value: str
) -> None:
    """
    Check that the value has a trailing forward slash

    Parameters
    ----------
    attribute
        Attribute being validated

    value
        Value to validate

    Raises
    ------
    ValueError
        `value` does not end with a forward slash
    """
    if value.endswith("/"):
        return

    msg = f"{attribute.name} must end with a '/', received: {value=!r}"
    raise ValueError(msg)

load_raw(filename, downloader=None) #

Load raw CV data

Parameters:

Name Type Description Default
filename str

Filename from which to load raw CV data

required
downloader HTTPDownloader | None

Downloader to use when retrieving data with pooch.

None

Returns:

Type Description
Raw CV data
Source code in src/input4mips_validation/cvs/loading_raw.py
def load_raw(
    self, filename: str, downloader: pooch.HTTPDownloader | None = None
) -> str:
    """
    Load raw CV data

    Parameters
    ----------
    filename
        Filename from which to load raw CV data

    downloader
        Downloader to use when retrieving data with pooch.

        If not supplied, `DEFAULT_DOWNLOADER` is used.

    Returns
    -------
    :
        Raw CV data
    """
    # The filename is appended directly to the base URL
    url = f"{self.base_url}{filename}"
    fname_pooch = pooch.utils.unique_file_name(url)

    if self.force_download:
        # Remove any previously downloaded copy before retrieving.
        # `missing_ok=True` avoids the race between checking that
        # the file exists and deleting it.
        (self.download_path / fname_pooch).unlink(missing_ok=True)

    if downloader is None:
        downloader = DEFAULT_DOWNLOADER

    local_file = pooch.retrieve(
        url=url,
        fname=fname_pooch,
        path=self.download_path,
        known_hash=None,
        downloader=downloader,
    )

    # NOTE(review): the file is decoded with the platform default encoding;
    # confirm whether an explicit encoding should be passed here.
    with open(Path(local_file)) as fh:
        return fh.read()

RawCVLoaderKnownRemoteRegistry #

Loader of raw CV data from a known remote registry

Known remote registries are assumed to be represented as pooch.Pooch.

Source code in src/input4mips_validation/cvs/loading_raw.py
@frozen
class RawCVLoaderKnownRemoteRegistry:
    """
    Loader of raw CV data from a known remote registry

    Known remote registries are assumed to be represented as [pooch.Pooch][].
    """

    registry: pooch.Pooch
    """
    Pooch registry to use for retrieving and managing files
    """

    force_download: bool = False
    """
    Whether to force a new download of the file if it already exists
    """

    def load_raw(
        self, filename: str, downloader: pooch.HTTPDownloader | None = None
    ) -> str:
        """
        Load raw CV data

        Parameters
        ----------
        filename
            Filename from which to load raw CV data

        downloader
            Downloader to use when fetching data with pooch.

            If not supplied, we use a basic default HTTP downloader.

        Returns
        -------
        :
            Raw CV data
        """
        if self.force_download:
            # Remove any previously fetched copy before fetching again.
            # `abspath` is the expanded, absolute storage location
            # that the registry itself uses when fetching,
            # so the file we delete is the file pooch would otherwise reuse.
            # `missing_ok=True` avoids the race between checking that
            # the file exists and deleting it.
            cached_file = Path(self.registry.abspath) / filename
            cached_file.unlink(missing_ok=True)

        if downloader is None:
            downloader = DEFAULT_DOWNLOADER

        local_file = self.registry.fetch(filename, downloader=downloader)

        # NOTE(review): the file is decoded with the platform default encoding;
        # confirm whether an explicit encoding should be passed here.
        with open(Path(local_file)) as fh:
            return fh.read()

force_download: bool = False class-attribute instance-attribute #

Whether to force a new download of the file if it already exists

registry: pooch.Pooch instance-attribute #

Pooch registry to use for retrieving and managing files

load_raw(filename, downloader=None) #

Load raw CV data

Parameters:

Name Type Description Default
filename str

Filename from which to load raw CV data

required
downloader HTTPDownloader | None

Downloader to use when fetching data with pooch.

If not supplied, we use a basic default HTTP downloader.

None

Returns:

Type Description
Raw CV data
Source code in src/input4mips_validation/cvs/loading_raw.py
def load_raw(
    self, filename: str, downloader: pooch.HTTPDownloader | None = None
) -> str:
    """
    Load raw CV data

    Parameters
    ----------
    filename
        Filename from which to load raw CV data

    downloader
        Downloader to use when fetching data with pooch.

        If not supplied, we use a basic default HTTP downloader.

    Returns
    -------
    :
        Raw CV data
    """
    if self.force_download:
        # Remove any previously fetched copy before fetching again.
        # `abspath` is the expanded, absolute storage location
        # that the registry itself uses when fetching,
        # so the file we delete is the file pooch would otherwise reuse.
        # `missing_ok=True` avoids the race between checking that
        # the file exists and deleting it.
        cached_file = Path(self.registry.abspath) / filename
        cached_file.unlink(missing_ok=True)

    if downloader is None:
        downloader = DEFAULT_DOWNLOADER

    local_file = self.registry.fetch(filename, downloader=downloader)

    # NOTE(review): the file is decoded with the platform default encoding;
    # confirm whether an explicit encoding should be passed here.
    with open(Path(local_file)) as fh:
        return fh.read()

RawCVLoaderLocal #

Loader of raw CV data from local data

Source code in src/input4mips_validation/cvs/loading_raw.py
@frozen
class RawCVLoaderLocal:
    """
    Loader of raw CV data that is stored locally
    """

    root_dir: Path
    """
    Root directory in which the raw CV data is stored
    """

    def load_raw(self, filename: str) -> str:
        """
        Load raw CV data from a file inside the root directory

        Parameters
        ----------
        filename
            Filename from which to load raw CV data,
            relative to `self.root_dir`

        Returns
        -------
        :
            Raw CV data
        """
        target = self.root_dir / filename
        with open(target) as handle:
            return handle.read()

root_dir: Path instance-attribute #

Root directory in which the raw CV data is stored

load_raw(filename) #

Load raw CV data

Parameters:

Name Type Description Default
filename str

Filename from which to load raw CV data

required

Returns:

Type Description
Raw CV data
Source code in src/input4mips_validation/cvs/loading_raw.py
def load_raw(self, filename: str) -> str:
    """
    Load raw CV data from a file inside the root directory

    Parameters
    ----------
    filename
        Filename from which to load raw CV data,
        relative to `self.root_dir`

    Returns
    -------
    :
        Raw CV data
    """
    target = self.root_dir / filename
    with open(target) as handle:
        return handle.read()

convert_force_download_from_env_to_bool(force_download_from_env) #

Convert the value of force download retrieved from an environment variable to a bool

Parameters:

Name Type Description Default
force_download_from_env str

Force download value retrieved from an environment variable

required

Returns:

Type Description
bool

Boolean equivalent of force_download_from_env

Source code in src/input4mips_validation/cvs/loading_raw.py
def convert_force_download_from_env_to_bool(force_download_from_env: str) -> bool:
    """
    Translate a force download environment variable value into a bool

    The comparison is case-insensitive.

    Parameters
    ----------
    force_download_from_env
        Force download value retrieved from an environment variable

    Returns
    -------
    :
        Boolean equivalent of `force_download_from_env`

    Raises
    ------
    NotImplementedError
        `force_download_from_env` is neither "true" nor "false"
        (ignoring case)
    """
    normalised = force_download_from_env.lower()

    if normalised == "true":
        return True

    if normalised == "false":
        return False

    # Report the value exactly as it was received
    raise NotImplementedError(force_download_from_env)

get_raw_cvs_loader(cv_source=None, force_download=None) #

Get the raw CVs loader

Parameters:

Name Type Description Default
cv_source None | str | Path

String identifying the source of the CVs.

If not supplied, this is retrieved from the environment variable INPUT4MIPS_VALIDATION_CV_SOURCE.

If this environment variable is also not set, we raise a NotImplementedError.

If this starts with "gh:", we retrieve the data from PCMDI's GitHub, using everything after the colon as the ID for the Git commit to use (where the ID can be a branch name, a tag or a commit ID).

Otherwise we simply return the path as provided and use the validators package to decide if the source points to a URL or not.

None
force_download bool | None

Whether the raw CV loader should be configured so that downloads are forced, if we are downloading from a remote source.

If not supplied, this is retrieved from the environment variable INPUT4MIPS_VALIDATION_CV_SOURCE_FORCE_DOWNLOAD.

If this environment variable is also not set, we assume False.

None

Returns:

Type Description
RawCVLoader

Raw CV loader

Raises:

Type Description
NotImplementedError

cv_source is not supplied and INPUT4MIPS_VALIDATION_CV_SOURCE is also not set.

Source code in src/input4mips_validation/cvs/loading_raw.py
def get_raw_cvs_loader(
    cv_source: None | str | Path = None, force_download: bool | None = None
) -> RawCVLoader:
    """
    Get the raw CVs loader

    Parameters
    ----------
    cv_source
        String identifying the source of the CVs.

        If not supplied, this is retrieved from the environment variable
        `INPUT4MIPS_VALIDATION_CV_SOURCE`.

        If this environment variable is also not set,
        we raise a `NotImplementedError`.

        If this starts with "gh:", we retrieve the data from PCMDI's GitHub,
        using everything after the "gh:" prefix
        as the ID for the Git commit to use
        (where the ID can be a branch name, a tag or a commit ID).

        Otherwise we simply return the path as provided
        and use the [validators](https://validators.readthedocs.io/en/stable)
        package to decide if the source points to a URL or not.

    force_download
        Whether the raw CV loader should be configured
        so that downloads are forced,
        if we are downloading from a remote source.

        If not supplied, this is retrieved from the environment variable
        `INPUT4MIPS_VALIDATION_CV_SOURCE_FORCE_DOWNLOAD`.

        If this environment variable is also not set,
        we assume `False`.

    Returns
    -------
    :
        Raw CV loader

    Raises
    ------
    NotImplementedError
        `cv_source` is not supplied and
        `INPUT4MIPS_VALIDATION_CV_SOURCE` is also not set.
    """
    if cv_source is None:
        cv_source = os.environ.get("INPUT4MIPS_VALIDATION_CV_SOURCE", None)

    if cv_source is None:
        msg = "Default source has not been decided yet"
        raise NotImplementedError(msg)

    if force_download is None:
        try:
            force_download_from_env = os.environ[
                "INPUT4MIPS_VALIDATION_CV_SOURCE_FORCE_DOWNLOAD"
            ]
        except KeyError:
            # Nothing provided as environment variable, hence set a default
            force_download = False
        else:
            force_download = convert_force_download_from_env_to_bool(
                force_download_from_env
            )

    gh_prefix = "gh:"
    if isinstance(cv_source, str) and cv_source.startswith(gh_prefix):
        # Expand out the given value.
        # Strip only the leading prefix so that an ID
        # which itself contains "gh:" is kept intact.
        source = cv_source[len(gh_prefix) :]
        cv_source = (
            f"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/{source}/CVs/"
        )

    if isinstance(cv_source, Path):
        res: RawCVLoader = RawCVLoaderLocal(cv_source)

    elif not validators.url(cv_source):
        # Not a URL, so treat the string as a local path
        res = RawCVLoaderLocal(Path(cv_source))

    else:
        try:
            res = RawCVLoaderKnownRemoteRegistry(
                KNOWN_REGISTRIES[cv_source], force_download=force_download
            )
        except KeyError:
            # No known registry for this URL, so use it as a plain base URL
            res = RawCVLoaderBaseURL(base_url=cv_source, force_download=force_download)

    return res