source_id -> source_provenance

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
This commit is contained in:
Todd Gamblin 2024-10-28 01:16:16 -07:00
parent bb77a7733c
commit eda1a6f60f
No known key found for this signature in database
GPG Key ID: C16729F1AACF66C6
2 changed files with 35 additions and 60 deletions

View File

@ -33,7 +33,7 @@
import urllib.parse import urllib.parse
import urllib.request import urllib.request
from pathlib import PurePath from pathlib import PurePath
from typing import List, Optional from typing import Dict, List, Optional
import llnl.url import llnl.url
import llnl.util import llnl.util
@ -111,10 +111,18 @@ def __init__(self, **kwargs):
self.package = None self.package = None
def spec_attrs(self): def source_provenance(self) -> Dict:
"""Create a dictionary of attributes that describe this fetch strategy for a Spec. """Create a metadata dictionary that describes the artifacts fetched by this FetchStrategy.
The returned dictionary is added to the content used to determine the full hash
for a package. It should be serializable as JSON.
It should include data like sha256 hashes for archives, commits for source
repositories, and any information needed to describe exactly what artifacts went
into a build.
If a package has no source artifacts, it should return an empty dictionary.
This is included in the serialized Spec format to store provenance (like hashes).
""" """
attrs = syaml.syaml_dict() attrs = syaml.syaml_dict()
if self.url_attr: if self.url_attr:
@ -167,17 +175,6 @@ def cachable(self):
bool: True if can cache, False otherwise. bool: True if can cache, False otherwise.
""" """
def source_id(self):
"""A unique ID for the source.
It is intended that a human could easily generate this themselves using
the information available to them in the Spack package.
The returned value is added to the content which determines the full
hash for a package using `str()`.
"""
raise NotImplementedError
def mirror_id(self): def mirror_id(self):
"""This is a unique ID for a source that is intended to help identify """This is a unique ID for a source that is intended to help identify
reuse of resources across packages. reuse of resources across packages.
@ -228,9 +225,9 @@ def cachable(self):
"""Report False as there is no code to cache.""" """Report False as there is no code to cache."""
return False return False
def source_id(self): def source_provenance(self) -> Dict:
"""BundlePackages don't have a source id.""" """BundlePackages don't have a source of their own."""
return "" return {}
def mirror_id(self): def mirror_id(self):
"""BundlePackages don't have a mirror id.""" """BundlePackages don't have a mirror id."""
@ -269,8 +266,14 @@ def __init__(self, *, url: str, checksum: Optional[str] = None, **kwargs) -> Non
self.extension: Optional[str] = kwargs.get("extension", None) self.extension: Optional[str] = kwargs.get("extension", None)
self._effective_url: Optional[str] = None self._effective_url: Optional[str] = None
def spec_attrs(self): @property
attrs = super().spec_attrs() def curl(self) -> Executable:
if not self._curl:
self._curl = web_util.require_curl()
return self._curl
def source_provenance(self) -> Dict:
attrs = super().source_provenance()
if self.digest: if self.digest:
try: try:
hash_type = spack.util.crypto.hash_algo_for_digest(self.digest) hash_type = spack.util.crypto.hash_algo_for_digest(self.digest)
@ -279,15 +282,6 @@ def spec_attrs(self):
attrs[hash_type] = self.digest attrs[hash_type] = self.digest
return attrs return attrs
@property
def curl(self) -> Executable:
if not self._curl:
self._curl = web_util.require_curl()
return self._curl
def source_id(self):
return self.digest
def mirror_id(self): def mirror_id(self):
if not self.digest: if not self.digest:
return None return None
@ -759,16 +753,6 @@ def __init__(self, **kwargs):
self.get_full_repo = kwargs.get("get_full_repo", False) self.get_full_repo = kwargs.get("get_full_repo", False)
self.git_sparse_paths = kwargs.get("git_sparse_paths", None) self.git_sparse_paths = kwargs.get("git_sparse_paths", None)
def spec_attrs(self):
attrs = super().spec_attrs()
# need to fully resolve submodule callbacks for node dicts
submodules = attrs.get("submodules", None)
if submodules and callable(submodules):
attrs["submodules"] = submodules(self.package)
return attrs
@property @property
def git_version(self): def git_version(self):
return GitFetchStrategy.version_from_git(self.git) return GitFetchStrategy.version_from_git(self.git)
@ -807,9 +791,15 @@ def git(self):
def cachable(self): def cachable(self):
return self.cache_enabled and bool(self.commit) return self.cache_enabled and bool(self.commit)
def source_id(self): def source_provenance(self) -> Dict:
# TODO: tree-hash would secure download cache and mirrors, commit only secures checkouts. attrs = super().source_provenance()
return self.commit
# need to fully resolve submodule callbacks for node dicts
submodules = attrs.get("submodules", None)
if submodules and callable(submodules):
attrs["submodules"] = submodules(self.package)
return attrs
def mirror_id(self): def mirror_id(self):
if self.commit: if self.commit:
@ -1119,17 +1109,6 @@ def cvs(self):
def cachable(self): def cachable(self):
return self.cache_enabled and (bool(self.branch) or bool(self.date)) return self.cache_enabled and (bool(self.branch) or bool(self.date))
def source_id(self):
if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible
return None
id = "id"
if self.branch:
id += "-branch=" + self.branch
if self.date:
id += "-date=" + self.date
return id
def mirror_id(self): def mirror_id(self):
if not (self.branch or self.date): if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible # We need a branch or a date to make a checkout reproducible
@ -1232,9 +1211,6 @@ def svn(self):
def cachable(self): def cachable(self):
return self.cache_enabled and bool(self.revision) return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
def mirror_id(self): def mirror_id(self):
if self.revision: if self.revision:
repo_path = urllib.parse.urlparse(self.url).path repo_path = urllib.parse.urlparse(self.url).path
@ -1342,9 +1318,6 @@ def hg(self):
def cachable(self): def cachable(self):
return self.cache_enabled and bool(self.revision) return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
def mirror_id(self): def mirror_id(self):
if self.revision: if self.revision:
repo_path = urllib.parse.urlparse(self.url).path repo_path = urllib.parse.urlparse(self.url).path

View File

@ -1788,7 +1788,9 @@ def artifact_hashes(self, content=None):
sources = [] sources = []
try: try:
fetcher = fs.for_package_version(self) fetcher = fs.for_package_version(self)
sources.append(fetcher.spec_attrs()) provenance_dict = fetcher.source_provenance()
if provenance_dict:
sources.append(provenance_dict)
except (fs.ExtrapolationError, fs.InvalidArgsError): except (fs.ExtrapolationError, fs.InvalidArgsError):
# ExtrapolationError happens if the package has no fetchers defined. # ExtrapolationError happens if the package has no fetchers defined.