source_id -> source_provenance

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
This commit is contained in:
Todd Gamblin 2024-10-28 01:16:16 -07:00
parent bb77a7733c
commit eda1a6f60f
No known key found for this signature in database
GPG Key ID: C16729F1AACF66C6
2 changed files with 35 additions and 60 deletions

View File

@ -33,7 +33,7 @@
import urllib.parse
import urllib.request
from pathlib import PurePath
from typing import List, Optional
from typing import Dict, List, Optional
import llnl.url
import llnl.util
@ -111,10 +111,18 @@ def __init__(self, **kwargs):
self.package = None
def spec_attrs(self):
"""Create a dictionary of attributes that describe this fetch strategy for a Spec.
def source_provenance(self) -> Dict:
"""Create a metadata dictionary that describes the artifacts fetched by this FetchStrategy.
The returned dictionary is added to the content used to determine the full hash
for a package. It should be serializable as JSON.
It should include data like sha256 hashes for archives, commits for source
repositories, and any information needed to describe exactly what artifacts went
into a build.
If a package has no source artifacts, it should return an empty dictionary.
This is included in the serialized Spec format to store provenance (like hashes).
"""
attrs = syaml.syaml_dict()
if self.url_attr:
@ -167,17 +175,6 @@ def cachable(self):
bool: True if can cache, False otherwise.
"""
def source_id(self):
"""A unique ID for the source.
It is intended that a human could easily generate this themselves using
the information available to them in the Spack package.
The returned value is added to the content which determines the full
hash for a package using `str()`.
"""
raise NotImplementedError
def mirror_id(self):
"""This is a unique ID for a source that is intended to help identify
reuse of resources across packages.
@ -228,9 +225,9 @@ def cachable(self):
"""Report False as there is no code to cache."""
return False
def source_id(self):
"""BundlePackages don't have a source id."""
return ""
def source_provenance(self) -> Dict:
"""BundlePackages don't have a source of their own."""
return {}
def mirror_id(self):
"""BundlePackages don't have a mirror id."""
@ -269,8 +266,14 @@ def __init__(self, *, url: str, checksum: Optional[str] = None, **kwargs) -> Non
self.extension: Optional[str] = kwargs.get("extension", None)
self._effective_url: Optional[str] = None
def spec_attrs(self):
attrs = super().spec_attrs()
@property
def curl(self) -> Executable:
if not self._curl:
self._curl = web_util.require_curl()
return self._curl
def source_provenance(self) -> Dict:
attrs = super().source_provenance()
if self.digest:
try:
hash_type = spack.util.crypto.hash_algo_for_digest(self.digest)
@ -279,15 +282,6 @@ def spec_attrs(self):
attrs[hash_type] = self.digest
return attrs
@property
def curl(self) -> Executable:
if not self._curl:
self._curl = web_util.require_curl()
return self._curl
def source_id(self):
return self.digest
def mirror_id(self):
if not self.digest:
return None
@ -759,16 +753,6 @@ def __init__(self, **kwargs):
self.get_full_repo = kwargs.get("get_full_repo", False)
self.git_sparse_paths = kwargs.get("git_sparse_paths", None)
def spec_attrs(self):
attrs = super().spec_attrs()
# need to fully resolve submodule callbacks for node dicts
submodules = attrs.get("submodules", None)
if submodules and callable(submodules):
attrs["submodules"] = submodules(self.package)
return attrs
@property
def git_version(self):
return GitFetchStrategy.version_from_git(self.git)
@ -807,9 +791,15 @@ def git(self):
def cachable(self):
return self.cache_enabled and bool(self.commit)
def source_id(self):
# TODO: tree-hash would secure download cache and mirrors, commit only secures checkouts.
return self.commit
def source_provenance(self) -> Dict:
attrs = super().source_provenance()
# need to fully resolve submodule callbacks for node dicts
submodules = attrs.get("submodules", None)
if submodules and callable(submodules):
attrs["submodules"] = submodules(self.package)
return attrs
def mirror_id(self):
if self.commit:
@ -1119,17 +1109,6 @@ def cvs(self):
def cachable(self):
return self.cache_enabled and (bool(self.branch) or bool(self.date))
def source_id(self):
if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible
return None
id = "id"
if self.branch:
id += "-branch=" + self.branch
if self.date:
id += "-date=" + self.date
return id
def mirror_id(self):
if not (self.branch or self.date):
# We need a branch or a date to make a checkout reproducible
@ -1232,9 +1211,6 @@ def svn(self):
def cachable(self):
return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
def mirror_id(self):
if self.revision:
repo_path = urllib.parse.urlparse(self.url).path
@ -1342,9 +1318,6 @@ def hg(self):
def cachable(self):
return self.cache_enabled and bool(self.revision)
def source_id(self):
return self.revision
def mirror_id(self):
if self.revision:
repo_path = urllib.parse.urlparse(self.url).path

View File

@ -1788,7 +1788,9 @@ def artifact_hashes(self, content=None):
sources = []
try:
fetcher = fs.for_package_version(self)
sources.append(fetcher.spec_attrs())
provenance_dict = fetcher.source_provenance()
if provenance_dict:
sources.append(provenance_dict)
except (fs.ExtrapolationError, fs.InvalidArgsError):
# ExtrapolationError happens if the package has no fetchers defined.