repo: refactor indexes to be more extensible
- Virtual provider cache and tags were previously generated by nearly
  identical but separate methods.
- Factor out an Indexer interface for updating repository caches, and provide
  implementations for each type of index (TagIndex, ProviderIndex) so that
  more can be added if needed.
- Among other things, this allows all indexes to be updated at once. This is
  an advantage because loading package files is the real overhead, and
  building the indexes once the packages are loaded is trivial. We avoid
  extra bulk read-ins by generating all package indexes at once.
- This can be extended for dependents (reverse dependencies) and patches
  later.
parent 527ff860f0
commit c1d7adaaac
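The message above says that more index types (dependents, patches) can be
added later. As a rough illustration of what that would look like against the
Indexer interface introduced in the diff below, here is a hedged sketch.
PatchIndex, PatchIndexer, and the JSON layout are hypothetical names invented
for this example, and the `spack.repo` import assumes the new classes live in
that module, as the surrounding code suggests.

    import json

    from spack.repo import Indexer   # assumed location of the new base class


    class PatchIndex(object):
        """Hypothetical index mapping package names to their patch files."""
        def __init__(self):
            self.patches = {}

        @classmethod
        def from_json(cls, stream):
            index = cls()
            index.patches = json.load(stream)
            return index

        def to_json(self, stream):
            json.dump(self.patches, stream)


    class PatchIndexer(Indexer):
        """Lifecycle methods for the hypothetical PatchIndex."""
        def _create(self):
            return PatchIndex()

        def read(self, stream):
            self.index = PatchIndex.from_json(stream)

        def update(self, pkg_fullname):
            # Illustration only: a real implementation would record the
            # package's patches here.
            self.index.patches.setdefault(pkg_fullname, [])

        def write(self, stream):
            self.index.to_json(stream)

    # A Repo would register it next to the existing indexers, e.g.:
    #     self._repo_index.add_indexer('patches', PatchIndexer())

The diff follows.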
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import abc
 import collections
 import os
 import stat
@@ -14,7 +15,7 @@
 import traceback
 import json
 from contextlib import contextmanager
-from six import string_types
+from six import string_types, add_metaclass
 
 try:
     from collections.abc import Mapping
@@ -230,111 +231,153 @@ def update_package(self, pkg_name):
             self._tag_dict[tag].append(package.name)
 
 
-@llnl.util.lang.memoized
-def make_provider_index_cache(packages_path, namespace):
-    """Lazily updates the provider index cache associated with a repository,
-    if need be, then returns it. Caches results for later look-ups.
-
-    Args:
-        packages_path: path of the repository
-        namespace: namespace of the repository
-
-    Returns:
-        instance of ProviderIndex
-    """
-    # Map that goes from package names to stat info
-    fast_package_checker = FastPackageChecker(packages_path)
-
-    # Filename of the provider index cache
-    cache_filename = 'providers/{0}-index.yaml'.format(namespace)
-
-    # Compute which packages needs to be updated in the cache
-    misc_cache = spack.caches.misc_cache
-    index_mtime = misc_cache.mtime(cache_filename)
-
-    needs_update = [
-        x for x, sinfo in fast_package_checker.items()
-        if sinfo.st_mtime > index_mtime
-    ]
-
-    # Read the old ProviderIndex, or make a new one.
-    index_existed = misc_cache.init_entry(cache_filename)
-
-    if index_existed and not needs_update:
-
-        # If the provider index exists and doesn't need an update
-        # just read from it
-        with misc_cache.read_transaction(cache_filename) as f:
-            index = ProviderIndex.from_yaml(f)
-
-    else:
-
-        # Otherwise we need a write transaction to update it
-        with misc_cache.write_transaction(cache_filename) as (old, new):
-
-            index = ProviderIndex.from_yaml(old) if old else ProviderIndex()
-
-            for pkg_name in needs_update:
-                namespaced_name = '{0}.{1}'.format(namespace, pkg_name)
-                index.remove_provider(namespaced_name)
-                index.update(namespaced_name)
-
-            index.to_yaml(new)
-
-    return index
-
-
-@llnl.util.lang.memoized
-def make_tag_index_cache(packages_path, namespace):
-    """Lazily updates the tag index cache associated with a repository,
-    if need be, then returns it. Caches results for later look-ups.
-
-    Args:
-        packages_path: path of the repository
-        namespace: namespace of the repository
-
-    Returns:
-        instance of TagIndex
-    """
-    # Map that goes from package names to stat info
-    fast_package_checker = FastPackageChecker(packages_path)
-
-    # Filename of the provider index cache
-    cache_filename = 'tags/{0}-index.json'.format(namespace)
-
-    # Compute which packages needs to be updated in the cache
-    misc_cache = spack.caches.misc_cache
-    index_mtime = misc_cache.mtime(cache_filename)
-
-    needs_update = [
-        x for x, sinfo in fast_package_checker.items()
-        if sinfo.st_mtime > index_mtime
-    ]
-
-    # Read the old ProviderIndex, or make a new one.
-    index_existed = misc_cache.init_entry(cache_filename)
-
-    if index_existed and not needs_update:
-
-        # If the provider index exists and doesn't need an update
-        # just read from it
-        with misc_cache.read_transaction(cache_filename) as f:
-            index = TagIndex.from_json(f)
-
-    else:
-
-        # Otherwise we need a write transaction to update it
-        with misc_cache.write_transaction(cache_filename) as (old, new):
-
-            index = TagIndex.from_json(old) if old else TagIndex()
-
-            for pkg_name in needs_update:
-                namespaced_name = '{0}.{1}'.format(namespace, pkg_name)
-                index.update_package(namespaced_name)
-
-            index.to_json(new)
-
-    return index
+@add_metaclass(abc.ABCMeta)
+class Indexer(object):
+    """Adaptor for indexes that need to be generated when repos are updated."""
+
+    def create(self):
+        self.index = self._create()
+
+    @abc.abstractmethod
+    def _create(self):
+        """Create an empty index and return it."""
+
+    @abc.abstractmethod
+    def read(self, stream):
+        """Read this index from a provided file object."""
+
+    @abc.abstractmethod
+    def update(self, pkg_fullname):
+        """Update the index in memory with information about a package."""
+
+    @abc.abstractmethod
+    def write(self, stream):
+        """Write the index to a file object."""
+
+
+class TagIndexer(Indexer):
+    """Lifecycle methods for a TagIndex on a Repo."""
+    def _create(self):
+        return TagIndex()
+
+    def read(self, stream):
+        self.index = TagIndex.from_json(stream)
+
+    def update(self, pkg_fullname):
+        self.index.update_package(pkg_fullname)
+
+    def write(self, stream):
+        self.index.to_json(stream)
+
+
+class ProviderIndexer(Indexer):
+    """Lifecycle methods for virtual package providers."""
+    def _create(self):
+        return ProviderIndex()
+
+    def read(self, stream):
+        self.index = ProviderIndex.from_yaml(stream)
+
+    def update(self, pkg_fullname):
+        self.index.remove_provider(pkg_fullname)
+        self.index.update(pkg_fullname)
+
+    def write(self, stream):
+        self.index.to_yaml(stream)
+
+
+class RepoIndex(object):
+    """Container class that manages a set of Indexers for a Repo.
+
+    This class is responsible for checking packages in a repository for
+    updates (using ``FastPackageChecker``) and for regenerating indexes
+    when they're needed.
+
+    ``Indexers`` should be added to the ``RepoIndex`` using
+    ``add_indexer(name, indexer)``, and they should support the interface
+    defined by ``Indexer``, so that the ``RepoIndex`` can read, generate,
+    and update stored indices.
+
+    Generated indexes are accessed by name via ``__getitem__()``.
+
+    """
+    def __init__(self, package_checker, namespace):
+        self.checker = package_checker
+        self.packages_path = self.checker.packages_path
+        self.namespace = namespace
+
+        self.indexers = {}
+        self.indexes = {}
+
+    def add_indexer(self, name, indexer):
+        """Add an indexer to the repo index.
+
+        Arguments:
+            name (str): name of this indexer
+
+            indexer (object): an object that supports create(), read(),
+                update(), and write() operations
+
+        """
+        self.indexers[name] = indexer
+
+    def __getitem__(self, name):
+        """Get the index with the specified name, reindexing if needed."""
+        indexer = self.indexers.get(name)
+        if not indexer:
+            raise KeyError('no such index: %s' % name)
+
+        if name not in self.indexes:
+            self._build_all_indexes()
+
+        return self.indexes[name]
+
+    def _build_all_indexes(self):
+        """Build all the indexes at once.
+
+        We regenerate *all* indexes whenever *any* index needs an update,
+        because the main bottleneck here is loading all the packages. It
+        can take tens of seconds to regenerate sequentially, and we'd
+        rather only pay that cost once rather than on several
+        invocations.
+
+        """
+        for name, indexer in self.indexers.items():
+            self.indexes[name] = self._build_index(name, indexer)
+
+    def _build_index(self, name, indexer):
+        """Determine which packages need an update, and update indexes."""
+
+        # Filename of the index cache (we assume they're all json)
+        cache_filename = '{0}/{1}-index.json'.format(name, self.namespace)
+
+        # Compute which packages need to be updated in the cache
+        misc_cache = spack.caches.misc_cache
+        index_mtime = misc_cache.mtime(cache_filename)
+
+        needs_update = [
+            x for x, sinfo in self.checker.items()
+            if sinfo.st_mtime > index_mtime
+        ]
+
+        index_existed = misc_cache.init_entry(cache_filename)
+        if index_existed and not needs_update:
+            # If the index exists and doesn't need an update, read it
+            with misc_cache.read_transaction(cache_filename) as f:
+                indexer.read(f)
+
+        else:
+            # Otherwise update it and rewrite the cache file
+            with misc_cache.write_transaction(cache_filename) as (old, new):
+                indexer.read(old) if old else indexer.create()
+
+                for pkg_name in needs_update:
+                    namespaced_name = '%s.%s' % (self.namespace, pkg_name)
+                    indexer.update(namespaced_name)
+
+                indexer.write(new)
+
+        return indexer.index
 
 
 class RepoPath(object):
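Before the remaining hunks, a note on the cache invalidation above: the
`needs_update` list in `_build_index` is the whole staleness test. A package
must be re-indexed when its package file is newer than the cached index file
for this repo. A minimal standalone sketch of that test follows; the helper
name `stale_packages` is invented for illustration, and `package_stats` stands
in for what `FastPackageChecker.items()` yields.

    def stale_packages(package_stats, index_mtime):
        """Return package names whose files are newer than the index cache.

        package_stats maps package name -> os.stat_result; index_mtime is
        the mtime of a cache entry such as 'providers/<namespace>-index.json'
        in Spack's misc cache (path shown only as an assumption).
        """
        return [name for name, sinfo in package_stats.items()
                if sinfo.st_mtime > index_mtime]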
@@ -658,11 +701,8 @@ def check(condition, msg):
         # Maps that goes from package name to corresponding file stat
         self._fast_package_checker = None
 
-        # Index of virtual dependencies, computed lazily
-        self._provider_index = None
-
-        # Index of tags, computed lazily
-        self._tag_index = None
+        # Indexes for this repository, computed lazily
+        self._repo_index = None
 
         # make sure the namespace for packages in this repo exists.
         self._create_namespace()
@@ -847,27 +887,24 @@ def purge(self):
         """Clear entire package instance cache."""
         self._instances.clear()
 
+    @property
+    def index(self):
+        """Construct the index for this repo lazily."""
+        if self._repo_index is None:
+            self._repo_index = RepoIndex(self._pkg_checker, self.namespace)
+            self._repo_index.add_indexer('providers', ProviderIndexer())
+            self._repo_index.add_indexer('tags', TagIndexer())
+        return self._repo_index
+
     @property
     def provider_index(self):
         """A provider index with names *specific* to this repo."""
-
-        if self._provider_index is None:
-            self._provider_index = make_provider_index_cache(
-                self.packages_path, self.namespace
-            )
-
-        return self._provider_index
+        return self.index['providers']
 
     @property
     def tag_index(self):
-        """A provider index with names *specific* to this repo."""
-
-        if self._tag_index is None:
-            self._tag_index = make_tag_index_cache(
-                self.packages_path, self.namespace
-            )
-
-        return self._tag_index
+        """Index of tags and which packages they're defined on."""
+        return self.index['tags']
 
     @_autospec
     def providers_for(self, vpkg_spec):
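After this change, a Repo's indexes are built lazily and together on first
access: the first property read constructs the RepoIndex, registers the
'providers' and 'tags' indexers, and refreshes both caches in a single pass
over the package files. A short usage sketch; the repository path is purely
illustrative and the `Repo(...)` constructor arguments are assumed rather than
taken from this diff.

    import spack.repo

    # Hypothetical repository path; any registered package repo would do.
    repo = spack.repo.Repo('/path/to/spack/var/spack/repos/builtin')

    # First access builds all registered indexes at once, then caches them.
    providers = repo.provider_index   # equivalent to repo.index['providers']
    tags = repo.tag_index             # equivalent to repo.index['tags']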