specs: use lazy lexicographic comparison instead of key_ordering
We have been using the `@llnl.util.lang.key_ordering` decorator for specs and most of their components. This leverages the fact that in Python, tuple comparison is lexicographic. It allows you to implement a `_cmp_key` method on your class, and have `__eq__`, `__lt__`, etc. implemented automatically using that key. For example, you might use tuple keys to implement comparison, e.g.: ```python class Widget: # author implements this def _cmp_key(self): return ( self.a, self.b, (self.c, self.d), self.e ) # operators are generated by @key_ordering def __eq__(self, other): return self._cmp_key() == other._cmp_key() def __lt__(self): return self._cmp_key() < other._cmp_key() # etc. ``` The issue there for simple comparators is that we have to bulid the tuples *and* we have to generate all the values in them up front. When implementing comparisons for large data structures, this can be costly. This PR replaces `@key_ordering` with a new decorator, `@lazy_lexicographic_ordering`. Lazy lexicographic comparison maps the tuple comparison shown above to generator functions. Instead of comparing based on pre-constructed tuple keys, users of this decorator can compare using elements from a generator. So, you'd write: ```python @lazy_lexicographic_ordering class Widget: def _cmp_iter(self): yield a yield b def cd_fun(): yield c yield d yield cd_fun yield e # operators are added by decorator (but are a bit more complex) There are no tuples that have to be pre-constructed, and the generator does not have to complete. Instead of tuples, we simply make functions that lazily yield what would've been in the tuple. If a yielded value is a `callable`, the comparison functions will call it and recursively compar it. The comparator just walks the data structure like you'd expect it to. The ``@lazy_lexicographic_ordering`` decorator handles the details of implementing comparison operators, and the ``Widget`` implementor only has to worry about writing ``_cmp_iter``, and making sure the elements in it are also comparable. Using this PR shaves another 1.5 sec off the runtime of `spack buildcache list`, and it also speeds up Spec comparison by about 30%. The runtime improvement comes mostly from *not* calling `hash()` `_cmp_iter()`.
This commit is contained in:

committed by
Greg Becker

parent
fd12cba18b
commit
01a6adb5f7
@@ -14,6 +14,11 @@
|
||||
from six import string_types
|
||||
import sys
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
from itertools import izip_longest # novm
|
||||
zip_longest = izip_longest
|
||||
else:
|
||||
from itertools import zip_longest # novm
|
||||
|
||||
if sys.version_info >= (3, 3):
|
||||
from collections.abc import Hashable, MutableMapping # novm
|
||||
@@ -227,48 +232,222 @@ def list_modules(directory, **kwargs):
|
||||
yield re.sub('.py$', '', name)
|
||||
|
||||
|
||||
def key_ordering(cls):
|
||||
"""Decorates a class with extra methods that implement rich comparison
|
||||
operations and ``__hash__``. The decorator assumes that the class
|
||||
implements a function called ``_cmp_key()``. The rich comparison
|
||||
operations will compare objects using this key, and the ``__hash__``
|
||||
function will return the hash of this key.
|
||||
def decorator_with_or_without_args(decorator):
|
||||
"""Allows a decorator to be used with or without arguments, e.g.::
|
||||
|
||||
If a class already has ``__eq__``, ``__ne__``, ``__lt__``, ``__le__``,
|
||||
``__gt__``, or ``__ge__`` defined, this decorator will overwrite them.
|
||||
# Calls the decorator function some args
|
||||
@decorator(with, arguments, and=kwargs)
|
||||
|
||||
or::
|
||||
|
||||
# Calls the decorator function with zero arguments
|
||||
@decorator
|
||||
|
||||
Raises:
|
||||
TypeError: If the class does not have a ``_cmp_key`` method
|
||||
"""
|
||||
def setter(name, value):
|
||||
value.__name__ = name
|
||||
setattr(cls, name, value)
|
||||
# See https://stackoverflow.com/questions/653368 for more on this
|
||||
@functools.wraps(decorator)
|
||||
def new_dec(*args, **kwargs):
|
||||
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
|
||||
# actual decorated function
|
||||
return decorator(args[0])
|
||||
else:
|
||||
# decorator arguments
|
||||
return lambda realf: decorator(realf, *args, **kwargs)
|
||||
|
||||
if not has_method(cls, '_cmp_key'):
|
||||
raise TypeError("'%s' doesn't define _cmp_key()." % cls.__name__)
|
||||
return new_dec
|
||||
|
||||
setter('__eq__',
|
||||
lambda s, o:
|
||||
(s is o) or (o is not None and s._cmp_key() == o._cmp_key()))
|
||||
setter('__lt__',
|
||||
lambda s, o: o is not None and s._cmp_key() < o._cmp_key())
|
||||
setter('__le__',
|
||||
lambda s, o: o is not None and s._cmp_key() <= o._cmp_key())
|
||||
|
||||
setter('__ne__',
|
||||
lambda s, o:
|
||||
(s is not o) and (o is None or s._cmp_key() != o._cmp_key()))
|
||||
setter('__gt__',
|
||||
lambda s, o: o is None or s._cmp_key() > o._cmp_key())
|
||||
setter('__ge__',
|
||||
lambda s, o: o is None or s._cmp_key() >= o._cmp_key())
|
||||
#: sentinel for testing that iterators are done in lazy_lexicographic_ordering
|
||||
done = object()
|
||||
|
||||
setter('__hash__', lambda self: hash(self._cmp_key()))
|
||||
|
||||
def tuplify(seq):
|
||||
"""Helper for lazy_lexicographic_ordering()."""
|
||||
return tuple((tuplify(x) if callable(x) else x) for x in seq())
|
||||
|
||||
|
||||
def lazy_eq(lseq, rseq):
|
||||
"""Equality comparison for two lazily generated sequences.
|
||||
|
||||
See ``lazy_lexicographic_ordering``.
|
||||
"""
|
||||
liter = lseq() # call generators
|
||||
riter = rseq()
|
||||
|
||||
# zip_longest is implemented in native code, so use it for speed.
|
||||
# use zip_longest instead of zip because it allows us to tell
|
||||
# which iterator was longer.
|
||||
for left, right in zip_longest(liter, riter, fillvalue=done):
|
||||
if (left is done) or (right is done):
|
||||
return False
|
||||
|
||||
# recursively enumerate any generators, otherwise compare
|
||||
equal = lazy_eq(left, right) if callable(left) else left == right
|
||||
if not equal:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def lazy_lt(lseq, rseq):
|
||||
"""Less-than comparison for two lazily generated sequences.
|
||||
|
||||
See ``lazy_lexicographic_ordering``.
|
||||
"""
|
||||
liter = lseq()
|
||||
riter = rseq()
|
||||
|
||||
for left, right in zip_longest(liter, riter, fillvalue=done):
|
||||
if (left is done) or (right is done):
|
||||
return left is done # left was shorter than right
|
||||
|
||||
sequence = callable(left)
|
||||
equal = lazy_eq(left, right) if sequence else left == right
|
||||
if equal:
|
||||
continue
|
||||
|
||||
if sequence:
|
||||
return lazy_lt(left, right)
|
||||
if left is None:
|
||||
return True
|
||||
if right is None:
|
||||
return False
|
||||
|
||||
return left < right
|
||||
|
||||
return False # if equal, return False
|
||||
|
||||
|
||||
@decorator_with_or_without_args
|
||||
def lazy_lexicographic_ordering(cls, set_hash=True):
|
||||
"""Decorates a class with extra methods that implement rich comparison.
|
||||
|
||||
This is a lazy version of the tuple comparison used frequently to
|
||||
implement comparison in Python. Given some objects with fields, you
|
||||
might use tuple keys to implement comparison, e.g.::
|
||||
|
||||
class Widget:
|
||||
def _cmp_key(self):
|
||||
return (
|
||||
self.a,
|
||||
self.b,
|
||||
(self.c, self.d),
|
||||
self.e
|
||||
)
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._cmp_key() == other._cmp_key()
|
||||
|
||||
def __lt__(self):
|
||||
return self._cmp_key() < other._cmp_key()
|
||||
|
||||
# etc.
|
||||
|
||||
Python would compare ``Widgets`` lexicographically based on their
|
||||
tuples. The issue there for simple comparators is that we have to
|
||||
bulid the tuples *and* we have to generate all the values in them up
|
||||
front. When implementing comparisons for large data structures, this
|
||||
can be costly.
|
||||
|
||||
Lazy lexicographic comparison maps the tuple comparison shown above
|
||||
to generator functions. Instead of comparing based on pre-constructed
|
||||
tuple keys, users of this decorator can compare using elements from a
|
||||
generator. So, you'd write::
|
||||
|
||||
@lazy_lexicographic_ordering
|
||||
class Widget:
|
||||
def _cmp_iter(self):
|
||||
yield a
|
||||
yield b
|
||||
def cd_fun():
|
||||
yield c
|
||||
yield d
|
||||
yield cd_fun
|
||||
yield e
|
||||
|
||||
# operators are added by decorator
|
||||
|
||||
There are no tuples preconstructed, and the generator does not have
|
||||
to complete. Instead of tuples, we simply make functions that lazily
|
||||
yield what would've been in the tuple. The
|
||||
``@lazy_lexicographic_ordering`` decorator handles the details of
|
||||
implementing comparison operators, and the ``Widget`` implementor
|
||||
only has to worry about writing ``_cmp_iter``, and making sure the
|
||||
elements in it are also comparable.
|
||||
|
||||
Some things to note:
|
||||
|
||||
* If a class already has ``__eq__``, ``__ne__``, ``__lt__``,
|
||||
``__le__``, ``__gt__``, ``__ge__``, or ``__hash__`` defined, this
|
||||
decorator will overwrite them.
|
||||
|
||||
* If ``set_hash`` is ``False``, this will not overwrite
|
||||
``__hash__``.
|
||||
|
||||
* This class uses Python 2 None-comparison semantics. If you yield
|
||||
None and it is compared to a non-None type, None will always be
|
||||
less than the other object.
|
||||
|
||||
Raises:
|
||||
TypeError: If the class does not have a ``_cmp_iter`` method
|
||||
|
||||
"""
|
||||
if not has_method(cls, "_cmp_iter"):
|
||||
raise TypeError("'%s' doesn't define _cmp_iter()." % cls.__name__)
|
||||
|
||||
# comparison operators are implemented in terms of lazy_eq and lazy_lt
|
||||
def eq(self, other):
|
||||
if self is other:
|
||||
return True
|
||||
return (other is not None) and lazy_eq(self._cmp_iter, other._cmp_iter)
|
||||
|
||||
def lt(self, other):
|
||||
if self is other:
|
||||
return False
|
||||
return (other is not None) and lazy_lt(self._cmp_iter, other._cmp_iter)
|
||||
|
||||
def ne(self, other):
|
||||
if self is other:
|
||||
return False
|
||||
return (other is None) or not lazy_eq(self._cmp_iter, other._cmp_iter)
|
||||
|
||||
def gt(self, other):
|
||||
if self is other:
|
||||
return False
|
||||
return (other is None) or lazy_lt(other._cmp_iter, self._cmp_iter)
|
||||
|
||||
def le(self, other):
|
||||
if self is other:
|
||||
return True
|
||||
return (other is not None) and not lazy_lt(other._cmp_iter,
|
||||
self._cmp_iter)
|
||||
|
||||
def ge(self, other):
|
||||
if self is other:
|
||||
return True
|
||||
return (other is None) or not lazy_lt(self._cmp_iter, other._cmp_iter)
|
||||
|
||||
def h(self):
|
||||
return hash(tuplify(self._cmp_iter))
|
||||
|
||||
def add_func_to_class(name, func):
|
||||
"""Add a function to a class with a particular name."""
|
||||
func.__name__ = name
|
||||
setattr(cls, name, func)
|
||||
|
||||
add_func_to_class("__eq__", eq)
|
||||
add_func_to_class("__ne__", ne)
|
||||
add_func_to_class("__lt__", lt)
|
||||
add_func_to_class("__le__", le)
|
||||
add_func_to_class("__gt__", gt)
|
||||
add_func_to_class("__ge__", ge)
|
||||
if set_hash:
|
||||
add_func_to_class("__hash__", h)
|
||||
|
||||
return cls
|
||||
|
||||
|
||||
@key_ordering
|
||||
@lazy_lexicographic_ordering
|
||||
class HashableMap(MutableMapping):
|
||||
"""This is a hashable, comparable dictionary. Hash is performed on
|
||||
a tuple of the values in the dictionary."""
|
||||
@@ -291,8 +470,9 @@ def __len__(self):
|
||||
def __delitem__(self, key):
|
||||
del self.dict[key]
|
||||
|
||||
def _cmp_key(self):
|
||||
return tuple(sorted(self.values()))
|
||||
def _cmp_iter(self):
|
||||
for _, v in sorted(self.items()):
|
||||
yield v
|
||||
|
||||
def copy(self):
|
||||
"""Type-agnostic clone method. Preserves subclass type."""
|
||||
|
Reference in New Issue
Block a user