llnl.util.filesystem: multiple entrypoints and max_depth (#47495)
If a package `foo` doesn't implement `libs`, the default was to search recursively for `libfoo` whenever asking for `spec[foo].libs` (this also happens automatically if a package includes `foo` as a link dependency). This can lead to some strange behavior: 1. A package that is normally used as a build dependency (e.g. `cmake` at one point) is referenced like `depends_on(cmake)` which leads to a fully-recursive search for `libcmake` (this can take "forever" when CMake is registered as an external with a prefix like `/usr`, particularly on NFS mounts). 2. A similar hang can occur if a package is registered as an external with an incorrect prefix - [x] Update the default library search to stop after a maximum depth (by default, search the root prefix and each directory in it, but no lower). - [x] The following is a list of known changes to `find` compared to `develop`: 1. Matching directories are no longer returned -- `find` consistently only finds non-dirs, even at `max_depth` 2. Symlinked directories are followed (needed to support max_depth) 3. `find(..., "dir/*.txt")` is allowed, for finding files inside certain dirs. These "complex" patterns are delegated to `glob`, like they are on `develop`. 4. `root` and `files` arguments both support generic sequences, and `root` allows both `str` and `path` types. This allows us to specify multiple entry points to `find`. --------- Co-authored-by: Peter Scheibel <scheibel1@llnl.gov>
This commit is contained in:
		@@ -20,11 +20,23 @@
 | 
			
		||||
import tempfile
 | 
			
		||||
from contextlib import contextmanager
 | 
			
		||||
from itertools import accumulate
 | 
			
		||||
from typing import Callable, Iterable, List, Match, Optional, Tuple, Union
 | 
			
		||||
from typing import (
 | 
			
		||||
    Callable,
 | 
			
		||||
    Deque,
 | 
			
		||||
    Dict,
 | 
			
		||||
    Iterable,
 | 
			
		||||
    List,
 | 
			
		||||
    Match,
 | 
			
		||||
    Optional,
 | 
			
		||||
    Sequence,
 | 
			
		||||
    Set,
 | 
			
		||||
    Tuple,
 | 
			
		||||
    Union,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
import llnl.util.symlink
 | 
			
		||||
from llnl.util import tty
 | 
			
		||||
from llnl.util.lang import dedupe, memoized
 | 
			
		||||
from llnl.util.lang import dedupe, fnmatch_translate_multiple, memoized
 | 
			
		||||
from llnl.util.symlink import islink, readlink, resolve_link_target_relative_to_the_link, symlink
 | 
			
		||||
 | 
			
		||||
from ..path import path_to_os_path, system_path_filter
 | 
			
		||||
@@ -85,6 +97,8 @@
 | 
			
		||||
    "visit_directory_tree",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
Path = Union[str, pathlib.Path]
 | 
			
		||||
 | 
			
		||||
if sys.version_info < (3, 7, 4):
 | 
			
		||||
    # monkeypatch shutil.copystat to fix PermissionError when copying read-only
 | 
			
		||||
    # files on Lustre when using Python < 3.7.4
 | 
			
		||||
@@ -1673,105 +1687,199 @@ def find_first(root: str, files: Union[Iterable[str], str], bfs_depth: int = 2)
 | 
			
		||||
    return FindFirstFile(root, *files, bfs_depth=bfs_depth).find()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find(root, files, recursive=True):
 | 
			
		||||
    """Search for ``files`` starting from the ``root`` directory.
 | 
			
		||||
 | 
			
		||||
    Like GNU/BSD find but written entirely in Python.
 | 
			
		||||
 | 
			
		||||
    Examples:
 | 
			
		||||
 | 
			
		||||
    .. code-block:: console
 | 
			
		||||
 | 
			
		||||
       $ find /usr -name python
 | 
			
		||||
 | 
			
		||||
    is equivalent to:
 | 
			
		||||
 | 
			
		||||
    >>> find('/usr', 'python')
 | 
			
		||||
 | 
			
		||||
    .. code-block:: console
 | 
			
		||||
 | 
			
		||||
       $ find /usr/local/bin -maxdepth 1 -name python
 | 
			
		||||
 | 
			
		||||
    is equivalent to:
 | 
			
		||||
 | 
			
		||||
    >>> find('/usr/local/bin', 'python', recursive=False)
 | 
			
		||||
def find(
 | 
			
		||||
    root: Union[Path, Sequence[Path]],
 | 
			
		||||
    files: Union[str, Sequence[str]],
 | 
			
		||||
    recursive: bool = True,
 | 
			
		||||
    max_depth: Optional[int] = None,
 | 
			
		||||
) -> List[str]:
 | 
			
		||||
    """Finds all non-directory files matching the patterns from ``files`` starting from ``root``.
 | 
			
		||||
    This function returns a deterministic result for the same input and directory structure when
 | 
			
		||||
    run multiple times. Symlinked directories are followed, and unique directories are searched
 | 
			
		||||
    only once. Each matching file is returned only once at lowest depth in case multiple paths
 | 
			
		||||
    exist due to symlinked directories.
 | 
			
		||||
 | 
			
		||||
    Accepts any glob characters accepted by fnmatch:
 | 
			
		||||
 | 
			
		||||
    ==========  ====================================
 | 
			
		||||
    Pattern     Meaning
 | 
			
		||||
    ==========  ====================================
 | 
			
		||||
    ``*``       matches everything
 | 
			
		||||
    ``*``       matches zero or more characters
 | 
			
		||||
    ``?``       matches any single character
 | 
			
		||||
    ``[seq]``   matches any character in ``seq``
 | 
			
		||||
    ``[!seq]``  matches any character not in ``seq``
 | 
			
		||||
    ==========  ====================================
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
        root (str): The root directory to start searching from
 | 
			
		||||
        files (str or collections.abc.Sequence): Library name(s) to search for
 | 
			
		||||
        recursive (bool): if False search only root folder,
 | 
			
		||||
            if True descends top-down from the root. Defaults to True.
 | 
			
		||||
    Examples:
 | 
			
		||||
 | 
			
		||||
    Returns:
 | 
			
		||||
        list: The files that have been found
 | 
			
		||||
    >>> find("/usr", "*.txt", recursive=True, max_depth=2)
 | 
			
		||||
 | 
			
		||||
    finds all files with the extension ``.txt`` in the directory ``/usr`` and subdirectories up to
 | 
			
		||||
    depth 2.
 | 
			
		||||
 | 
			
		||||
    >>> find(["/usr", "/var"], ["*.txt", "*.log"], recursive=True)
 | 
			
		||||
 | 
			
		||||
    finds all files with the extension ``.txt`` or ``.log`` in the directories ``/usr`` and
 | 
			
		||||
    ``/var`` at any depth.
 | 
			
		||||
 | 
			
		||||
    >>> find("/usr", "GL/*.h", recursive=True)
 | 
			
		||||
 | 
			
		||||
    finds all header files in a directory GL at any depth in the directory ``/usr``.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
        root: One or more root directories to start searching from
 | 
			
		||||
        files: One or more filename patterns to search for
 | 
			
		||||
        recursive: if False search only root, if True descends from roots. Defaults to True.
 | 
			
		||||
        max_depth: if set, don't search below this depth. Cannot be set if recursive is False
 | 
			
		||||
 | 
			
		||||
    Returns a list of absolute, matching file paths.
 | 
			
		||||
    """
 | 
			
		||||
    if isinstance(root, (str, pathlib.Path)):
 | 
			
		||||
        root = [root]
 | 
			
		||||
    elif not isinstance(root, collections.abc.Sequence):
 | 
			
		||||
        raise TypeError(f"'root' arg must be a path or a sequence of paths, not '{type(root)}']")
 | 
			
		||||
 | 
			
		||||
    if isinstance(files, str):
 | 
			
		||||
        files = [files]
 | 
			
		||||
    elif not isinstance(files, collections.abc.Sequence):
 | 
			
		||||
        raise TypeError(f"'files' arg must be str or a sequence of str, not '{type(files)}']")
 | 
			
		||||
 | 
			
		||||
    if recursive:
 | 
			
		||||
        tty.debug(f"Find (recursive): {root} {str(files)}")
 | 
			
		||||
        result = _find_recursive(root, files)
 | 
			
		||||
    else:
 | 
			
		||||
        tty.debug(f"Find (not recursive): {root} {str(files)}")
 | 
			
		||||
        result = _find_non_recursive(root, files)
 | 
			
		||||
    # If recursive is false, max_depth can only be None or 0
 | 
			
		||||
    if max_depth and not recursive:
 | 
			
		||||
        raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False")
 | 
			
		||||
 | 
			
		||||
    tty.debug(f"Find complete: {root} {str(files)}")
 | 
			
		||||
    tty.debug(f"Find (max depth = {max_depth}): {root} {files}")
 | 
			
		||||
    if not recursive:
 | 
			
		||||
        max_depth = 0
 | 
			
		||||
    elif max_depth is None:
 | 
			
		||||
        max_depth = sys.maxsize
 | 
			
		||||
    result = _find_max_depth(root, files, max_depth)
 | 
			
		||||
    tty.debug(f"Find complete: {root} {files}")
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@system_path_filter
 | 
			
		||||
def _find_recursive(root, search_files):
 | 
			
		||||
    # The variable here is **on purpose** a defaultdict. The idea is that
 | 
			
		||||
    # we want to poke the filesystem as little as possible, but still maintain
 | 
			
		||||
    # stability in the order of the answer. Thus we are recording each library
 | 
			
		||||
    # found in a key, and reconstructing the stable order later.
 | 
			
		||||
    found_files = collections.defaultdict(list)
 | 
			
		||||
 | 
			
		||||
    # Make the path absolute to have os.walk also return an absolute path
 | 
			
		||||
    root = os.path.abspath(root)
 | 
			
		||||
    for path, _, list_files in os.walk(root):
 | 
			
		||||
        for search_file in search_files:
 | 
			
		||||
            matches = glob.glob(os.path.join(path, search_file))
 | 
			
		||||
            matches = [os.path.join(path, x) for x in matches]
 | 
			
		||||
            found_files[search_file].extend(matches)
 | 
			
		||||
 | 
			
		||||
    answer = []
 | 
			
		||||
    for search_file in search_files:
 | 
			
		||||
        answer.extend(found_files[search_file])
 | 
			
		||||
 | 
			
		||||
    return answer
 | 
			
		||||
def _log_file_access_issue(e: OSError, path: str) -> None:
    """Emit a debug message saying that ``find`` has to skip ``path`` because of ``e``.

    The errno is printed by its symbolic name from ``errno.errorcode``; ``UNKNOWN`` is used
    when the errno has no entry in that table.
    """
    tty.debug(f"find must skip {path}: {errno.errorcode.get(e.errno, 'UNKNOWN')} {e}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@system_path_filter
 | 
			
		||||
def _find_non_recursive(root, search_files):
 | 
			
		||||
    # The variable here is **on purpose** a defaultdict as os.list_dir
 | 
			
		||||
    # can return files in any order (does not preserve stability)
 | 
			
		||||
    found_files = collections.defaultdict(list)
 | 
			
		||||
def _file_id(s: os.stat_result) -> Tuple[int, int]:
    """Return the ``(st_ino, st_dev)`` pair of a stat result, used as an identity key."""
    # Note: on windows, st_ino is the file index and st_dev is the volume serial number. See
    # https://github.com/python/cpython/blob/3.9/Python/fileutils.c
    inode, device = s.st_ino, s.st_dev
    return inode, device
 | 
			
		||||
 | 
			
		||||
    # Make the path absolute to have absolute path returned
 | 
			
		||||
    root = os.path.abspath(root)
 | 
			
		||||
 | 
			
		||||
    for search_file in search_files:
 | 
			
		||||
        matches = glob.glob(os.path.join(root, search_file))
 | 
			
		||||
        matches = [os.path.join(root, x) for x in matches]
 | 
			
		||||
        found_files[search_file].extend(matches)
 | 
			
		||||
def _dedupe_files(paths: List[str]) -> List[str]:
    """Deduplicate files by inode and device, dropping files that cannot be accessed.

    The first path seen for each (inode, device) pair is kept, so the relative order of the
    input is preserved. Paths whose ``lstat`` fails are logged at debug level and omitted.
    """
    # (inode, device) of every entry kept so far
    seen_ids: Set[Tuple[int, int]] = set()
    kept: List[str] = []
    for candidate in paths:
        try:
            # lstat: identify the entry itself, without following symlinks
            identity = _file_id(os.lstat(candidate))
        except OSError as e:
            _log_file_access_issue(e, candidate)
            continue
        if identity in seen_ids:
            continue
        seen_ids.add(identity)
        kept.append(candidate)
    return kept
 | 
			
		||||
 | 
			
		||||
    answer = []
 | 
			
		||||
    for search_file in search_files:
 | 
			
		||||
        answer.extend(found_files[search_file])
 | 
			
		||||
 | 
			
		||||
    return answer
 | 
			
		||||
def _find_max_depth(
    roots: Sequence[Path], globs: Sequence[str], max_depth: int = sys.maxsize
) -> List[str]:
    """See ``find`` for the public API.

    Performs a breadth-first traversal starting from ``roots``. Symlinked directories are
    followed, and every unique directory (identified by inode and device) is entered only once.
    The roots are at depth 0; directories are only entered while the current depth is below
    ``max_depth``.

    Args:
        roots: directories to start searching from; they are made absolute before traversal
        globs: patterns to search for. Patterns without ``/`` are matched against file names
            using one combined regex; patterns containing ``/`` are handed to ``glob.glob``
            relative to every visited directory.
        max_depth: deepest directory level that is entered

    Returns:
        Matching non-directory paths, grouped by pattern in the order the patterns were given.
    """
    # We optimize for the common case of simple filename only patterns: a single, combined regex
    # is used. For complex patterns that include path components, we use a slower glob call from
    # every directory we visit within max_depth.
    filename_only_patterns = {
        f"pattern_{i}": os.path.normcase(x) for i, x in enumerate(globs) if "/" not in x
    }
    complex_patterns = {f"pattern_{i}": x for i, x in enumerate(globs) if "/" in x}
    regex = re.compile(fnmatch_translate_multiple(filename_only_patterns))
    # Ordered dictionary that keeps track of what pattern found which files
    matched_paths: Dict[str, List[str]] = {f"pattern_{i}": [] for i, _ in enumerate(globs)}
    # Ensure returned paths are always absolute
    roots = [os.path.abspath(r) for r in roots]
    # Breadth-first search queue. Each element is a tuple of (depth, dir)
    dir_queue: Deque[Tuple[int, str]] = collections.deque()
    # Set of visited directories. Each element is a tuple of (inode, device)
    visited_dirs: Set[Tuple[int, int]] = set()

    for root in roots:
        try:
            stat_root = os.stat(root)
        except OSError as e:
            _log_file_access_issue(e, root)
            continue
        dir_id = _file_id(stat_root)
        if dir_id not in visited_dirs:
            dir_queue.appendleft((0, root))
            visited_dirs.add(dir_id)

    while dir_queue:
        depth, curr_dir = dir_queue.pop()
        try:
            dir_iter = os.scandir(curr_dir)
        except OSError as e:
            _log_file_access_issue(e, curr_dir)
            continue

        # Everything that happens while the directory handle is open lives inside the ``with``,
        # so the handle is released even if one of the steps below raises.
        with dir_iter:
            # Use glob.glob for complex patterns.
            if complex_patterns:
                # The directory part is a literal path: escape it so that glob characters in
                # directory names (``*``, ``?``, ``[``) are not interpreted as pattern syntax.
                escaped_dir = glob.escape(curr_dir)
                for pattern_name, pattern in complex_patterns.items():
                    matched_paths[pattern_name].extend(
                        path
                        for path in glob.glob(os.path.join(escaped_dir, pattern))
                        if not os.path.isdir(path)
                    )

            # Sort by name so the result does not depend on the order scandir yields entries
            ordered_entries = sorted(dir_iter, key=lambda x: x.name)
            for dir_entry in ordered_entries:
                try:
                    it_is_a_dir = dir_entry.is_dir(follow_symlinks=True)
                except OSError as e:
                    # Possible permission issue, or a symlink that cannot be resolved (ELOOP).
                    _log_file_access_issue(e, dir_entry.path)
                    continue

                if it_is_a_dir:
                    if depth >= max_depth:
                        continue
                    try:
                        # The stat should be performed in a try/except block. We repeat that here
                        # vs. moving to the above block because we only want to call `stat` if we
                        # haven't exceeded our max_depth
                        if sys.platform == "win32":
                            # Note: st_ino/st_dev on DirEntry.stat are not set on Windows, so we
                            # have to call os.stat
                            stat_info = os.stat(dir_entry.path, follow_symlinks=True)
                        else:
                            stat_info = dir_entry.stat(follow_symlinks=True)
                    except OSError as e:
                        _log_file_access_issue(e, dir_entry.path)
                        continue

                    dir_id = _file_id(stat_info)
                    if dir_id not in visited_dirs:
                        dir_queue.appendleft((depth + 1, dir_entry.path))
                        visited_dirs.add(dir_id)
                elif filename_only_patterns:
                    m = regex.match(os.path.normcase(dir_entry.name))
                    if not m:
                        continue
                    # Attribute the file to the first pattern whose named group matched
                    for pattern_name in filename_only_patterns:
                        if m.group(pattern_name):
                            matched_paths[pattern_name].append(dir_entry.path)
                            break

    all_matching_paths = [path for paths in matched_paths.values() for path in paths]

    # we only dedupe files if we have any complex patterns, since only they can match the same file
    # multiple times
    return _dedupe_files(all_matching_paths) if complex_patterns else all_matching_paths
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Utilities for libraries and headers
 | 
			
		||||
@@ -2210,7 +2318,9 @@ def find_system_libraries(libraries, shared=True):
 | 
			
		||||
    return libraries_found
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
 | 
			
		||||
def find_libraries(
 | 
			
		||||
    libraries, root, shared=True, recursive=False, runtime=True, max_depth: Optional[int] = None
 | 
			
		||||
):
 | 
			
		||||
    """Returns an iterable of full paths to libraries found in a root dir.
 | 
			
		||||
 | 
			
		||||
    Accepts any glob characters accepted by fnmatch:
 | 
			
		||||
@@ -2231,6 +2341,8 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
 | 
			
		||||
            otherwise for static. Defaults to True.
 | 
			
		||||
        recursive (bool): if False search only root folder,
 | 
			
		||||
            if True descends top-down from the root. Defaults to False.
 | 
			
		||||
        max_depth (int): if set, don't search below this depth. Cannot be set
 | 
			
		||||
            if recursive is False
 | 
			
		||||
        runtime (bool): Windows only option, no-op elsewhere. If true,
 | 
			
		||||
            search for runtime shared libs (.DLL), otherwise, search
 | 
			
		||||
            for .Lib files. If shared is false, this has no meaning.
 | 
			
		||||
@@ -2239,6 +2351,7 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
 | 
			
		||||
    Returns:
 | 
			
		||||
        LibraryList: The libraries that have been found
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    if isinstance(libraries, str):
 | 
			
		||||
        libraries = [libraries]
 | 
			
		||||
    elif not isinstance(libraries, collections.abc.Sequence):
 | 
			
		||||
@@ -2271,8 +2384,10 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
 | 
			
		||||
    libraries = ["{0}.{1}".format(lib, suffix) for lib in libraries for suffix in suffixes]
 | 
			
		||||
 | 
			
		||||
    if not recursive:
 | 
			
		||||
        if max_depth:
 | 
			
		||||
            raise ValueError(f"max_depth ({max_depth}) cannot be set if recursive is False")
 | 
			
		||||
        # If not recursive, look for the libraries directly in root
 | 
			
		||||
        return LibraryList(find(root, libraries, False))
 | 
			
		||||
        return LibraryList(find(root, libraries, recursive=False))
 | 
			
		||||
 | 
			
		||||
    # To speedup the search for external packages configured e.g. in /usr,
 | 
			
		||||
    # perform first non-recursive search in root/lib then in root/lib64 and
 | 
			
		||||
@@ -2290,7 +2405,7 @@ def find_libraries(libraries, root, shared=True, recursive=False, runtime=True):
 | 
			
		||||
        if found_libs:
 | 
			
		||||
            break
 | 
			
		||||
    else:
 | 
			
		||||
        found_libs = find(root, libraries, True)
 | 
			
		||||
        found_libs = find(root, libraries, recursive=True, max_depth=max_depth)
 | 
			
		||||
 | 
			
		||||
    return LibraryList(found_libs)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -5,15 +5,17 @@
 | 
			
		||||
 | 
			
		||||
import collections.abc
 | 
			
		||||
import contextlib
 | 
			
		||||
import fnmatch
 | 
			
		||||
import functools
 | 
			
		||||
import itertools
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
import traceback
 | 
			
		||||
import typing
 | 
			
		||||
import warnings
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
from typing import Callable, Iterable, List, Tuple, TypeVar
 | 
			
		||||
from typing import Callable, Dict, Iterable, List, Tuple, TypeVar
 | 
			
		||||
 | 
			
		||||
# Ignore emacs backups when listing modules
 | 
			
		||||
ignore_modules = r"^\.#|~$"
 | 
			
		||||
@@ -859,6 +861,19 @@ def elide_list(line_list: List[str], max_num: int = 10) -> List[str]:
 | 
			
		||||
    return line_list
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Type alias for a compiled ``str`` pattern: ``typing.Pattern[str]`` before Python 3.9,
# ``re.Pattern[str]`` from 3.9 on.
if sys.version_info < (3, 9):
    PatternStr = typing.Pattern[str]
else:
    PatternStr = re.Pattern[str]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def fnmatch_translate_multiple(named_patterns: Dict[str, str]) -> str:
    """Translate several named filename patterns into one regex string.

    Works like ``fnmatch.translate``, but for an ordered dictionary mapping a pattern name to
    a filename pattern. Each pattern becomes a named capture group and the groups are joined
    as alternatives in dictionary order, so a match object reveals which pattern matched.

    Args:
        named_patterns: mapping from group name to filename pattern

    Returns:
        The combined regex as a string (empty if ``named_patterns`` is empty).
    """
    alternatives = []
    for name, pattern in named_patterns.items():
        alternatives.append(f"(?P<{name}>{fnmatch.translate(pattern)})")
    return "|".join(alternatives)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@contextlib.contextmanager
 | 
			
		||||
def nullcontext(*args, **kwargs):
 | 
			
		||||
    """Empty context manager.
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,7 @@
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
from llnl.util.filesystem import HeaderList, LibraryList, find, find_headers, find_libraries
 | 
			
		||||
from llnl.util.filesystem import HeaderList, LibraryList, find_headers, find_libraries
 | 
			
		||||
 | 
			
		||||
import spack.paths
 | 
			
		||||
 | 
			
		||||
@@ -324,33 +324,3 @@ def test_searching_order(search_fn, search_list, root, kwargs):
 | 
			
		||||
 | 
			
		||||
    # List should be empty here
 | 
			
		||||
    assert len(rlist) == 0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "root,search_list,kwargs,expected",
 | 
			
		||||
    [
 | 
			
		||||
        (
 | 
			
		||||
            search_dir,
 | 
			
		||||
            "*/*bar.tx?",
 | 
			
		||||
            {"recursive": False},
 | 
			
		||||
            [
 | 
			
		||||
                os.path.join(search_dir, os.path.join("a", "foobar.txt")),
 | 
			
		||||
                os.path.join(search_dir, os.path.join("b", "bar.txp")),
 | 
			
		||||
                os.path.join(search_dir, os.path.join("c", "bar.txt")),
 | 
			
		||||
            ],
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            search_dir,
 | 
			
		||||
            "*/*bar.tx?",
 | 
			
		||||
            {"recursive": True},
 | 
			
		||||
            [
 | 
			
		||||
                os.path.join(search_dir, os.path.join("a", "foobar.txt")),
 | 
			
		||||
                os.path.join(search_dir, os.path.join("b", "bar.txp")),
 | 
			
		||||
                os.path.join(search_dir, os.path.join("c", "bar.txt")),
 | 
			
		||||
            ],
 | 
			
		||||
        ),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_find_with_globbing(root, search_list, kwargs, expected):
 | 
			
		||||
    matches = find(root, search_list, **kwargs)
 | 
			
		||||
    assert sorted(matches) == sorted(expected)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@
 | 
			
		||||
"""Tests for ``llnl/util/filesystem.py``"""
 | 
			
		||||
import filecmp
 | 
			
		||||
import os
 | 
			
		||||
import pathlib
 | 
			
		||||
import shutil
 | 
			
		||||
import stat
 | 
			
		||||
import sys
 | 
			
		||||
@@ -14,7 +15,8 @@
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
import llnl.util.filesystem as fs
 | 
			
		||||
from llnl.util.symlink import islink, readlink, symlink
 | 
			
		||||
import llnl.util.symlink
 | 
			
		||||
from llnl.util.symlink import _windows_can_symlink, islink, readlink, symlink
 | 
			
		||||
 | 
			
		||||
import spack.paths
 | 
			
		||||
 | 
			
		||||
@@ -1035,3 +1037,227 @@ def test_windows_sfn(tmpdir):
 | 
			
		||||
    assert "d\\LONGER~1" in fs.windows_sfn(d)
 | 
			
		||||
    assert "d\\LONGER~2" in fs.windows_sfn(e)
 | 
			
		||||
    shutil.rmtree(tmpdir.join("d"))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture
def dir_structure_with_things_to_find(tmpdir):
    """
    Create the tree below and return ``(root, locations)``, where ``root`` is the tree root
    as a string and ``locations`` maps each file name to its path as a string.

    <root>/
        dir_one/
            file_one
        dir_two/
        dir_three/
            dir_four/
                file_two
            file_three
        file_four
    """
    first = tmpdir.join("dir_one").ensure(dir=True)
    tmpdir.join("dir_two").ensure(dir=True)
    third = tmpdir.join("dir_three").ensure(dir=True)
    fourth = third.join("dir_four").ensure(dir=True)

    # file name -> directory that holds it
    file_homes = {
        "file_one": first,
        "file_two": fourth,
        "file_three": third,
        "file_four": tmpdir,
    }
    locations = {name: str(parent.join(name).ensure()) for name, parent in file_homes.items()}

    return str(tmpdir), locations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_path_glob_matches(dir_structure_with_things_to_find):
    """Patterns with path components must match relative to a visited directory."""
    root, locations = dir_structure_with_things_to_find
    only_file_two = [locations["file_two"]]
    # both file name and path match
    assert fs.find(root, "file_two") == only_file_two
    assert fs.find(root, "*/*/file_two") == only_file_two
    assert fs.find(root, "dir_t*/*/*two") == only_file_two
    # ensure that * does not match directory separators
    assert fs.find(root, "dir*file_two") == []
    # ensure that file name matches after / are matched from the start of the file name
    assert fs.find(root, "*/ile_two") == []
    # file name matches exist, but not with these paths
    assert fs.find(root, "dir_one/*/*two") == []
    assert fs.find(root, "*/*/*/*/file_two") == []
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_max_depth(dir_structure_with_things_to_find):
    """``max_depth`` limits how far below the root matching files are reported."""
    root, locations = dir_structure_with_things_to_find

    def found(pattern, **kwargs):
        return set(fs.find(root, pattern, **kwargs))

    # Make sure the paths we use to verify are absolute
    assert os.path.isabs(locations["file_one"])

    assert found("file_*", max_depth=0) == {locations["file_four"]}
    assert found("file_*", max_depth=1) == {
        locations[name] for name in ("file_one", "file_three", "file_four")
    }
    assert found("file_two", max_depth=2) == {locations["file_two"]}
    assert found("file_two", max_depth=1) == set()
    assert found("file_two") == {locations["file_two"]}
    assert found("file_*") == set(locations.values())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_max_depth_relative(dir_structure_with_things_to_find):
    """find_max_depth should return absolute paths even if the provided path is relative."""
    root, locations = dir_structure_with_things_to_find
    # (pattern, max_depth, key of the single expected file)
    cases = [("file_*", 0, "file_four"), ("file_two", 2, "file_two")]
    with fs.working_dir(root):
        for pattern, depth, expected_key in cases:
            assert set(fs.find(".", pattern, max_depth=depth)) == {locations[expected_key]}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("recursive,max_depth", [(False, -1), (False, 1)])
def test_max_depth_and_recursive_errors(tmpdir, recursive, max_depth):
    """A non-zero ``max_depth`` combined with ``recursive=False`` must raise ``ValueError``."""
    root = str(tmpdir)
    searches = (
        lambda: fs.find(root, ["some_file"], recursive=recursive, max_depth=max_depth),
        lambda: fs.find_libraries(["some_lib"], root, recursive=recursive, max_depth=max_depth),
    )
    for search in searches:
        with pytest.raises(ValueError, match="cannot be set if recursive is False"):
            search()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[True, False])
def complex_dir_structure(request, tmpdir):
    """
    Build a tree of directories, files and directory links, and return ``(root, locations)``.

    The fixture is parametrized over the kind of link: Windows junctions (``True``) or
    symlinks (``False``). Combinations the current platform cannot provide are skipped.
    ``root`` is the tree root as a string; ``locations`` maps a label to a file path (string),
    with ``-full`` / ``-link`` suffixes telling apart the direct path from a path via a link.

    "lx-dy" means "level x, directory y"
    "lx-fy" means "level x, file y"
    "lx-sy" means "level x, symlink y"

    <root>/
        l1-d1/
            l2-d1/
                l3-s1 -> l1-d2 # points to directory above l2-d1
                l3-d2/
                    l4-f1
                l3-s3 -> l1-d1 # cyclic link
                l3-d4/
                    l4-f2
        l1-d2/
            l2-f1
            l2-d2/
                l3-f3
            l2-s3 -> l2-d2
        l1-s3 -> l3-d4 # a link that "skips" a directory level
        l1-s4 -> l2-s3 # a link to a link to a dir
    """
    use_junctions = request.param
    if sys.platform == "win32" and not use_junctions and not _windows_can_symlink():
        pytest.skip("This Windows instance is not configured with symlink support")
    elif sys.platform != "win32" and use_junctions:
        pytest.skip("Junctions are a Windows-only feature")

    l1_d1 = tmpdir.join("l1-d1").ensure(dir=True)
    l2_d1 = l1_d1.join("l2-d1").ensure(dir=True)
    l3_d2 = l2_d1.join("l3-d2").ensure(dir=True)
    l3_d4 = l2_d1.join("l3-d4").ensure(dir=True)
    l1_d2 = tmpdir.join("l1-d2").ensure(dir=True)
    # Named "l2-d2" so the on-disk layout matches the tree documented above
    l2_d2 = l1_d2.join("l2-d2").ensure(dir=True)

    if use_junctions:
        link_fn = llnl.util.symlink._windows_create_junction
    else:
        link_fn = os.symlink

    link_fn(l1_d2, pathlib.Path(l2_d1) / "l3-s1")
    link_fn(l1_d1, pathlib.Path(l2_d1) / "l3-s3")
    link_fn(l3_d4, pathlib.Path(tmpdir) / "l1-s3")
    l2_s3 = pathlib.Path(l1_d2) / "l2-s3"
    link_fn(l2_d2, l2_s3)
    link_fn(l2_s3, pathlib.Path(tmpdir) / "l1-s4")

    locations = {
        "l4-f1": str(l3_d2.join("l4-f1").ensure()),
        "l4-f2-full": str(l3_d4.join("l4-f2").ensure()),
        "l4-f2-link": str(pathlib.Path(tmpdir) / "l1-s3" / "l4-f2"),
        "l2-f1": str(l1_d2.join("l2-f1").ensure()),
        "l2-f1-link": str(pathlib.Path(tmpdir) / "l1-d1" / "l2-d1" / "l3-s1" / "l2-f1"),
        "l3-f3-full": str(l2_d2.join("l3-f3").ensure()),
        "l3-f3-link-l1": str(pathlib.Path(tmpdir) / "l1-s4" / "l3-f3"),
    }

    return str(tmpdir), locations
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_max_depth_symlinks(complex_dir_structure):
    """Check which path ``find`` reports for files reachable through symlinked directories."""
    root_str, expected_paths = complex_dir_structure
    base = pathlib.Path(root_str)

    def found(entry_point, pattern, **kwargs):
        # Order of results is irrelevant here, so compare as sets.
        return set(fs.find(entry_point, pattern, **kwargs))

    assert found(base, "l4-f1") == {expected_paths["l4-f1"]}
    assert found(base / "l1-s3", "l4-f2", max_depth=0) == {expected_paths["l4-f2-link"]}
    assert found(base / "l1-d1", "l2-f1") == {expected_paths["l2-f1-link"]}

    # The file can be reached both through a symlink and through its real
    # parent directory. The symlinked path is visited first, and the real
    # directory is not searched a second time when the traversal gets to it.
    assert found(base, "l4-f2") == {expected_paths["l4-f2-link"]}

    # From this entry point the file is reachable only via the real
    # directory, so the full (non-link) path is what gets reported.
    assert found(base / "l1-d1", "l4-f2") == {expected_paths["l4-f2-full"]}

    # A symlink that points at another symlink is followed as well.
    assert found(base, "l3-f3") == {expected_paths["l3-f3-link-l1"]}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_max_depth_multiple_and_repeated_entry_points(complex_dir_structure):
    """Pass ``find`` several roots, some repeated and one missing, with ``max_depth=1``."""
    root, locations = complex_dir_structure
    base = pathlib.Path(root)

    first_entry = str(base / "l1-d1" / "l2-d1")
    second_entry = str(base / "l1-d2")
    missing_entry = str(base / "nonexistent")

    # Each existing root is listed twice; the last root does not exist on disk.
    entry_points = [first_entry, second_entry, first_entry, second_entry, missing_entry]

    expected = {locations[key] for key in ("l2-f1", "l4-f1", "l4-f2-full", "l3-f3-full")}
    assert set(fs.find(entry_points, ["l*-f*"], max_depth=1)) == expected
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_multiple_patterns(complex_dir_structure):
    """Search with several overlapping patterns and check uniqueness, coverage and order."""
    root = complex_dir_structure[0]
    patterns = ["l2-f1", "l*-d*/l3-f3", "*", "*/*"]
    results = fs.find(root, patterns)

    # Overlapping patterns must not produce the same path more than once
    assert len(results) == len(set(results))

    # Every file in the tree is expected to show up
    basenames = list(map(os.path.basename, results))
    assert set(basenames) == {"l2-f1", "l3-f3", "l4-f1", "l4-f2"}

    # Results come back ordered by the first pattern that matched them (this is a bit of an
    # implementation detail, and the exact order may be changed in the future)
    assert basenames[:2] == ["l2-f1", "l3-f3"]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_input_types(tmp_path: pathlib.Path):
    """Verify the argument types ``find`` accepts: ``root`` may be a ``str``, a
    ``pathlib.Path`` or a sequence of those, while patterns may be a ``str`` or a sequence of
    ``str``. mypy would normally flag misuse, but it is not enabled on every call-site."""
    target = tmp_path / "file.txt"
    target.write_text("")
    expected = [str(target)]
    root_as_str = str(tmp_path)

    # Every accepted spelling of the root must give the same result
    for root in (tmp_path, root_as_str, [tmp_path, root_as_str], (tmp_path, root_as_str)):
        assert fs.find(root, "file.txt") == expected

    # Likewise for every accepted spelling of the patterns
    for patterns in ("file.txt", ["file.txt"], ("file.txt",)):
        assert fs.find(tmp_path, patterns) == expected

    # A Path is not a valid pattern
    with pytest.raises(TypeError):
        fs.find(tmp_path, pathlib.Path("file.txt"))  # type: ignore

    # An int is not a valid root
    with pytest.raises(TypeError):
        fs.find(1, "file.txt")  # type: ignore
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_find_only_finds_files(tmp_path: pathlib.Path):
    """Directories must never be returned by find, not even those sitting at max_depth."""
    subdir = tmp_path / "subdir"
    subdir.mkdir()
    (subdir / "dir").mkdir()
    only_file = subdir / "file.txt"
    only_file.write_text("")

    expected = [str(only_file)]
    # A bare wildcard and a "dir/file" style wildcard must agree on the single file
    assert fs.find(tmp_path, "*", max_depth=1) == expected
    assert fs.find(tmp_path, "*/*", max_depth=1) == expected
 | 
			
		||||
 
 | 
			
		||||
@@ -373,3 +373,19 @@ class _SomeClass:
 | 
			
		||||
    _SomeClass.deprecated.error_lvl = 2
 | 
			
		||||
    with pytest.raises(AttributeError):
 | 
			
		||||
        _ = s.deprecated
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_fnmatch_multiple():
    """Compile the combined regex for two named glob patterns and check what it matches."""
    combined = llnl.util.lang.fnmatch_translate_multiple({"a": "libf*o.so", "b": "libb*r.so"})
    matcher = re.compile(combined)

    # A match is reported under the group named after the pattern that produced it
    for group_name, filename in (("a", "libfoo.so"), ("b", "libbar.so")):
        hit = matcher.match(filename)
        assert hit and hit.group(group_name) == filename

    # Trailing text, concatenated matches and unrelated names must all be rejected
    for filename in ("libfoo.so.1", "libbar.so.1", "libfoo.solibbar.so", "libbaz.so"):
        assert not matcher.match(filename)
 | 
			
		||||
 
 | 
			
		||||
@@ -501,18 +501,20 @@ def test_find_required_file(tmpdir):
 | 
			
		||||
 | 
			
		||||
    # First just find a single path
 | 
			
		||||
    results = spack.install_test.find_required_file(
 | 
			
		||||
        tmpdir.join("c"), filename, expected=1, recursive=True
 | 
			
		||||
        str(tmpdir.join("c")), filename, expected=1, recursive=True
 | 
			
		||||
    )
 | 
			
		||||
    assert isinstance(results, str)
 | 
			
		||||
 | 
			
		||||
    # Ensure no file is found if we do not search that directory recursively
 | 
			
		||||
    with pytest.raises(spack.install_test.SkipTest, match="Expected 1"):
 | 
			
		||||
        spack.install_test.find_required_file(
 | 
			
		||||
            tmpdir.join("c"), filename, expected=1, recursive=False
 | 
			
		||||
            str(tmpdir.join("c")), filename, expected=1, recursive=False
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # Now make sure we get all of the files
 | 
			
		||||
    results = spack.install_test.find_required_file(tmpdir, filename, expected=3, recursive=True)
 | 
			
		||||
    results = spack.install_test.find_required_file(
 | 
			
		||||
        str(tmpdir), filename, expected=3, recursive=True
 | 
			
		||||
    )
 | 
			
		||||
    assert isinstance(results, list) and len(results) == 3
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -44,7 +44,7 @@ def libs(self):
 | 
			
		||||
    # Header provided by the bar virtual package
 | 
			
		||||
    @property
 | 
			
		||||
    def bar_headers(self):
 | 
			
		||||
        return find_headers("bar/bar", root=self.home.include, recursive=False)
 | 
			
		||||
        return find_headers("bar", root=self.home.include, recursive=True)
 | 
			
		||||
 | 
			
		||||
    # Library provided by the bar virtual package
 | 
			
		||||
    @property
 | 
			
		||||
@@ -59,7 +59,7 @@ def baz_home(self):
 | 
			
		||||
    # Header provided by the baz virtual package
 | 
			
		||||
    @property
 | 
			
		||||
    def baz_headers(self):
 | 
			
		||||
        return find_headers("baz/baz", root=self.baz_home.include, recursive=False)
 | 
			
		||||
        return find_headers("baz", root=self.baz_home.include, recursive=True)
 | 
			
		||||
 | 
			
		||||
    # Library provided by the baz virtual package
 | 
			
		||||
    @property
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user