diff --git a/lib/spack/llnl/util/link_tree.py b/lib/spack/llnl/util/link_tree.py index 81c3aa09ad4..d851f06d117 100644 --- a/lib/spack/llnl/util/link_tree.py +++ b/lib/spack/llnl/util/link_tree.py @@ -50,9 +50,14 @@ class SourceMergeVisitor(BaseDirectoryVisitor): - A list of merge conflicts in dst/ """ - def __init__(self, ignore: Optional[Callable[[str], bool]] = None): + def __init__( + self, ignore: Optional[Callable[[str], bool]] = None, normalize_paths: bool = False + ): self.ignore = ignore if ignore is not None else lambda f: False + # On case-insensitive filesystems, normalize paths to detect duplications + self.normalize_paths = normalize_paths + # When mapping <src root> to <dst root>/<projection>, we need to prepend the <projection> # bit to the relative path in the destination dir. self.projection: str = "" @@ -71,10 +76,88 @@ def __init__(self, ignore: Optional[Callable[[str], bool]] = None): # and can run mkdir in order. self.directories: Dict[str, Tuple[str, str]] = {} + # If the visitor is configured to normalize paths, keep a map of + # normalized path to: original path, root directory + relative path + self._directories_normalized: Dict[str, Tuple[str, str, str]] = {} + # Files to link. Maps dst_rel to (src_root, src_rel). This is an ordered dict, where files # are guaranteed to be grouped by src_root in the order they were visited. 
self.files: Dict[str, Tuple[str, str]] = {} + # If the visitor is configured to normalize paths, keep a map of + # normalized path to: original path, root directory + relative path + self._files_normalized: Dict[str, Tuple[str, str, str]] = {} + + def _in_directories(self, proj_rel_path: str) -> bool: + """ + Check if a path is already in the directory list + """ + if self.normalize_paths: + return proj_rel_path.lower() in self._directories_normalized + else: + return proj_rel_path in self.directories + + def _directory(self, proj_rel_path: str) -> Tuple[str, str, str]: + """ + Get the directory that is mapped to a path + """ + if self.normalize_paths: + return self._directories_normalized[proj_rel_path.lower()] + else: + return (proj_rel_path, *self.directories[proj_rel_path]) + + def _del_directory(self, proj_rel_path: str): + """ + Remove a directory from the list of directories + """ + del self.directories[proj_rel_path] + if self.normalize_paths: + del self._directories_normalized[proj_rel_path.lower()] + + def _add_directory(self, proj_rel_path: str, root: str, rel_path: str): + """ + Add a directory to the list of directories. 
+ Also stores the normalized version for later lookups + """ + self.directories[proj_rel_path] = (root, rel_path) + if self.normalize_paths: + self._directories_normalized[proj_rel_path.lower()] = (proj_rel_path, root, rel_path) + + def _in_files(self, proj_rel_path: str) -> bool: + """ + Check if a path is already in the files list + """ + if self.normalize_paths: + return proj_rel_path.lower() in self._files_normalized + else: + return proj_rel_path in self.files + + def _file(self, proj_rel_path: str) -> Tuple[str, str, str]: + """ + Get the file that is mapped to a path + """ + if self.normalize_paths: + return self._files_normalized[proj_rel_path.lower()] + else: + return (proj_rel_path, *self.files[proj_rel_path]) + + def _del_file(self, proj_rel_path: str): + """ + Remove a file from the list of files + """ + del self.files[proj_rel_path] + if self.normalize_paths: + del self._files_normalized[proj_rel_path.lower()] + + def _add_file(self, proj_rel_path: str, root: str, rel_path: str): + """ + Add a file to the list of files + Also stores the normalized version for later lookups + """ + self.files[proj_rel_path] = (root, rel_path) + if self.normalize_paths: + self._files_normalized[proj_rel_path.lower()] = (proj_rel_path, root, rel_path) + def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: """ Register a directory if dst / rel_path is not blocked by a file or ignored. @@ -84,9 +167,9 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: if self.ignore(rel_path): # Don't recurse when dir is ignored. return False - elif proj_rel_path in self.files: + elif self._in_files(proj_rel_path): # Can't create a dir where a file is. 
- src_a_root, src_a_relpath = self.files[proj_rel_path] + _, src_a_root, src_a_relpath = self._file(proj_rel_path) self.fatal_conflicts.append( MergeConflict( dst=proj_rel_path, @@ -95,12 +178,12 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: ) ) return False - elif proj_rel_path in self.directories: + elif self._in_directories(proj_rel_path): # No new directory, carry on. return True else: # Register new directory. - self.directories[proj_rel_path] = (root, rel_path) + self._add_directory(proj_rel_path, root, rel_path) return True def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bool: @@ -140,22 +223,22 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa if self.ignore(rel_path): pass - elif proj_rel_path in self.directories: + elif self._in_directories(proj_rel_path): # Can't create a file where a dir is; fatal error self.fatal_conflicts.append( MergeConflict( dst=proj_rel_path, - src_a=os.path.join(*self.directories[proj_rel_path]), + src_a=os.path.join(*self._directory(proj_rel_path)), src_b=os.path.join(root, rel_path), ) ) - elif proj_rel_path in self.files: + elif self._in_files(proj_rel_path): # When two files project to the same path, they conflict iff they are distinct. # If they are the same (i.e. one links to the other), register regular files rather # than symlinks. The reason is that in copy-type views, we need a copy of the actual # file, not the symlink. - src_a = os.path.join(*self.files[proj_rel_path]) + src_a = os.path.join(*self._file(proj_rel_path)) src_b = os.path.join(root, rel_path) try: @@ -173,12 +256,13 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa if not symlink: # Remove the link in favor of the actual file. The del is necessary to maintain the # order of the files dict, which is grouped by root. 
- del self.files[proj_rel_path] - self.files[proj_rel_path] = (root, rel_path) + existing_proj_rel_path, _, _ = self._file(proj_rel_path) + self._del_file(existing_proj_rel_path) + self._add_file(proj_rel_path, root, rel_path) else: # Otherwise register this file to be linked. - self.files[proj_rel_path] = (root, rel_path) + self._add_file(proj_rel_path, root, rel_path) def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None: # Treat symlinked files as ordinary files (without "dereferencing") @@ -197,11 +281,11 @@ def set_projection(self, projection: str) -> None: path = "" for part in self.projection.split(os.sep): path = os.path.join(path, part) - if path not in self.files: - self.directories[path] = ("", path) + if not self._in_files(path): + self._add_directory(path, "", path) else: # Can't create a dir where a file is. - src_a_root, src_a_relpath = self.files[path] + _, src_a_root, src_a_relpath = self._file(path) self.fatal_conflicts.append( MergeConflict( dst=path, @@ -227,8 +311,8 @@ def __init__(self, source_merge_visitor: SourceMergeVisitor): def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: # If destination dir is a file in a src dir, add a conflict, # and don't traverse deeper - if rel_path in self.src.files: - src_a_root, src_a_relpath = self.src.files[rel_path] + if self.src._in_files(rel_path): + _, src_a_root, src_a_relpath = self.src._file(rel_path) self.src.fatal_conflicts.append( MergeConflict( rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path) @@ -238,8 +322,9 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: # If destination dir was also a src dir, remove the mkdir # action, and traverse deeper. 
- if rel_path in self.src.directories: - del self.src.directories[rel_path] + if self.src._in_directories(rel_path): + existing_proj_rel_path, _, _ = self.src._directory(rel_path) + self.src._del_directory(existing_proj_rel_path) return True # If the destination dir does not appear in the src dir, @@ -252,38 +337,24 @@ def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bo be seen as files; we should not accidentally merge source dir with a symlinked dest dir. """ - # Always conflict - if rel_path in self.src.directories: - src_a_root, src_a_relpath = self.src.directories[rel_path] - self.src.fatal_conflicts.append( - MergeConflict( - rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path) - ) - ) - if rel_path in self.src.files: - src_a_root, src_a_relpath = self.src.files[rel_path] - self.src.fatal_conflicts.append( - MergeConflict( - rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path) - ) - ) + self.visit_file(root, rel_path, depth) # Never descend into symlinked target dirs. 
return False def visit_file(self, root: str, rel_path: str, depth: int) -> None: # Can't merge a file if target already exists - if rel_path in self.src.directories: - src_a_root, src_a_relpath = self.src.directories[rel_path] + if self.src._in_directories(rel_path): + _, src_a_root, src_a_relpath = self.src._directory(rel_path) self.src.fatal_conflicts.append( MergeConflict( rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path) ) ) - elif rel_path in self.src.files: - src_a_root, src_a_relpath = self.src.files[rel_path] + elif self.src._in_files(rel_path): + _, src_a_root, src_a_relpath = self.src._file(rel_path) self.src.fatal_conflicts.append( MergeConflict( rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path) diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index 86efd8e2f56..2267928b6d5 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -9,6 +9,7 @@ import shutil import stat import sys +import tempfile from typing import Callable, Dict, Optional from typing_extensions import Literal @@ -708,7 +709,10 @@ def add_specs(self, *specs: spack.spec.Spec) -> None: def skip_list(file): return os.path.basename(file) == spack.store.STORE.layout.metadata_dir - visitor = SourceMergeVisitor(ignore=skip_list) + # Determine if the root is on a case-insensitive filesystem + normalize_paths = is_folder_on_case_insensitive_filesystem(self._root) + + visitor = SourceMergeVisitor(ignore=skip_list, normalize_paths=normalize_paths) # Gather all the directories to be made and files to be linked for spec in specs: @@ -884,3 +888,8 @@ def get_dependencies(specs): class ConflictingProjectionsError(SpackError): """Raised when a view has a projections file and is given one manually.""" + + +def is_folder_on_case_insensitive_filesystem(path: str) -> bool: + with tempfile.NamedTemporaryFile(dir=path, prefix=".sentinel") as sentinel: + return 
os.path.exists(os.path.join(path, os.path.basename(sentinel.name).upper())) diff --git a/lib/spack/spack/test/llnl/util/link_tree.py b/lib/spack/spack/test/llnl/util/link_tree.py index 57175aa32a8..459b40ef16c 100644 --- a/lib/spack/spack/test/llnl/util/link_tree.py +++ b/lib/spack/spack/test/llnl/util/link_tree.py @@ -396,3 +396,229 @@ def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathli # The first file encountered should be listed. assert visitor.files == {str(tmp_path / "view" / "file"): (str(tmp_path / "dir_a"), "file")} + + +def test_source_visitor_no_path_normalization(tmp_path: pathlib.Path): + src = str(tmp_path / "a") + + a = SourceMergeVisitor(normalize_paths=False) + a.visit_file(src, "file", 0) + a.visit_file(src, "FILE", 0) + assert len(a.files) == 2 + assert len(a.directories) == 0 + assert "file" in a.files and "FILE" in a.files + assert len(a.file_conflicts) == 0 + + a = SourceMergeVisitor(normalize_paths=False) + a.visit_file(src, "file", 0) + a.before_visit_dir(src, "FILE", 0) + assert len(a.files) == 1 + assert "file" in a.files and "FILE" not in a.files + assert len(a.directories) == 1 + assert "FILE" in a.directories + assert len(a.fatal_conflicts) == 0 + assert len(a.file_conflicts) == 0 + + # without normalization, order doesn't matter + a = SourceMergeVisitor(normalize_paths=False) + a.before_visit_dir(src, "FILE", 0) + a.visit_file(src, "file", 0) + assert len(a.files) == 1 + assert "file" in a.files and "FILE" not in a.files + assert len(a.directories) == 1 + assert "FILE" in a.directories + assert len(a.fatal_conflicts) == 0 + assert len(a.file_conflicts) == 0 + + a = SourceMergeVisitor(normalize_paths=False) + a.before_visit_dir(src, "FILE", 0) + a.before_visit_dir(src, "file", 0) + assert len(a.files) == 0 + assert len(a.directories) == 2 + assert "FILE" in a.directories and "file" in a.directories + assert len(a.fatal_conflicts) == 0 + assert len(a.file_conflicts) == 0 + + +def 
test_source_visitor_path_normalization(tmp_path: pathlib.Path, monkeypatch): + src_a = str(tmp_path / "a") + src_b = str(tmp_path / "b") + + os.mkdir(src_a) + os.mkdir(src_b) + + file = os.path.join(src_a, "file") + FILE = os.path.join(src_b, "FILE") + + with open(file, "wb"): + pass + + with open(FILE, "wb"): + pass + + assert os.path.exists(file) + assert os.path.exists(FILE) + + # file conflict with os.path.samefile reporting it's NOT the same file + a = SourceMergeVisitor(normalize_paths=True) + a.visit_file(src_a, "file", 0) + a.visit_file(src_b, "FILE", 0) + assert a.files + assert len(a.files) == 1 + # first file wins + assert "file" in a.files + # this is a conflict since the files are reported to be distinct + assert len(a.file_conflicts) == 1 + assert "FILE" in [c.dst for c in a.file_conflicts] + + os.remove(FILE) + os.link(file, FILE) + + assert os.path.exists(file) + assert os.path.exists(FILE) + assert os.path.samefile(file, FILE) + + # file conflict with os.path.samefile reporting it's the same file + a = SourceMergeVisitor(normalize_paths=True) + a.visit_file(src_a, "file", 0) + a.visit_file(src_b, "FILE", 0) + assert a.files + assert len(a.files) == 1 + # second file wins + assert "FILE" in a.files + # not a conflict + assert len(a.file_conflicts) == 0 + + a = SourceMergeVisitor(normalize_paths=True) + a.visit_file(src_a, "file", 0) + a.before_visit_dir(src_a, "FILE", 0) + assert a.files + assert len(a.files) == 1 + assert "file" in a.files + assert len(a.directories) == 0 + assert len(a.fatal_conflicts) == 1 + conflicts = [c.dst for c in a.fatal_conflicts] + assert "FILE" in conflicts + + a = SourceMergeVisitor(normalize_paths=True) + a.before_visit_dir(src_a, "FILE", 0) + a.visit_file(src_a, "file", 0) + assert len(a.directories) == 1 + assert "FILE" in a.directories + assert len(a.files) == 0 + assert len(a.fatal_conflicts) == 1 + conflicts = [c.dst for c in a.fatal_conflicts] + assert "file" in conflicts + + a = 
SourceMergeVisitor(normalize_paths=True) + a.before_visit_dir(src_a, "FILE", 0) + a.before_visit_dir(src_a, "file", 0) + assert len(a.directories) == 1 + # first dir wins + assert "FILE" in a.directories + assert len(a.files) == 0 + assert len(a.fatal_conflicts) == 0 + + +def test_destination_visitor_no_path_normalization(tmp_path: pathlib.Path): + src = str(tmp_path / "a") + dest = str(tmp_path / "b") + + src_visitor = SourceMergeVisitor(normalize_paths=False) + src_visitor.visit_file(src, "file", 0) + assert len(src_visitor.files) == 1 + assert len(src_visitor.directories) == 0 + assert "file" in src_visitor.files + + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.visit_file(dest, "FILE", 0) + # not a conflict, since normalization is off + assert len(dest_visitor.src.files) == 1 + assert len(dest_visitor.src.directories) == 0 + assert "file" in dest_visitor.src.files + assert len(dest_visitor.src.fatal_conflicts) == 0 + assert len(dest_visitor.src.file_conflicts) == 0 + + src_visitor = SourceMergeVisitor(normalize_paths=False) + src_visitor.visit_file(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.before_visit_dir(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 1 + assert "file" in dest_visitor.src.files + assert len(dest_visitor.src.directories) == 0 + assert len(dest_visitor.src.fatal_conflicts) == 0 + assert len(dest_visitor.src.file_conflicts) == 0 + + # not insensitive, order does not matter + src_visitor = SourceMergeVisitor(normalize_paths=False) + src_visitor.before_visit_dir(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.visit_file(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 0 + assert len(dest_visitor.src.directories) == 1 + assert "file" in dest_visitor.src.directories + assert len(dest_visitor.src.fatal_conflicts) == 0 + assert len(dest_visitor.src.file_conflicts) == 0 + + src_visitor = SourceMergeVisitor(normalize_paths=False) + 
src_visitor.before_visit_dir(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.before_visit_dir(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 0 + assert len(dest_visitor.src.directories) == 1 + assert "file" in dest_visitor.src.directories + assert len(dest_visitor.src.fatal_conflicts) == 0 + assert len(dest_visitor.src.file_conflicts) == 0 + + +def test_destination_visitor_path_normalization(tmp_path: pathlib.Path): + src = str(tmp_path / "a") + dest = str(tmp_path / "b") + + src_visitor = SourceMergeVisitor(normalize_paths=True) + src_visitor.visit_file(src, "file", 0) + assert len(src_visitor.files) == 1 + assert len(src_visitor.directories) == 0 + assert "file" in src_visitor.files + + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.visit_file(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 1 + assert len(dest_visitor.src.directories) == 0 + assert "file" in dest_visitor.src.files + assert len(dest_visitor.src.fatal_conflicts) == 1 + assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] + assert len(dest_visitor.src.file_conflicts) == 0 + + src_visitor = SourceMergeVisitor(normalize_paths=True) + src_visitor.visit_file(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.before_visit_dir(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 1 + assert "file" in dest_visitor.src.files + assert len(dest_visitor.src.directories) == 0 + assert len(dest_visitor.src.fatal_conflicts) == 1 + assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] + assert len(dest_visitor.src.file_conflicts) == 0 + + src_visitor = SourceMergeVisitor(normalize_paths=True) + src_visitor.before_visit_dir(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.visit_file(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 0 + assert len(dest_visitor.src.directories) == 1 + assert "file" in dest_visitor.src.directories + 
assert len(dest_visitor.src.fatal_conflicts) == 1 + assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] + assert len(dest_visitor.src.file_conflicts) == 0 + + src_visitor = SourceMergeVisitor(normalize_paths=True) + src_visitor.before_visit_dir(src, "file", 0) + dest_visitor = DestinationMergeVisitor(src_visitor) + dest_visitor.before_visit_dir(dest, "FILE", 0) + assert len(dest_visitor.src.files) == 0 + # this removes the mkdir action, no directory left over + assert len(dest_visitor.src.directories) == 0 + # but it's also not a conflict + assert len(dest_visitor.src.fatal_conflicts) == 0 + assert len(dest_visitor.src.file_conflicts) == 0