views: normalize paths on case insensitive file systems (#47370)

On macOS, prefix_a/file and prefix_b/FILE map to the same file in the view: view/file or view/FILE.

This commit tests whether a view is being created on a case-insensitive filesystem and handles projection conflicts accordingly.
This commit is contained in:
Paul Gessinger 2025-02-14 09:35:40 +01:00 committed by GitHub
parent 8ef5f1027a
commit 114bd5744f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 345 additions and 39 deletions

View File

@ -50,9 +50,14 @@ class SourceMergeVisitor(BaseDirectoryVisitor):
- A list of merge conflicts in dst/
"""
def __init__(self, ignore: Optional[Callable[[str], bool]] = None):
def __init__(
self, ignore: Optional[Callable[[str], bool]] = None, normalize_paths: bool = False
):
self.ignore = ignore if ignore is not None else lambda f: False
# On case-insensitive filesystems, normalize paths to detect duplications
self.normalize_paths = normalize_paths
# When mapping <src root> to <dst root>/<projection>, we need to prepend the <projection>
# bit to the relative path in the destination dir.
self.projection: str = ""
@ -71,10 +76,88 @@ def __init__(self, ignore: Optional[Callable[[str], bool]] = None):
# and can run mkdir in order.
self.directories: Dict[str, Tuple[str, str]] = {}
# If the visitor is configured to normalize paths, keep a map of
# normalized path to: original path, root directory + relative path
self._directories_normalized: Dict[str, Tuple[str, str, str]] = {}
# Files to link. Maps dst_rel to (src_root, src_rel). This is an ordered dict, where files
# are guaranteed to be grouped by src_root in the order they were visited.
self.files: Dict[str, Tuple[str, str]] = {}
# If the visitor is configured to normalize paths, keep a map of
# normalized path to: original path, root directory + relative path
self._files_normalized: Dict[str, Tuple[str, str, str]] = {}
def _in_directories(self, proj_rel_path: str) -> bool:
    """
    Tell whether ``proj_rel_path`` is already registered as a directory.

    With ``normalize_paths`` enabled, the lower-cased path is looked up in the
    normalized map; otherwise the path is looked up verbatim in ``directories``.
    """
    if not self.normalize_paths:
        return proj_rel_path in self.directories
    return proj_rel_path.lower() in self._directories_normalized
def _directory(self, proj_rel_path: str) -> Tuple[str, str, str]:
    """
    Look up the directory entry registered for ``proj_rel_path``.

    Returns a tuple whose first element is the registered destination path,
    followed by the stored (root, relative path) pair. With ``normalize_paths``
    enabled, the entry is fetched by lower-cased path, so the first element is
    the path as it was originally registered. Raises ``KeyError`` if there is
    no entry.
    """
    if not self.normalize_paths:
        source = self.directories[proj_rel_path]
        return (proj_rel_path, *source)
    return self._directories_normalized[proj_rel_path.lower()]
def _del_directory(self, proj_rel_path: str):
    """
    Drop ``proj_rel_path`` from the registered directories.

    The key is removed from ``directories`` as given; with ``normalize_paths``
    enabled, the lower-cased key is removed from the normalized map as well.
    Raises ``KeyError`` if an entry to remove does not exist.
    """
    self.directories.pop(proj_rel_path)
    if self.normalize_paths:
        self._directories_normalized.pop(proj_rel_path.lower())
def _add_directory(self, proj_rel_path: str, root: str, rel_path: str):
    """
    Register ``proj_rel_path`` as a directory coming from ``root``/``rel_path``.

    With ``normalize_paths`` enabled, the lower-cased path is additionally
    mapped to (original path, root, relative path) for later lookups.
    """
    source = (root, rel_path)
    self.directories[proj_rel_path] = source
    if self.normalize_paths:
        self._directories_normalized[proj_rel_path.lower()] = (proj_rel_path, *source)
def _in_files(self, proj_rel_path: str) -> bool:
    """
    Tell whether ``proj_rel_path`` is already registered as a file.

    With ``normalize_paths`` enabled, the lower-cased path is looked up in the
    normalized map; otherwise the path is looked up verbatim in ``files``.
    """
    if not self.normalize_paths:
        return proj_rel_path in self.files
    return proj_rel_path.lower() in self._files_normalized
def _file(self, proj_rel_path: str) -> Tuple[str, str, str]:
    """
    Look up the file entry registered for ``proj_rel_path``.

    Returns a tuple whose first element is the registered destination path,
    followed by the stored (root, relative path) pair. With ``normalize_paths``
    enabled, the entry is fetched by lower-cased path, so the first element is
    the path as it was originally registered. Raises ``KeyError`` if there is
    no entry.
    """
    if not self.normalize_paths:
        source = self.files[proj_rel_path]
        return (proj_rel_path, *source)
    return self._files_normalized[proj_rel_path.lower()]
def _del_file(self, proj_rel_path: str):
    """
    Drop ``proj_rel_path`` from the registered files.

    The key is removed from ``files`` as given; with ``normalize_paths``
    enabled, the lower-cased key is removed from the normalized map as well.
    Raises ``KeyError`` if an entry to remove does not exist.
    """
    self.files.pop(proj_rel_path)
    if self.normalize_paths:
        self._files_normalized.pop(proj_rel_path.lower())
def _add_file(self, proj_rel_path: str, root: str, rel_path: str):
    """
    Register ``proj_rel_path`` as a file coming from ``root``/``rel_path``.

    With ``normalize_paths`` enabled, the lower-cased path is additionally
    mapped to (original path, root, relative path) for later lookups.
    """
    source = (root, rel_path)
    self.files[proj_rel_path] = source
    if self.normalize_paths:
        self._files_normalized[proj_rel_path.lower()] = (proj_rel_path, *source)
def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
"""
Register a directory if dst / rel_path is not blocked by a file or ignored.
@ -84,9 +167,9 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
if self.ignore(rel_path):
# Don't recurse when dir is ignored.
return False
elif proj_rel_path in self.files:
elif self._in_files(proj_rel_path):
# Can't create a dir where a file is.
src_a_root, src_a_relpath = self.files[proj_rel_path]
_, src_a_root, src_a_relpath = self._file(proj_rel_path)
self.fatal_conflicts.append(
MergeConflict(
dst=proj_rel_path,
@ -95,12 +178,12 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
)
)
return False
elif proj_rel_path in self.directories:
elif self._in_directories(proj_rel_path):
# No new directory, carry on.
return True
else:
# Register new directory.
self.directories[proj_rel_path] = (root, rel_path)
self._add_directory(proj_rel_path, root, rel_path)
return True
def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bool:
@ -140,22 +223,22 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa
if self.ignore(rel_path):
pass
elif proj_rel_path in self.directories:
elif self._in_directories(proj_rel_path):
# Can't create a file where a dir is; fatal error
self.fatal_conflicts.append(
MergeConflict(
dst=proj_rel_path,
src_a=os.path.join(*self.directories[proj_rel_path]),
src_a=os.path.join(*self._directory(proj_rel_path)),
src_b=os.path.join(root, rel_path),
)
)
elif proj_rel_path in self.files:
elif self._in_files(proj_rel_path):
# When two files project to the same path, they conflict iff they are distinct.
# If they are the same (i.e. one links to the other), register regular files rather
# than symlinks. The reason is that in copy-type views, we need a copy of the actual
# file, not the symlink.
src_a = os.path.join(*self.files[proj_rel_path])
src_a = os.path.join(*self._file(proj_rel_path))
src_b = os.path.join(root, rel_path)
try:
@ -173,12 +256,13 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa
if not symlink:
# Remove the link in favor of the actual file. The del is necessary to maintain the
# order of the files dict, which is grouped by root.
del self.files[proj_rel_path]
self.files[proj_rel_path] = (root, rel_path)
existing_proj_rel_path, _, _ = self._file(proj_rel_path)
self._del_file(existing_proj_rel_path)
self._add_file(proj_rel_path, root, rel_path)
else:
# Otherwise register this file to be linked.
self.files[proj_rel_path] = (root, rel_path)
self._add_file(proj_rel_path, root, rel_path)
def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None:
# Treat symlinked files as ordinary files (without "dereferencing")
@ -197,11 +281,11 @@ def set_projection(self, projection: str) -> None:
path = ""
for part in self.projection.split(os.sep):
path = os.path.join(path, part)
if path not in self.files:
self.directories[path] = ("<projection>", path)
if not self._in_files(path):
self._add_directory(path, "<projection>", path)
else:
# Can't create a dir where a file is.
src_a_root, src_a_relpath = self.files[path]
_, src_a_root, src_a_relpath = self._file(path)
self.fatal_conflicts.append(
MergeConflict(
dst=path,
@ -227,8 +311,8 @@ def __init__(self, source_merge_visitor: SourceMergeVisitor):
def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
# If destination dir is a file in a src dir, add a conflict,
# and don't traverse deeper
if rel_path in self.src.files:
src_a_root, src_a_relpath = self.src.files[rel_path]
if self.src._in_files(rel_path):
_, src_a_root, src_a_relpath = self.src._file(rel_path)
self.src.fatal_conflicts.append(
MergeConflict(
rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path)
@ -238,8 +322,9 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
# If destination dir was also a src dir, remove the mkdir
# action, and traverse deeper.
if rel_path in self.src.directories:
del self.src.directories[rel_path]
if self.src._in_directories(rel_path):
existing_proj_rel_path, _, _ = self.src._directory(rel_path)
self.src._del_directory(existing_proj_rel_path)
return True
# If the destination dir does not appear in the src dir,
@ -252,38 +337,24 @@ def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bo
be seen as files; we should not accidentally merge
source dir with a symlinked dest dir.
"""
# Always conflict
if rel_path in self.src.directories:
src_a_root, src_a_relpath = self.src.directories[rel_path]
self.src.fatal_conflicts.append(
MergeConflict(
rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path)
)
)
if rel_path in self.src.files:
src_a_root, src_a_relpath = self.src.files[rel_path]
self.src.fatal_conflicts.append(
MergeConflict(
rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path)
)
)
self.visit_file(root, rel_path, depth)
# Never descend into symlinked target dirs.
return False
def visit_file(self, root: str, rel_path: str, depth: int) -> None:
# Can't merge a file if target already exists
if rel_path in self.src.directories:
src_a_root, src_a_relpath = self.src.directories[rel_path]
if self.src._in_directories(rel_path):
_, src_a_root, src_a_relpath = self.src._directory(rel_path)
self.src.fatal_conflicts.append(
MergeConflict(
rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path)
)
)
elif rel_path in self.src.files:
src_a_root, src_a_relpath = self.src.files[rel_path]
elif self.src._in_files(rel_path):
_, src_a_root, src_a_relpath = self.src._file(rel_path)
self.src.fatal_conflicts.append(
MergeConflict(
rel_path, os.path.join(src_a_root, src_a_relpath), os.path.join(root, rel_path)

View File

@ -9,6 +9,7 @@
import shutil
import stat
import sys
import tempfile
from typing import Callable, Dict, Optional
from typing_extensions import Literal
@ -708,7 +709,10 @@ def add_specs(self, *specs: spack.spec.Spec) -> None:
def skip_list(file):
return os.path.basename(file) == spack.store.STORE.layout.metadata_dir
visitor = SourceMergeVisitor(ignore=skip_list)
# Determine if the root is on a case-insensitive filesystem
normalize_paths = is_folder_on_case_insensitive_filesystem(self._root)
visitor = SourceMergeVisitor(ignore=skip_list, normalize_paths=normalize_paths)
# Gather all the directories to be made and files to be linked
for spec in specs:
@ -884,3 +888,8 @@ def get_dependencies(specs):
class ConflictingProjectionsError(SpackError):
    """Raised when a view has a projections file and is given one manually.

    A plain ``SpackError`` subclass: it adds no attributes or methods of its own.
    """
def is_folder_on_case_insensitive_filesystem(path: str) -> bool:
    """Probe whether the directory ``path`` is on a case-insensitive filesystem.

    A temporary sentinel file is created inside ``path``. If the upper-cased
    variant of its name also resolves to an existing entry in ``path``, the
    filesystem is considered case-insensitive. The sentinel is a
    ``tempfile.NamedTemporaryFile`` and is cleaned up when the ``with`` block exits.
    """
    with tempfile.NamedTemporaryFile(dir=path, prefix=".sentinel") as sentinel:
        upper_name = os.path.basename(sentinel.name).upper()
        return os.path.exists(os.path.join(path, upper_name))

View File

@ -396,3 +396,229 @@ def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathli
# The first file encountered should be listed.
assert visitor.files == {str(tmp_path / "view" / "file"): (str(tmp_path / "dir_a"), "file")}
def test_source_visitor_no_path_normalization(tmp_path: pathlib.Path):
    """With ``normalize_paths=False``, ``file`` and ``FILE`` are tracked as unrelated paths.

    The visitor methods are called directly; nothing is created under ``tmp_path``.
    """
    src = str(tmp_path / "a")

    # file + FILE: both are registered as files, no conflict.
    a = SourceMergeVisitor(normalize_paths=False)
    a.visit_file(src, "file", 0)
    a.visit_file(src, "FILE", 0)
    assert len(a.files) == 2
    assert len(a.directories) == 0
    assert "file" in a.files and "FILE" in a.files
    assert len(a.file_conflicts) == 0

    # file first, then directory FILE: both are registered, no conflict.
    a = SourceMergeVisitor(normalize_paths=False)
    a.visit_file(src, "file", 0)
    a.before_visit_dir(src, "FILE", 0)
    assert len(a.files) == 1
    assert "file" in a.files and "FILE" not in a.files
    assert len(a.directories) == 1
    assert "FILE" in a.directories
    assert len(a.fatal_conflicts) == 0
    assert len(a.file_conflicts) == 0

    # without normalization, order doesn't matter
    a = SourceMergeVisitor(normalize_paths=False)
    a.before_visit_dir(src, "FILE", 0)
    a.visit_file(src, "file", 0)
    assert len(a.files) == 1
    assert "file" in a.files and "FILE" not in a.files
    assert len(a.directories) == 1
    assert "FILE" in a.directories
    assert len(a.fatal_conflicts) == 0
    assert len(a.file_conflicts) == 0

    # directory FILE + directory file: two distinct directories.
    a = SourceMergeVisitor(normalize_paths=False)
    a.before_visit_dir(src, "FILE", 0)
    a.before_visit_dir(src, "file", 0)
    assert len(a.files) == 0
    assert len(a.directories) == 2
    assert "FILE" in a.directories and "file" in a.directories
    assert len(a.fatal_conflicts) == 0
    assert len(a.file_conflicts) == 0
def test_source_visitor_path_normalization(tmp_path: pathlib.Path, monkeypatch):
    """With ``normalize_paths=True``, ``file`` and ``FILE`` compete for the same destination.

    Covers file/file (distinct files, then hard-linked files), file/dir, dir/file and dir/dir.
    """
    # NOTE(review): the ``monkeypatch`` fixture is requested but never used in this test.
    src_a = str(tmp_path / "a")
    src_b = str(tmp_path / "b")

    # Create two real, distinct empty files: a/file and b/FILE.
    os.mkdir(src_a)
    os.mkdir(src_b)

    file = os.path.join(src_a, "file")
    FILE = os.path.join(src_b, "FILE")

    with open(file, "wb"):
        pass

    with open(FILE, "wb"):
        pass

    assert os.path.exists(file)
    assert os.path.exists(FILE)

    # file conflict with os.path.samefile reporting it's NOT the same file
    a = SourceMergeVisitor(normalize_paths=True)
    a.visit_file(src_a, "file", 0)
    a.visit_file(src_b, "FILE", 0)
    assert a.files
    assert len(a.files) == 1
    # first file wins
    assert "file" in a.files
    # this is a conflict since the files are reported to be distinct
    assert len(a.file_conflicts) == 1
    assert "FILE" in [c.dst for c in a.file_conflicts]

    # Replace b/FILE with a hard link to a/file so that both paths are the same file.
    os.remove(FILE)
    os.link(file, FILE)

    assert os.path.exists(file)
    assert os.path.exists(FILE)
    assert os.path.samefile(file, FILE)

    # file conflict with os.path.samefile reporting it's the same file
    a = SourceMergeVisitor(normalize_paths=True)
    a.visit_file(src_a, "file", 0)
    a.visit_file(src_b, "FILE", 0)
    assert a.files
    assert len(a.files) == 1
    # second file wins
    assert "FILE" in a.files
    # not a conflict
    assert len(a.file_conflicts) == 0

    # file first, then directory FILE: the directory is rejected as a fatal conflict.
    a = SourceMergeVisitor(normalize_paths=True)
    a.visit_file(src_a, "file", 0)
    a.before_visit_dir(src_a, "FILE", 0)
    assert a.files
    assert len(a.files) == 1
    assert "file" in a.files
    assert len(a.directories) == 0
    assert len(a.fatal_conflicts) == 1
    conflicts = [c.dst for c in a.fatal_conflicts]
    assert "FILE" in conflicts

    # directory FILE first, then file: the file is rejected as a fatal conflict.
    a = SourceMergeVisitor(normalize_paths=True)
    a.before_visit_dir(src_a, "FILE", 0)
    a.visit_file(src_a, "file", 0)
    assert len(a.directories) == 1
    assert "FILE" in a.directories
    assert len(a.files) == 0
    assert len(a.fatal_conflicts) == 1
    conflicts = [c.dst for c in a.fatal_conflicts]
    assert "file" in conflicts

    # directory FILE, then directory file: treated as the same directory, no conflict.
    a = SourceMergeVisitor(normalize_paths=True)
    a.before_visit_dir(src_a, "FILE", 0)
    a.before_visit_dir(src_a, "file", 0)
    assert len(a.directories) == 1
    # first dir wins
    assert "FILE" in a.directories
    assert len(a.files) == 0
    assert len(a.fatal_conflicts) == 0
def test_destination_visitor_no_path_normalization(tmp_path: pathlib.Path):
    """With ``normalize_paths=False``, a destination ``FILE`` never clashes with a source ``file``.

    The visitor methods are called directly; nothing is created under ``tmp_path``.
    """
    src = str(tmp_path / "a")
    dest = str(tmp_path / "b")

    # source file vs destination file FILE
    src_visitor = SourceMergeVisitor(normalize_paths=False)
    src_visitor.visit_file(src, "file", 0)
    assert len(src_visitor.files) == 1
    assert len(src_visitor.directories) == 0
    assert "file" in src_visitor.files

    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.visit_file(dest, "FILE", 0)
    # not a conflict, since normalization is off
    assert len(dest_visitor.src.files) == 1
    assert len(dest_visitor.src.directories) == 0
    assert "file" in dest_visitor.src.files
    assert len(dest_visitor.src.fatal_conflicts) == 0
    assert len(dest_visitor.src.file_conflicts) == 0

    # source file vs destination directory FILE
    src_visitor = SourceMergeVisitor(normalize_paths=False)
    src_visitor.visit_file(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.before_visit_dir(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 1
    assert "file" in dest_visitor.src.files
    assert len(dest_visitor.src.directories) == 0
    assert len(dest_visitor.src.fatal_conflicts) == 0
    assert len(dest_visitor.src.file_conflicts) == 0

    # not insensitive, order does not matter
    # source directory file vs destination file FILE
    src_visitor = SourceMergeVisitor(normalize_paths=False)
    src_visitor.before_visit_dir(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.visit_file(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 0
    assert len(dest_visitor.src.directories) == 1
    assert "file" in dest_visitor.src.directories
    assert len(dest_visitor.src.fatal_conflicts) == 0
    assert len(dest_visitor.src.file_conflicts) == 0

    # source directory file vs destination directory FILE: the source mkdir entry is kept.
    src_visitor = SourceMergeVisitor(normalize_paths=False)
    src_visitor.before_visit_dir(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.before_visit_dir(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 0
    assert len(dest_visitor.src.directories) == 1
    assert "file" in dest_visitor.src.directories
    assert len(dest_visitor.src.fatal_conflicts) == 0
    assert len(dest_visitor.src.file_conflicts) == 0
def test_destination_visitor_path_normalization(tmp_path: pathlib.Path):
    """With ``normalize_paths=True``, a destination ``FILE`` is matched against a source ``file``.

    File/file, file/dir and dir/file combinations are recorded as fatal conflicts on the source
    visitor; dir/dir only removes the source's directory entry. The visitor methods are called
    directly; nothing is created under ``tmp_path``.
    """
    src = str(tmp_path / "a")
    dest = str(tmp_path / "b")

    # source file vs destination file FILE: fatal conflict
    src_visitor = SourceMergeVisitor(normalize_paths=True)
    src_visitor.visit_file(src, "file", 0)
    assert len(src_visitor.files) == 1
    assert len(src_visitor.directories) == 0
    assert "file" in src_visitor.files

    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.visit_file(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 1
    assert len(dest_visitor.src.directories) == 0
    assert "file" in dest_visitor.src.files
    assert len(dest_visitor.src.fatal_conflicts) == 1
    assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
    assert len(dest_visitor.src.file_conflicts) == 0

    # source file vs destination directory FILE: fatal conflict
    src_visitor = SourceMergeVisitor(normalize_paths=True)
    src_visitor.visit_file(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.before_visit_dir(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 1
    assert "file" in dest_visitor.src.files
    assert len(dest_visitor.src.directories) == 0
    assert len(dest_visitor.src.fatal_conflicts) == 1
    assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
    assert len(dest_visitor.src.file_conflicts) == 0

    # source directory file vs destination file FILE: fatal conflict
    src_visitor = SourceMergeVisitor(normalize_paths=True)
    src_visitor.before_visit_dir(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.visit_file(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 0
    assert len(dest_visitor.src.directories) == 1
    assert "file" in dest_visitor.src.directories
    assert len(dest_visitor.src.fatal_conflicts) == 1
    assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
    assert len(dest_visitor.src.file_conflicts) == 0

    # source directory file vs destination directory FILE
    src_visitor = SourceMergeVisitor(normalize_paths=True)
    src_visitor.before_visit_dir(src, "file", 0)
    dest_visitor = DestinationMergeVisitor(src_visitor)
    dest_visitor.before_visit_dir(dest, "FILE", 0)
    assert len(dest_visitor.src.files) == 0
    # this removes the mkdir action, no directory left over
    assert len(dest_visitor.src.directories) == 0
    # but it's also not a conflict
    assert len(dest_visitor.src.fatal_conflicts) == 0
    assert len(dest_visitor.src.file_conflicts) == 0