From e4a8d45d8638994efb62cbfead22db3df962e0af Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Tue, 18 Feb 2025 06:53:17 +0100 Subject: [PATCH] views: resolve symlinked dir - dir conflict when same file (#49039) A directory and a symlink to it under the same relative path in a different prefix ``` /prefix1/dir/ /prefix1/dir/file /prefix2/dir -> /prefix1/dir/ ``` are not a blocker to create a view. The view structure simply looks like this: ``` /view/dir/ /view/dir/file ``` This should be the case independently of the order in which we visit prefixes, so we could in principle create views order independently. --- lib/spack/llnl/util/link_tree.py | 59 +-- lib/spack/spack/test/llnl/util/link_tree.py | 378 ++++++++------------ 2 files changed, 178 insertions(+), 259 deletions(-) diff --git a/lib/spack/llnl/util/link_tree.py b/lib/spack/llnl/util/link_tree.py index d851f06d117..e28bd096b12 100644 --- a/lib/spack/llnl/util/link_tree.py +++ b/lib/spack/llnl/util/link_tree.py @@ -41,6 +41,16 @@ def __init__(self, dst, src_a=None, src_b=None): self.src_a = src_a self.src_b = src_b + def __repr__(self) -> str: + return f"MergeConflict(dst={self.dst!r}, src_a={self.src_a!r}, src_b={self.src_b!r})" + + +def _samefile(a: str, b: str): + try: + return os.path.samefile(a, b) + except OSError: + return False + class SourceMergeVisitor(BaseDirectoryVisitor): """ @@ -168,16 +178,21 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool: # Don't recurse when dir is ignored. return False elif self._in_files(proj_rel_path): - # Can't create a dir where a file is. - _, src_a_root, src_a_relpath = self._file(proj_rel_path) - self.fatal_conflicts.append( - MergeConflict( - dst=proj_rel_path, - src_a=os.path.join(src_a_root, src_a_relpath), - src_b=os.path.join(root, rel_path), + # A file-dir conflict is fatal except if they're the same file (symlinked dir). + src_a = os.path.join(*self._file(proj_rel_path)) + src_b = os.path.join(root, rel_path) + + if not _samefile(src_a, src_b): + self.fatal_conflicts.append( + MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b) ) - ) - return False + return False + + # Remove the link in favor of the dir. + existing_proj_rel_path, _, _ = self._file(proj_rel_path) + self._del_file(existing_proj_rel_path) + self._add_directory(proj_rel_path, root, rel_path) + return True elif self._in_directories(proj_rel_path): # No new directory, carry on. return True @@ -215,7 +230,7 @@ def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bo if handle_as_dir: return self.before_visit_dir(root, rel_path, depth) - self.visit_file(root, rel_path, depth) + self.visit_file(root, rel_path, depth, symlink=True) return False def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = False) -> None: @@ -224,29 +239,22 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa if self.ignore(rel_path): pass elif self._in_directories(proj_rel_path): - # Can't create a file where a dir is; fatal error - self.fatal_conflicts.append( - MergeConflict( - dst=proj_rel_path, - src_a=os.path.join(*self._directory(proj_rel_path)), - src_b=os.path.join(root, rel_path), + # Can't create a file where a dir is, unless they are the same file (symlinked dir), + # in which case we simply drop the symlink in favor of the actual dir. + src_a = os.path.join(*self._directory(proj_rel_path)) + src_b = os.path.join(root, rel_path) + if not symlink or not _samefile(src_a, src_b): + self.fatal_conflicts.append( + MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b) ) - ) elif self._in_files(proj_rel_path): # When two files project to the same path, they conflict iff they are distinct. # If they are the same (i.e. one links to the other), register regular files rather # than symlinks. The reason is that in copy-type views, we need a copy of the actual # file, not the symlink. - src_a = os.path.join(*self._file(proj_rel_path)) src_b = os.path.join(root, rel_path) - - try: - samefile = os.path.samefile(src_a, src_b) - except OSError: - samefile = False - - if not samefile: + if not _samefile(src_a, src_b): # Distinct files produce a conflict. self.file_conflicts.append( MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b) @@ -259,7 +267,6 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa existing_proj_rel_path, _, _ = self._file(proj_rel_path) self._del_file(existing_proj_rel_path) self._add_file(proj_rel_path, root, rel_path) - else: # Otherwise register this file to be linked. self._add_file(proj_rel_path, root, rel_path) diff --git a/lib/spack/spack/test/llnl/util/link_tree.py b/lib/spack/spack/test/llnl/util/link_tree.py index 459b40ef16c..3b3c9ff297a 100644 --- a/lib/spack/spack/test/llnl/util/link_tree.py +++ b/lib/spack/spack/test/llnl/util/link_tree.py @@ -341,39 +341,53 @@ def test_destination_merge_visitor_file_dir_clashes(tmpdir): assert b_to_a.fatal_conflicts[0].dst == "example" -def test_source_merge_visitor_does_not_register_identical_file_conflicts(tmp_path: pathlib.Path): - """Tests whether the SourceMergeVisitor does not register identical file conflicts. - but instead registers the file that triggers the potential conflict.""" - (tmp_path / "dir_bottom").mkdir() - (tmp_path / "dir_bottom" / "file").write_bytes(b"hello") +@pytest.mark.parametrize("normalize", [True, False]) +def test_source_merge_visitor_handles_same_file_gracefully( + tmp_path: pathlib.Path, normalize: bool +): + """Symlinked files/dirs from one prefix to the other are not file or fatal conflicts, they are + resolved by taking the underlying file/dir, and this does not depend on the order prefixes + are visited.""" - (tmp_path / "dir_top").mkdir() - (tmp_path / "dir_top" / "file").symlink_to(tmp_path / "dir_bottom" / "file") - (tmp_path / "dir_top" / "zzzz").write_bytes(b"hello") + def u(path: str) -> str: + return path.upper() if normalize else path - visitor = SourceMergeVisitor() - visitor.set_projection(str(tmp_path / "view")) + (tmp_path / "a").mkdir() + (tmp_path / "a" / "file").write_bytes(b"hello") + (tmp_path / "a" / "dir").mkdir() + (tmp_path / "a" / "dir" / "foo").write_bytes(b"hello") - visit_directory_tree(str(tmp_path / "dir_top"), visitor) + (tmp_path / "b").mkdir() + (tmp_path / "b" / u("file")).symlink_to(tmp_path / "a" / "file") + (tmp_path / "b" / u("dir")).symlink_to(tmp_path / "a" / "dir") + (tmp_path / "b" / "bar").write_bytes(b"hello") - # After visiting the top dir, we should have `file` and `zzzz` listed, in that order. Using - # .items() to test order. - assert list(visitor.files.items()) == [ - (str(tmp_path / "view" / "file"), (str(tmp_path / "dir_top"), "file")), - (str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")), - ] + visitor_1 = SourceMergeVisitor(normalize_paths=normalize) + visitor_1.set_projection(str(tmp_path / "view")) + for p in ("a", "b"): + visit_directory_tree(str(tmp_path / p), visitor_1) - # Then after visiting the bottom dir, the "conflict" should be resolved, and `file` should - # come from the bottom dir. - visit_directory_tree(str(tmp_path / "dir_bottom"), visitor) - assert not visitor.file_conflicts - assert list(visitor.files.items()) == [ - (str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")), - (str(tmp_path / "view" / "file"), (str(tmp_path / "dir_bottom"), "file")), - ] + visitor_2 = SourceMergeVisitor(normalize_paths=normalize) + visitor_2.set_projection(str(tmp_path / "view")) + for p in ("b", "a"): + visit_directory_tree(str(tmp_path / p), visitor_2) + + assert not visitor_1.file_conflicts and not visitor_2.file_conflicts + assert not visitor_1.fatal_conflicts and not visitor_2.fatal_conflicts + assert ( + sorted(visitor_1.files.items()) + == sorted(visitor_2.files.items()) + == [ + (str(tmp_path / "view" / "bar"), (str(tmp_path / "b"), "bar")), + (str(tmp_path / "view" / "dir" / "foo"), (str(tmp_path / "a"), f"dir{os.sep}foo")), + (str(tmp_path / "view" / "file"), (str(tmp_path / "a"), "file")), + ] + ) + assert visitor_1.directories[str(tmp_path / "view" / "dir")] == (str(tmp_path / "a"), "dir") + assert visitor_2.directories[str(tmp_path / "view" / "dir")] == (str(tmp_path / "a"), "dir") -def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathlib.Path): +def test_source_merge_visitor_deals_with_dangling_symlinks(tmp_path: pathlib.Path): """When a file and a dangling symlink conflict, this should be handled like a file conflict.""" (tmp_path / "dir_a").mkdir() os.symlink("non-existent", str(tmp_path / "dir_a" / "file")) @@ -398,227 +412,125 @@ def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathli assert visitor.files == {str(tmp_path / "view" / "file"): (str(tmp_path / "dir_a"), "file")} -def test_source_visitor_no_path_normalization(tmp_path: pathlib.Path): - src = str(tmp_path / "a") +@pytest.mark.parametrize("normalize", [True, False]) +def test_source_visitor_file_file(tmp_path: pathlib.Path, normalize: bool): + (tmp_path / "a").mkdir() + (tmp_path / "b").mkdir() + (tmp_path / "a" / "file").write_bytes(b"") + (tmp_path / "b" / "FILE").write_bytes(b"") - a = SourceMergeVisitor(normalize_paths=False) - a.visit_file(src, "file", 0) - a.visit_file(src, "FILE", 0) - assert len(a.files) == 2 - assert len(a.directories) == 0 - assert "file" in a.files and "FILE" in a.files - assert len(a.file_conflicts) == 0 + v = SourceMergeVisitor(normalize_paths=normalize) + for p in ("a", "b"): + visit_directory_tree(str(tmp_path / p), v) - a = SourceMergeVisitor(normalize_paths=False) - a.visit_file(src, "file", 0) - a.before_visit_dir(src, "FILE", 0) - assert len(a.files) == 1 - assert "file" in a.files and "FILE" not in a.files - assert len(a.directories) == 1 - assert "FILE" in a.directories - assert len(a.fatal_conflicts) == 0 - assert len(a.file_conflicts) == 0 - - # without normalization, order doesn't matter - a = SourceMergeVisitor(normalize_paths=False) - a.before_visit_dir(src, "FILE", 0) - a.visit_file(src, "file", 0) - assert len(a.files) == 1 - assert "file" in a.files and "FILE" not in a.files - assert len(a.directories) == 1 - assert "FILE" in a.directories - assert len(a.fatal_conflicts) == 0 - assert len(a.file_conflicts) == 0 - - a = SourceMergeVisitor(normalize_paths=False) - a.before_visit_dir(src, "FILE", 0) - a.before_visit_dir(src, "file", 0) - assert len(a.files) == 0 - assert len(a.directories) == 2 - assert "FILE" in a.directories and "file" in a.directories - assert len(a.fatal_conflicts) == 0 - assert len(a.file_conflicts) == 0 + if normalize: + assert len(v.files) == 1 + assert len(v.directories) == 0 + assert "file" in v.files # first file wins + assert len(v.file_conflicts) == 1 + else: + assert len(v.files) == 2 + assert len(v.directories) == 0 + assert "file" in v.files and "FILE" in v.files + assert not v.fatal_conflicts + assert not v.file_conflicts -def test_source_visitor_path_normalization(tmp_path: pathlib.Path, monkeypatch): - src_a = str(tmp_path / "a") - src_b = str(tmp_path / "b") +@pytest.mark.parametrize("normalize", [True, False]) +def test_source_visitor_file_dir(tmp_path: pathlib.Path, normalize: bool): + (tmp_path / "a").mkdir() + (tmp_path / "a" / "file").write_bytes(b"") + (tmp_path / "b").mkdir() + (tmp_path / "b" / "FILE").mkdir() + v1 = SourceMergeVisitor(normalize_paths=normalize) + for p in ("a", "b"): + visit_directory_tree(str(tmp_path / p), v1) + v2 = SourceMergeVisitor(normalize_paths=normalize) + for p in ("b", "a"): + visit_directory_tree(str(tmp_path / p), v2) - os.mkdir(src_a) - os.mkdir(src_b) + assert not v1.file_conflicts and not v2.file_conflicts - file = os.path.join(src_a, "file") - FILE = os.path.join(src_b, "FILE") - - with open(file, "wb"): - pass - - with open(FILE, "wb"): - pass - - assert os.path.exists(file) - assert os.path.exists(FILE) - - # file conflict with os.path.samefile reporting it's NOT the same file - a = SourceMergeVisitor(normalize_paths=True) - a.visit_file(src_a, "file", 0) - a.visit_file(src_b, "FILE", 0) - assert a.files - assert len(a.files) == 1 - # first file wins - assert "file" in a.files - # this is a conflict since the files are reported to be distinct - assert len(a.file_conflicts) == 1 - assert "FILE" in [c.dst for c in a.file_conflicts] - - os.remove(FILE) - os.link(file, FILE) - - assert os.path.exists(file) - assert os.path.exists(FILE) - assert os.path.samefile(file, FILE) - - # file conflict with os.path.samefile reporting it's the same file - a = SourceMergeVisitor(normalize_paths=True) - a.visit_file(src_a, "file", 0) - a.visit_file(src_b, "FILE", 0) - assert a.files - assert len(a.files) == 1 - # second file wins - assert "FILE" in a.files - # not a conflict - assert len(a.file_conflicts) == 0 - - a = SourceMergeVisitor(normalize_paths=True) - a.visit_file(src_a, "file", 0) - a.before_visit_dir(src_a, "FILE", 0) - assert a.files - assert len(a.files) == 1 - assert "file" in a.files - assert len(a.directories) == 0 - assert len(a.fatal_conflicts) == 1 - conflicts = [c.dst for c in a.fatal_conflicts] - assert "FILE" in conflicts - - a = SourceMergeVisitor(normalize_paths=True) - a.before_visit_dir(src_a, "FILE", 0) - a.visit_file(src_a, "file", 0) - assert len(a.directories) == 1 - assert "FILE" in a.directories - assert len(a.files) == 0 - assert len(a.fatal_conflicts) == 1 - conflicts = [c.dst for c in a.fatal_conflicts] - assert "file" in conflicts - - a = SourceMergeVisitor(normalize_paths=True) - a.before_visit_dir(src_a, "FILE", 0) - a.before_visit_dir(src_a, "file", 0) - assert len(a.directories) == 1 - # first dir wins - assert "FILE" in a.directories - assert len(a.files) == 0 - assert len(a.fatal_conflicts) == 0 + if normalize: + assert len(v1.fatal_conflicts) == len(v2.fatal_conflicts) == 1 + else: + assert len(v1.files) == len(v2.files) == 1 + assert "file" in v1.files and "file" in v2.files + assert len(v1.directories) == len(v2.directories) == 1 + assert "FILE" in v1.directories and "FILE" in v2.directories + assert not v1.fatal_conflicts and not v2.fatal_conflicts -def test_destination_visitor_no_path_normalization(tmp_path: pathlib.Path): - src = str(tmp_path / "a") - dest = str(tmp_path / "b") +@pytest.mark.parametrize("normalize", [True, False]) +def test_source_visitor_dir_dir(tmp_path: pathlib.Path, normalize: bool): + (tmp_path / "a").mkdir() + (tmp_path / "a" / "dir").mkdir() + (tmp_path / "b").mkdir() + (tmp_path / "b" / "DIR").mkdir() + v = SourceMergeVisitor(normalize_paths=normalize) + for p in ("a", "b"): + visit_directory_tree(str(tmp_path / p), v) - src_visitor = SourceMergeVisitor(normalize_paths=False) - src_visitor.visit_file(src, "file", 0) - assert len(src_visitor.files) == 1 - assert len(src_visitor.directories) == 0 - assert "file" in src_visitor.files + assert not v.files + assert not v.fatal_conflicts + assert not v.file_conflicts - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.visit_file(dest, "FILE", 0) - # not a conflict, since normalization is off - assert len(dest_visitor.src.files) == 1 - assert len(dest_visitor.src.directories) == 0 - assert "file" in dest_visitor.src.files - assert len(dest_visitor.src.fatal_conflicts) == 0 - assert len(dest_visitor.src.file_conflicts) == 0 - - src_visitor = SourceMergeVisitor(normalize_paths=False) - src_visitor.visit_file(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.before_visit_dir(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 1 - assert "file" in dest_visitor.src.files - assert len(dest_visitor.src.directories) == 0 - assert len(dest_visitor.src.fatal_conflicts) == 0 - assert len(dest_visitor.src.file_conflicts) == 0 - - # not insensitive, order does not matter - src_visitor = SourceMergeVisitor(normalize_paths=False) - src_visitor.before_visit_dir(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.visit_file(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 0 - assert len(dest_visitor.src.directories) == 1 - assert "file" in dest_visitor.src.directories - assert len(dest_visitor.src.fatal_conflicts) == 0 - assert len(dest_visitor.src.file_conflicts) == 0 - - src_visitor = SourceMergeVisitor(normalize_paths=False) - src_visitor.before_visit_dir(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.before_visit_dir(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 0 - assert len(dest_visitor.src.directories) == 1 - assert "file" in dest_visitor.src.directories - assert len(dest_visitor.src.fatal_conflicts) == 0 - assert len(dest_visitor.src.file_conflicts) == 0 + if normalize: + assert len(v.directories) == 1 + assert "dir" in v.directories + else: + assert len(v.directories) == 2 + assert "DIR" in v.directories and "dir" in v.directories -def test_destination_visitor_path_normalization(tmp_path: pathlib.Path): - src = str(tmp_path / "a") - dest = str(tmp_path / "b") +@pytest.mark.parametrize("normalize", [True, False]) +def test_dst_visitor_file_file(tmp_path: pathlib.Path, normalize: bool): + (tmp_path / "a").mkdir() + (tmp_path / "b").mkdir() + (tmp_path / "a" / "file").write_bytes(b"") + (tmp_path / "b" / "FILE").write_bytes(b"") - src_visitor = SourceMergeVisitor(normalize_paths=True) - src_visitor.visit_file(src, "file", 0) - assert len(src_visitor.files) == 1 - assert len(src_visitor.directories) == 0 - assert "file" in src_visitor.files + src = SourceMergeVisitor(normalize_paths=normalize) + visit_directory_tree(str(tmp_path / "a"), src) + visit_directory_tree(str(tmp_path / "b"), DestinationMergeVisitor(src)) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.visit_file(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 1 - assert len(dest_visitor.src.directories) == 0 - assert "file" in dest_visitor.src.files - assert len(dest_visitor.src.fatal_conflicts) == 1 - assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] - assert len(dest_visitor.src.file_conflicts) == 0 + assert len(src.files) == 1 + assert len(src.directories) == 0 + assert "file" in src.files + assert not src.file_conflicts - src_visitor = SourceMergeVisitor(normalize_paths=True) - src_visitor.visit_file(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.before_visit_dir(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 1 - assert "file" in dest_visitor.src.files - assert len(dest_visitor.src.directories) == 0 - assert len(dest_visitor.src.fatal_conflicts) == 1 - assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] - assert len(dest_visitor.src.file_conflicts) == 0 + if normalize: + assert len(src.fatal_conflicts) == 1 + assert "FILE" in [c.dst for c in src.fatal_conflicts] + else: + assert not src.fatal_conflicts - src_visitor = SourceMergeVisitor(normalize_paths=True) - src_visitor.before_visit_dir(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.visit_file(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 0 - assert len(dest_visitor.src.directories) == 1 - assert "file" in dest_visitor.src.directories - assert len(dest_visitor.src.fatal_conflicts) == 1 - assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts] - assert len(dest_visitor.src.file_conflicts) == 0 - src_visitor = SourceMergeVisitor(normalize_paths=True) - src_visitor.before_visit_dir(src, "file", 0) - dest_visitor = DestinationMergeVisitor(src_visitor) - dest_visitor.before_visit_dir(dest, "FILE", 0) - assert len(dest_visitor.src.files) == 0 - # this removes the mkdir action, no directory left over - assert len(dest_visitor.src.directories) == 0 - # but it's also not a conflict - assert len(dest_visitor.src.fatal_conflicts) == 0 - assert len(dest_visitor.src.file_conflicts) == 0 +@pytest.mark.parametrize("normalize", [True, False]) +def test_dst_visitor_file_dir(tmp_path: pathlib.Path, normalize: bool): + (tmp_path / "a").mkdir() + (tmp_path / "a" / "file").write_bytes(b"") + (tmp_path / "b").mkdir() + (tmp_path / "b" / "FILE").mkdir() + src1 = SourceMergeVisitor(normalize_paths=normalize) + visit_directory_tree(str(tmp_path / "a"), src1) + visit_directory_tree(str(tmp_path / "b"), DestinationMergeVisitor(src1)) + src2 = SourceMergeVisitor(normalize_paths=normalize) + visit_directory_tree(str(tmp_path / "b"), src2) + visit_directory_tree(str(tmp_path / "a"), DestinationMergeVisitor(src2)) + + assert len(src1.files) == 1 + assert "file" in src1.files + assert not src1.directories + assert not src2.file_conflicts + assert len(src2.directories) == 1 + + if normalize: + assert len(src1.fatal_conflicts) == 1 + assert "FILE" in [c.dst for c in src1.fatal_conflicts] + assert not src2.files + assert len(src2.fatal_conflicts) == 1 + assert "file" in [c.dst for c in src2.fatal_conflicts] + else: + assert not src1.fatal_conflicts and not src2.fatal_conflicts + assert not src1.file_conflicts and not src2.file_conflicts