views: resolve symlinked dir - dir conflict when same file (#49039)

A directory and a symlink to it under the same relative path in a
different prefix

```
/prefix1/dir/
/prefix1/dir/file
/prefix2/dir -> /prefix1/dir/
```

are not a blocker to create a view. The view structure simply looks like
this:

```
/view/dir/
/view/dir/file
```

This should be the case independently of the order in which we visit
prefixes, so we could in principle create views order independently.
This commit is contained in:
Harmen Stoppels 2025-02-18 06:53:17 +01:00 committed by GitHub
parent d6669845ed
commit e4a8d45d86
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 178 additions and 259 deletions

View File

@ -41,6 +41,16 @@ def __init__(self, dst, src_a=None, src_b=None):
self.src_a = src_a
self.src_b = src_b
def __repr__(self) -> str:
return f"MergeConflict(dst={self.dst!r}, src_a={self.src_a!r}, src_b={self.src_b!r})"
def _samefile(a: str, b: str):
try:
return os.path.samefile(a, b)
except OSError:
return False
class SourceMergeVisitor(BaseDirectoryVisitor):
"""
@ -168,16 +178,21 @@ def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
# Don't recurse when dir is ignored.
return False
elif self._in_files(proj_rel_path):
# Can't create a dir where a file is.
_, src_a_root, src_a_relpath = self._file(proj_rel_path)
self.fatal_conflicts.append(
MergeConflict(
dst=proj_rel_path,
src_a=os.path.join(src_a_root, src_a_relpath),
src_b=os.path.join(root, rel_path),
# A file-dir conflict is fatal except if they're the same file (symlinked dir).
src_a = os.path.join(*self._file(proj_rel_path))
src_b = os.path.join(root, rel_path)
if not _samefile(src_a, src_b):
self.fatal_conflicts.append(
MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b)
)
)
return False
return False
# Remove the link in favor of the dir.
existing_proj_rel_path, _, _ = self._file(proj_rel_path)
self._del_file(existing_proj_rel_path)
self._add_directory(proj_rel_path, root, rel_path)
return True
elif self._in_directories(proj_rel_path):
# No new directory, carry on.
return True
@ -215,7 +230,7 @@ def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bo
if handle_as_dir:
return self.before_visit_dir(root, rel_path, depth)
self.visit_file(root, rel_path, depth)
self.visit_file(root, rel_path, depth, symlink=True)
return False
def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = False) -> None:
@ -224,29 +239,22 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa
if self.ignore(rel_path):
pass
elif self._in_directories(proj_rel_path):
# Can't create a file where a dir is; fatal error
self.fatal_conflicts.append(
MergeConflict(
dst=proj_rel_path,
src_a=os.path.join(*self._directory(proj_rel_path)),
src_b=os.path.join(root, rel_path),
# Can't create a file where a dir is, unless they are the same file (symlinked dir),
# in which case we simply drop the symlink in favor of the actual dir.
src_a = os.path.join(*self._directory(proj_rel_path))
src_b = os.path.join(root, rel_path)
if not symlink or not _samefile(src_a, src_b):
self.fatal_conflicts.append(
MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b)
)
)
elif self._in_files(proj_rel_path):
# When two files project to the same path, they conflict iff they are distinct.
# If they are the same (i.e. one links to the other), register regular files rather
# than symlinks. The reason is that in copy-type views, we need a copy of the actual
# file, not the symlink.
src_a = os.path.join(*self._file(proj_rel_path))
src_b = os.path.join(root, rel_path)
try:
samefile = os.path.samefile(src_a, src_b)
except OSError:
samefile = False
if not samefile:
if not _samefile(src_a, src_b):
# Distinct files produce a conflict.
self.file_conflicts.append(
MergeConflict(dst=proj_rel_path, src_a=src_a, src_b=src_b)
@ -259,7 +267,6 @@ def visit_file(self, root: str, rel_path: str, depth: int, *, symlink: bool = Fa
existing_proj_rel_path, _, _ = self._file(proj_rel_path)
self._del_file(existing_proj_rel_path)
self._add_file(proj_rel_path, root, rel_path)
else:
# Otherwise register this file to be linked.
self._add_file(proj_rel_path, root, rel_path)

View File

@ -341,39 +341,53 @@ def test_destination_merge_visitor_file_dir_clashes(tmpdir):
assert b_to_a.fatal_conflicts[0].dst == "example"
def test_source_merge_visitor_does_not_register_identical_file_conflicts(tmp_path: pathlib.Path):
"""Tests whether the SourceMergeVisitor does not register identical file conflicts.
but instead registers the file that triggers the potential conflict."""
(tmp_path / "dir_bottom").mkdir()
(tmp_path / "dir_bottom" / "file").write_bytes(b"hello")
@pytest.mark.parametrize("normalize", [True, False])
def test_source_merge_visitor_handles_same_file_gracefully(
tmp_path: pathlib.Path, normalize: bool
):
"""Symlinked files/dirs from one prefix to the other are not file or fatal conflicts, they are
resolved by taking the underlying file/dir, and this does not depend on the order prefixes
are visited."""
(tmp_path / "dir_top").mkdir()
(tmp_path / "dir_top" / "file").symlink_to(tmp_path / "dir_bottom" / "file")
(tmp_path / "dir_top" / "zzzz").write_bytes(b"hello")
def u(path: str) -> str:
return path.upper() if normalize else path
visitor = SourceMergeVisitor()
visitor.set_projection(str(tmp_path / "view"))
(tmp_path / "a").mkdir()
(tmp_path / "a" / "file").write_bytes(b"hello")
(tmp_path / "a" / "dir").mkdir()
(tmp_path / "a" / "dir" / "foo").write_bytes(b"hello")
visit_directory_tree(str(tmp_path / "dir_top"), visitor)
(tmp_path / "b").mkdir()
(tmp_path / "b" / u("file")).symlink_to(tmp_path / "a" / "file")
(tmp_path / "b" / u("dir")).symlink_to(tmp_path / "a" / "dir")
(tmp_path / "b" / "bar").write_bytes(b"hello")
# After visiting the top dir, we should have `file` and `zzzz` listed, in that order. Using
# .items() to test order.
assert list(visitor.files.items()) == [
(str(tmp_path / "view" / "file"), (str(tmp_path / "dir_top"), "file")),
(str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")),
]
visitor_1 = SourceMergeVisitor(normalize_paths=normalize)
visitor_1.set_projection(str(tmp_path / "view"))
for p in ("a", "b"):
visit_directory_tree(str(tmp_path / p), visitor_1)
# Then after visiting the bottom dir, the "conflict" should be resolved, and `file` should
# come from the bottom dir.
visit_directory_tree(str(tmp_path / "dir_bottom"), visitor)
assert not visitor.file_conflicts
assert list(visitor.files.items()) == [
(str(tmp_path / "view" / "zzzz"), (str(tmp_path / "dir_top"), "zzzz")),
(str(tmp_path / "view" / "file"), (str(tmp_path / "dir_bottom"), "file")),
]
visitor_2 = SourceMergeVisitor(normalize_paths=normalize)
visitor_2.set_projection(str(tmp_path / "view"))
for p in ("b", "a"):
visit_directory_tree(str(tmp_path / p), visitor_2)
assert not visitor_1.file_conflicts and not visitor_2.file_conflicts
assert not visitor_1.fatal_conflicts and not visitor_2.fatal_conflicts
assert (
sorted(visitor_1.files.items())
== sorted(visitor_2.files.items())
== [
(str(tmp_path / "view" / "bar"), (str(tmp_path / "b"), "bar")),
(str(tmp_path / "view" / "dir" / "foo"), (str(tmp_path / "a"), f"dir{os.sep}foo")),
(str(tmp_path / "view" / "file"), (str(tmp_path / "a"), "file")),
]
)
assert visitor_1.directories[str(tmp_path / "view" / "dir")] == (str(tmp_path / "a"), "dir")
assert visitor_2.directories[str(tmp_path / "view" / "dir")] == (str(tmp_path / "a"), "dir")
def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathlib.Path):
def test_source_merge_visitor_deals_with_dangling_symlinks(tmp_path: pathlib.Path):
"""When a file and a dangling symlink conflict, this should be handled like a file conflict."""
(tmp_path / "dir_a").mkdir()
os.symlink("non-existent", str(tmp_path / "dir_a" / "file"))
@ -398,227 +412,125 @@ def test_source_merge_visitor_does_deals_with_dangling_symlinks(tmp_path: pathli
assert visitor.files == {str(tmp_path / "view" / "file"): (str(tmp_path / "dir_a"), "file")}
def test_source_visitor_no_path_normalization(tmp_path: pathlib.Path):
src = str(tmp_path / "a")
@pytest.mark.parametrize("normalize", [True, False])
def test_source_visitor_file_file(tmp_path: pathlib.Path, normalize: bool):
(tmp_path / "a").mkdir()
(tmp_path / "b").mkdir()
(tmp_path / "a" / "file").write_bytes(b"")
(tmp_path / "b" / "FILE").write_bytes(b"")
a = SourceMergeVisitor(normalize_paths=False)
a.visit_file(src, "file", 0)
a.visit_file(src, "FILE", 0)
assert len(a.files) == 2
assert len(a.directories) == 0
assert "file" in a.files and "FILE" in a.files
assert len(a.file_conflicts) == 0
v = SourceMergeVisitor(normalize_paths=normalize)
for p in ("a", "b"):
visit_directory_tree(str(tmp_path / p), v)
a = SourceMergeVisitor(normalize_paths=False)
a.visit_file(src, "file", 0)
a.before_visit_dir(src, "FILE", 0)
assert len(a.files) == 1
assert "file" in a.files and "FILE" not in a.files
assert len(a.directories) == 1
assert "FILE" in a.directories
assert len(a.fatal_conflicts) == 0
assert len(a.file_conflicts) == 0
# without normalization, order doesn't matter
a = SourceMergeVisitor(normalize_paths=False)
a.before_visit_dir(src, "FILE", 0)
a.visit_file(src, "file", 0)
assert len(a.files) == 1
assert "file" in a.files and "FILE" not in a.files
assert len(a.directories) == 1
assert "FILE" in a.directories
assert len(a.fatal_conflicts) == 0
assert len(a.file_conflicts) == 0
a = SourceMergeVisitor(normalize_paths=False)
a.before_visit_dir(src, "FILE", 0)
a.before_visit_dir(src, "file", 0)
assert len(a.files) == 0
assert len(a.directories) == 2
assert "FILE" in a.directories and "file" in a.directories
assert len(a.fatal_conflicts) == 0
assert len(a.file_conflicts) == 0
if normalize:
assert len(v.files) == 1
assert len(v.directories) == 0
assert "file" in v.files # first file wins
assert len(v.file_conflicts) == 1
else:
assert len(v.files) == 2
assert len(v.directories) == 0
assert "file" in v.files and "FILE" in v.files
assert not v.fatal_conflicts
assert not v.file_conflicts
def test_source_visitor_path_normalization(tmp_path: pathlib.Path, monkeypatch):
src_a = str(tmp_path / "a")
src_b = str(tmp_path / "b")
@pytest.mark.parametrize("normalize", [True, False])
def test_source_visitor_file_dir(tmp_path: pathlib.Path, normalize: bool):
(tmp_path / "a").mkdir()
(tmp_path / "a" / "file").write_bytes(b"")
(tmp_path / "b").mkdir()
(tmp_path / "b" / "FILE").mkdir()
v1 = SourceMergeVisitor(normalize_paths=normalize)
for p in ("a", "b"):
visit_directory_tree(str(tmp_path / p), v1)
v2 = SourceMergeVisitor(normalize_paths=normalize)
for p in ("b", "a"):
visit_directory_tree(str(tmp_path / p), v2)
os.mkdir(src_a)
os.mkdir(src_b)
assert not v1.file_conflicts and not v2.file_conflicts
file = os.path.join(src_a, "file")
FILE = os.path.join(src_b, "FILE")
with open(file, "wb"):
pass
with open(FILE, "wb"):
pass
assert os.path.exists(file)
assert os.path.exists(FILE)
# file conflict with os.path.samefile reporting it's NOT the same file
a = SourceMergeVisitor(normalize_paths=True)
a.visit_file(src_a, "file", 0)
a.visit_file(src_b, "FILE", 0)
assert a.files
assert len(a.files) == 1
# first file wins
assert "file" in a.files
# this is a conflict since the files are reported to be distinct
assert len(a.file_conflicts) == 1
assert "FILE" in [c.dst for c in a.file_conflicts]
os.remove(FILE)
os.link(file, FILE)
assert os.path.exists(file)
assert os.path.exists(FILE)
assert os.path.samefile(file, FILE)
# file conflict with os.path.samefile reporting it's the same file
a = SourceMergeVisitor(normalize_paths=True)
a.visit_file(src_a, "file", 0)
a.visit_file(src_b, "FILE", 0)
assert a.files
assert len(a.files) == 1
# second file wins
assert "FILE" in a.files
# not a conflict
assert len(a.file_conflicts) == 0
a = SourceMergeVisitor(normalize_paths=True)
a.visit_file(src_a, "file", 0)
a.before_visit_dir(src_a, "FILE", 0)
assert a.files
assert len(a.files) == 1
assert "file" in a.files
assert len(a.directories) == 0
assert len(a.fatal_conflicts) == 1
conflicts = [c.dst for c in a.fatal_conflicts]
assert "FILE" in conflicts
a = SourceMergeVisitor(normalize_paths=True)
a.before_visit_dir(src_a, "FILE", 0)
a.visit_file(src_a, "file", 0)
assert len(a.directories) == 1
assert "FILE" in a.directories
assert len(a.files) == 0
assert len(a.fatal_conflicts) == 1
conflicts = [c.dst for c in a.fatal_conflicts]
assert "file" in conflicts
a = SourceMergeVisitor(normalize_paths=True)
a.before_visit_dir(src_a, "FILE", 0)
a.before_visit_dir(src_a, "file", 0)
assert len(a.directories) == 1
# first dir wins
assert "FILE" in a.directories
assert len(a.files) == 0
assert len(a.fatal_conflicts) == 0
if normalize:
assert len(v1.fatal_conflicts) == len(v2.fatal_conflicts) == 1
else:
assert len(v1.files) == len(v2.files) == 1
assert "file" in v1.files and "file" in v2.files
assert len(v1.directories) == len(v2.directories) == 1
assert "FILE" in v1.directories and "FILE" in v2.directories
assert not v1.fatal_conflicts and not v2.fatal_conflicts
def test_destination_visitor_no_path_normalization(tmp_path: pathlib.Path):
src = str(tmp_path / "a")
dest = str(tmp_path / "b")
@pytest.mark.parametrize("normalize", [True, False])
def test_source_visitor_dir_dir(tmp_path: pathlib.Path, normalize: bool):
(tmp_path / "a").mkdir()
(tmp_path / "a" / "dir").mkdir()
(tmp_path / "b").mkdir()
(tmp_path / "b" / "DIR").mkdir()
v = SourceMergeVisitor(normalize_paths=normalize)
for p in ("a", "b"):
visit_directory_tree(str(tmp_path / p), v)
src_visitor = SourceMergeVisitor(normalize_paths=False)
src_visitor.visit_file(src, "file", 0)
assert len(src_visitor.files) == 1
assert len(src_visitor.directories) == 0
assert "file" in src_visitor.files
assert not v.files
assert not v.fatal_conflicts
assert not v.file_conflicts
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.visit_file(dest, "FILE", 0)
# not a conflict, since normalization is off
assert len(dest_visitor.src.files) == 1
assert len(dest_visitor.src.directories) == 0
assert "file" in dest_visitor.src.files
assert len(dest_visitor.src.fatal_conflicts) == 0
assert len(dest_visitor.src.file_conflicts) == 0
src_visitor = SourceMergeVisitor(normalize_paths=False)
src_visitor.visit_file(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.before_visit_dir(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 1
assert "file" in dest_visitor.src.files
assert len(dest_visitor.src.directories) == 0
assert len(dest_visitor.src.fatal_conflicts) == 0
assert len(dest_visitor.src.file_conflicts) == 0
# not insensitive, order does not matter
src_visitor = SourceMergeVisitor(normalize_paths=False)
src_visitor.before_visit_dir(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.visit_file(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 0
assert len(dest_visitor.src.directories) == 1
assert "file" in dest_visitor.src.directories
assert len(dest_visitor.src.fatal_conflicts) == 0
assert len(dest_visitor.src.file_conflicts) == 0
src_visitor = SourceMergeVisitor(normalize_paths=False)
src_visitor.before_visit_dir(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.before_visit_dir(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 0
assert len(dest_visitor.src.directories) == 1
assert "file" in dest_visitor.src.directories
assert len(dest_visitor.src.fatal_conflicts) == 0
assert len(dest_visitor.src.file_conflicts) == 0
if normalize:
assert len(v.directories) == 1
assert "dir" in v.directories
else:
assert len(v.directories) == 2
assert "DIR" in v.directories and "dir" in v.directories
def test_destination_visitor_path_normalization(tmp_path: pathlib.Path):
src = str(tmp_path / "a")
dest = str(tmp_path / "b")
@pytest.mark.parametrize("normalize", [True, False])
def test_dst_visitor_file_file(tmp_path: pathlib.Path, normalize: bool):
(tmp_path / "a").mkdir()
(tmp_path / "b").mkdir()
(tmp_path / "a" / "file").write_bytes(b"")
(tmp_path / "b" / "FILE").write_bytes(b"")
src_visitor = SourceMergeVisitor(normalize_paths=True)
src_visitor.visit_file(src, "file", 0)
assert len(src_visitor.files) == 1
assert len(src_visitor.directories) == 0
assert "file" in src_visitor.files
src = SourceMergeVisitor(normalize_paths=normalize)
visit_directory_tree(str(tmp_path / "a"), src)
visit_directory_tree(str(tmp_path / "b"), DestinationMergeVisitor(src))
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.visit_file(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 1
assert len(dest_visitor.src.directories) == 0
assert "file" in dest_visitor.src.files
assert len(dest_visitor.src.fatal_conflicts) == 1
assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
assert len(dest_visitor.src.file_conflicts) == 0
assert len(src.files) == 1
assert len(src.directories) == 0
assert "file" in src.files
assert not src.file_conflicts
src_visitor = SourceMergeVisitor(normalize_paths=True)
src_visitor.visit_file(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.before_visit_dir(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 1
assert "file" in dest_visitor.src.files
assert len(dest_visitor.src.directories) == 0
assert len(dest_visitor.src.fatal_conflicts) == 1
assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
assert len(dest_visitor.src.file_conflicts) == 0
if normalize:
assert len(src.fatal_conflicts) == 1
assert "FILE" in [c.dst for c in src.fatal_conflicts]
else:
assert not src.fatal_conflicts
src_visitor = SourceMergeVisitor(normalize_paths=True)
src_visitor.before_visit_dir(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.visit_file(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 0
assert len(dest_visitor.src.directories) == 1
assert "file" in dest_visitor.src.directories
assert len(dest_visitor.src.fatal_conflicts) == 1
assert "FILE" in [c.dst for c in dest_visitor.src.fatal_conflicts]
assert len(dest_visitor.src.file_conflicts) == 0
src_visitor = SourceMergeVisitor(normalize_paths=True)
src_visitor.before_visit_dir(src, "file", 0)
dest_visitor = DestinationMergeVisitor(src_visitor)
dest_visitor.before_visit_dir(dest, "FILE", 0)
assert len(dest_visitor.src.files) == 0
# this removes the mkdir action, no directory left over
assert len(dest_visitor.src.directories) == 0
# but it's also not a conflict
assert len(dest_visitor.src.fatal_conflicts) == 0
assert len(dest_visitor.src.file_conflicts) == 0
@pytest.mark.parametrize("normalize", [True, False])
def test_dst_visitor_file_dir(tmp_path: pathlib.Path, normalize: bool):
(tmp_path / "a").mkdir()
(tmp_path / "a" / "file").write_bytes(b"")
(tmp_path / "b").mkdir()
(tmp_path / "b" / "FILE").mkdir()
src1 = SourceMergeVisitor(normalize_paths=normalize)
visit_directory_tree(str(tmp_path / "a"), src1)
visit_directory_tree(str(tmp_path / "b"), DestinationMergeVisitor(src1))
src2 = SourceMergeVisitor(normalize_paths=normalize)
visit_directory_tree(str(tmp_path / "b"), src2)
visit_directory_tree(str(tmp_path / "a"), DestinationMergeVisitor(src2))
assert len(src1.files) == 1
assert "file" in src1.files
assert not src1.directories
assert not src2.file_conflicts
assert len(src2.directories) == 1
if normalize:
assert len(src1.fatal_conflicts) == 1
assert "FILE" in [c.dst for c in src1.fatal_conflicts]
assert not src2.files
assert len(src2.fatal_conflicts) == 1
assert "file" in [c.dst for c in src2.fatal_conflicts]
else:
assert not src1.fatal_conflicts and not src2.fatal_conflicts
assert not src1.file_conflicts and not src2.file_conflicts