filesystem: cleanup (#42342)

Type hints and removal of unused code
This commit is contained in:
Harmen Stoppels
2024-01-29 15:43:17 +01:00
committed by GitHub
parent 7ec93a496d
commit 0718e3459a
4 changed files with 116 additions and 253 deletions

View File

@@ -1377,120 +1377,89 @@ def traverse_tree(
yield (source_path, dest_path)
def lexists_islink_isdir(path):
    """Compute the tuple ``(lexists(path), islink(path), isdir(path))``.

    On unix this takes a minimal number of stat calls: one ``lstat`` and, only when the path is
    a symlink, one additional ``stat`` to inspect the link target. On Windows the ``os.path``
    helpers and the ``islink`` function in scope are used instead.

    Parameters:
        path: path to inspect

    Returns:
        A 3-tuple of booleans ``(exists, is_link, is_dir)``. ``exists`` is ``True`` for
        dangling symlinks as well; ``is_dir`` follows symlinks and is ``False`` when the link
        is dangling.
    """
    if sys.platform == "win32":
        if not os.path.lexists(path):
            return False, False, False
        # Existence was established just above; no need to query it a second time.
        return True, islink(path), os.path.isdir(path)

    # First try to lstat, so we know whether the path itself is a link or not.
    try:
        lst = os.lstat(path)
    except OSError:  # IOError is merely an alias of OSError on Python 3
        return False, False, False

    # Not a link: the lstat result already tells us whether it is a directory.
    if not stat.S_ISLNK(lst.st_mode):
        return True, False, stat.S_ISDIR(lst.st_mode)

    # A symlink: follow it to check whether it points to a directory.
    try:
        is_dir = stat.S_ISDIR(os.stat(path).st_mode)
    except OSError:
        # Dangling symlink (i.e. it lexists but not exists)
        is_dir = False
    return True, True, is_dir
class BaseDirectoryVisitor:
    """Base class and interface for :py:func:`visit_directory_tree`.

    The default implementation does nothing for files and never recurses into directories;
    subclasses override the hooks they care about.
    """

    def visit_file(self, root: str, rel_path: str, depth: int) -> None:
        """Handle the non-symlink file at ``os.path.join(root, rel_path)``

        Parameters:
            root: root directory
            rel_path: relative path to current file from ``root``
            depth: depth of current file from the ``root`` directory"""

    def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None:
        """Handle the symlink to a file at ``os.path.join(root, rel_path)``. Note: ``rel_path`` is
        the location of the symlink, not to what it is pointing to. The symlink may be dangling.

        Parameters:
            root: root directory
            rel_path: relative path to current symlink from ``root``
            depth: depth of current symlink from the ``root`` directory"""

    def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
        """Return True from this function to recurse into the directory at
        os.path.join(root, rel_path). Return False in order not to recurse further.

        Parameters:
            root: root directory
            rel_path: relative path to current directory from ``root``
            depth: depth of current directory from the ``root`` directory

        Returns:
            bool: ``True`` when the directory should be recursed into. ``False`` when
            not"""
        return False

    def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bool:
        """Return ``True`` to recurse into the symlinked directory and ``False`` in order not to.
        Note: ``rel_path`` is the path to the symlink itself. Following symlinked directories
        blindly can cause infinite recursion due to cycles.

        Parameters:
            root: root directory
            rel_path: relative path to current symlink from ``root``
            depth: depth of current symlink from the ``root`` directory

        Returns:
            bool: ``True`` when the directory should be recursed into. ``False`` when
            not"""
        return False

    def after_visit_dir(self, root: str, rel_path: str, depth: int) -> None:
        """Called after recursion into ``rel_path`` finished. This function is not called when
        ``rel_path`` was not recursed into.

        Parameters:
            root: root directory
            rel_path: relative path to current directory from ``root``
            depth: depth of current directory from the ``root`` directory"""

    def after_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> None:
        """Called after recursion into ``rel_path`` finished. This function is not called when
        ``rel_path`` was not recursed into.

        Parameters:
            root: root directory
            rel_path: relative path to current symlink from ``root``
            depth: depth of current symlink from the ``root`` directory"""
def visit_directory_tree(root, visitor, rel_path="", depth=0):
"""Recurses the directory root depth-first through a visitor pattern using the
interface from :py:class:`BaseDirectoryVisitor`
def visit_directory_tree(
root: str, visitor: BaseDirectoryVisitor, rel_path: str = "", depth: int = 0
):
"""Recurses the directory root depth-first through a visitor pattern using the interface from
:py:class:`BaseDirectoryVisitor`
Parameters:
root (str): path of directory to recurse into
visitor (BaseDirectoryVisitor): what visitor to use
rel_path (str): current relative path from the root
depth (str): current depth from the root
root: path of directory to recurse into
visitor: what visitor to use
rel_path: current relative path from the root
depth: current depth from the root
"""
dir = os.path.join(root, rel_path)
dir_entries = sorted(os.scandir(dir), key=lambda d: d.name)
@@ -1498,26 +1467,19 @@ def visit_directory_tree(root, visitor, rel_path="", depth=0):
for f in dir_entries:
rel_child = os.path.join(rel_path, f.name)
islink = f.is_symlink()
# On Windows, symlinks to directories are distinct from
# symlinks to files, and it is possible to create a
# broken symlink to a directory (e.g. using os.symlink
# without `target_is_directory=True`), invoking `isdir`
# on a symlink on Windows that is broken in this manner
# will result in an error. In this case we can work around
# the issue by reading the target and resolving the
# directory ourselves
# On Windows, symlinks to directories are distinct from symlinks to files, and it is
# possible to create a broken symlink to a directory (e.g. using os.symlink without
# `target_is_directory=True`), invoking `isdir` on a symlink on Windows that is broken in
# this manner will result in an error. In this case we can work around the issue by reading
# the target and resolving the directory ourselves
try:
isdir = f.is_dir()
except OSError as e:
if sys.platform == "win32" and hasattr(e, "winerror") and e.winerror == 5 and islink:
# if path is a symlink, determine destination and
# evaluate file vs directory
# if path is a symlink, determine destination and evaluate file vs directory
link_target = resolve_link_target_relative_to_the_link(f)
# link_target might be relative but
# resolve_link_target_relative_to_the_link
# will ensure that if so, that it is relative
# to the CWD and therefore
# makes sense
# link_target might be relative but resolve_link_target_relative_to_the_link
# will ensure that if so, that it is relative to the CWD and therefore makes sense
isdir = os.path.isdir(link_target)
else:
raise e

View File

@@ -8,7 +8,7 @@
import filecmp
import os
import shutil
from collections import OrderedDict
from typing import Callable, Dict, List, Optional, Tuple
import llnl.util.tty as tty
from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, touch, traverse_tree
@@ -51,32 +51,30 @@ class SourceMergeVisitor(BaseDirectoryVisitor):
- A list of merge conflicts in dst/
"""
def __init__(self, ignore=None):
def __init__(self, ignore: Optional[Callable[[str], bool]] = None):
self.ignore = ignore if ignore is not None else lambda f: False
# When mapping <src root> to <dst root>/<projection>, we need
# to prepend the <projection> bit to the relative path in the
# destination dir.
self.projection = ""
# When mapping <src root> to <dst root>/<projection>, we need to prepend the <projection>
# bit to the relative path in the destination dir.
self.projection: str = ""
# When a file blocks another file, the conflict can sometimes
# be resolved / ignored (e.g. <prefix>/LICENSE or
# or <site-packages>/<namespace>/__init__.py conflicts can be
# When a file blocks another file, the conflict can sometimes be resolved / ignored
# (e.g. <prefix>/LICENSE or <site-packages>/<namespace>/__init__.py conflicts can be
# ignored).
self.file_conflicts = []
self.file_conflicts: List[MergeConflict] = []
# When we have to create a dir where a file is, or a file
# where a dir is, we have fatal errors, listed here.
self.fatal_conflicts = []
# When we have to create a dir where a file is, or a file where a dir is, we have fatal
# errors, listed here.
self.fatal_conflicts: List[MergeConflict] = []
# What directories we have to make; this is an ordered set,
# so that we have a fast lookup and can run mkdir in order.
self.directories = OrderedDict()
# What directories we have to make; this is an ordered dict, so that we have a fast lookup
# and can run mkdir in order.
self.directories: Dict[str, Tuple[str, str]] = {}
# Files to link. Maps dst_rel to (src_root, src_rel)
self.files = OrderedDict()
self.files: Dict[str, Tuple[str, str]] = {}
def before_visit_dir(self, root, rel_path, depth):
def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
"""
Register a directory if dst / rel_path is not blocked by a file or ignored.
"""
@@ -104,7 +102,7 @@ def before_visit_dir(self, root, rel_path, depth):
self.directories[proj_rel_path] = (root, rel_path)
return True
def before_visit_symlinked_dir(self, root, rel_path, depth):
def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bool:
"""
Replace symlinked dirs with actual directories when possible in low depths,
otherwise handle it as a file (i.e. we link to the symlink).
@@ -136,7 +134,7 @@ def before_visit_symlinked_dir(self, root, rel_path, depth):
self.visit_file(root, rel_path, depth)
return False
def visit_file(self, root, rel_path, depth):
def visit_file(self, root: str, rel_path: str, depth: int) -> None:
proj_rel_path = os.path.join(self.projection, rel_path)
if self.ignore(rel_path):
@@ -165,11 +163,11 @@ def visit_file(self, root, rel_path, depth):
# Otherwise register this file to be linked.
self.files[proj_rel_path] = (root, rel_path)
def visit_symlinked_file(self, root, rel_path, depth):
def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None:
# Treat symlinked files as ordinary files (without "dereferencing")
self.visit_file(root, rel_path, depth)
def set_projection(self, projection):
def set_projection(self, projection: str) -> None:
self.projection = os.path.normpath(projection)
# Todo, is this how to check in general for empty projection?
@@ -197,24 +195,19 @@ def set_projection(self, projection):
class DestinationMergeVisitor(BaseDirectoryVisitor):
"""DestinationMergeVisitor takes a SourceMergeVisitor
and:
"""DestinationMergeVisitor takes a SourceMergeVisitor and:
a. registers additional conflicts when merging
to the destination prefix
b. removes redundant mkdir operations when
directories already exist in the destination
prefix.
a. registers additional conflicts when merging to the destination prefix
b. removes redundant mkdir operations when directories already exist in the destination prefix.
This also makes sure that symlinked directories
in the target prefix will never be merged with
This also makes sure that symlinked directories in the target prefix will never be merged with
directories in the sources directories.
"""
def __init__(self, source_merge_visitor):
def __init__(self, source_merge_visitor: SourceMergeVisitor):
self.src = source_merge_visitor
def before_visit_dir(self, root, rel_path, depth):
def before_visit_dir(self, root: str, rel_path: str, depth: int) -> bool:
# If destination dir is a file in a src dir, add a conflict,
# and don't traverse deeper
if rel_path in self.src.files:
@@ -236,7 +229,7 @@ def before_visit_dir(self, root, rel_path, depth):
# don't descend into it.
return False
def before_visit_symlinked_dir(self, root, rel_path, depth):
def before_visit_symlinked_dir(self, root: str, rel_path: str, depth: int) -> bool:
"""
Symlinked directories in the destination prefix should
be seen as files; we should not accidentally merge
@@ -262,7 +255,7 @@ def before_visit_symlinked_dir(self, root, rel_path, depth):
# Never descend into symlinked target dirs.
return False
def visit_file(self, root, rel_path, depth):
def visit_file(self, root: str, rel_path: str, depth: int) -> None:
# Can't merge a file if target already exists
if rel_path in self.src.directories:
src_a_root, src_a_relpath = self.src.directories[rel_path]
@@ -280,7 +273,7 @@ def visit_file(self, root, rel_path, depth):
)
)
def visit_symlinked_file(self, root, rel_path, depth):
def visit_symlinked_file(self, root: str, rel_path: str, depth: int) -> None:
# Treat symlinked files as ordinary files (without "dereferencing")
self.visit_file(root, rel_path, depth)