Use st_nlink in hardlink tracking (#39328)

Only record files that may be hardlinks (st_nlink > 1) in the visited set/dict,
instead of recording every file. This should be much cheaper, since hardlinks are very rare.
This commit is contained in:
Harmen Stoppels 2023-09-28 15:24:56 +02:00 committed by GitHub
parent 78132f2d6b
commit 6d55066b94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 11 deletions

View File

@ -647,8 +647,7 @@ class BuildManifestVisitor(BaseDirectoryVisitor):
directories."""
def __init__(self):
# Save unique identifiers of files to avoid
# relocating hardlink files for each path.
# Save unique identifiers of hardlinks to avoid relocating them multiple times
self.visited = set()
# Lists of files we will check
@ -657,6 +656,8 @@ def __init__(self):
def seen_before(self, root, rel_path):
stat_result = os.lstat(os.path.join(root, rel_path))
if stat_result.st_nlink == 1:
return False
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in self.visited:
return True
@ -1581,9 +1582,10 @@ def dedupe_hardlinks_if_necessary(root, buildinfo):
for rel_path in buildinfo[key]:
stat_result = os.lstat(os.path.join(root, rel_path))
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in visited:
continue
visited.add(identifier)
if stat_result.st_nlink > 1:
if identifier in visited:
continue
visited.add(identifier)
new_list.append(rel_path)
buildinfo[key] = new_list

View File

@ -79,8 +79,7 @@ class ElfFilesWithRPathVisitor(BaseDirectoryVisitor):
"""Visitor that collects all elf files that have an rpath"""
def __init__(self):
# Map from (ino, dev) -> path. We need 1 path per file, if there are hardlinks,
# we don't need to store the path multiple times.
# Keep track of what hardlinked files we've already visited.
self.visited = set()
def visit_file(self, root, rel_path, depth):
@ -89,10 +88,10 @@ def visit_file(self, root, rel_path, depth):
identifier = (s.st_ino, s.st_dev)
# We're hitting a hardlink or symlink of an excluded lib, no need to parse.
if identifier in self.visited:
return
self.visited.add(identifier)
if s.st_nlink > 1:
if identifier in self.visited:
return
self.visited.add(identifier)
result = drop_redundant_rpaths(filepath)