Reduce the number of stat calls in "spack verify" (#37251)
Spack comes to a crawl post-install of nvhpc, which is partly thanks to this post install hook which has a lot of redundancy, and isn't correct. 1. There's no need to store "type" because that _is_ "mode". 2. There are more file types than "symlink", "dir", "file". 3. Don't checksum device type things 4. Don't run 3 stat calls (exists, stat, isdir/islink), but one lstat call 5. Don't read entire files into memory I also don't know why `spack.crypto` wasn't used for checksumming, but I guess it's too late for that now. Finally md5 would've been the faster algorithm, which would've been fine given that a non cryptographicall checksum was used anyways.
This commit is contained in:
parent
fc201a2b75
commit
c85877566f
@ -6,6 +6,7 @@
|
|||||||
"""Tests for the `spack.verify` module"""
|
"""Tests for the `spack.verify` module"""
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import stat
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@ -30,22 +31,12 @@ def test_link_manifest_entry(tmpdir):
|
|||||||
os.symlink(file, link)
|
os.symlink(file, link)
|
||||||
|
|
||||||
data = spack.verify.create_manifest_entry(link)
|
data = spack.verify.create_manifest_entry(link)
|
||||||
assert data["type"] == "link"
|
|
||||||
assert data["dest"] == file
|
assert data["dest"] == file
|
||||||
assert all(x in data for x in ("mode", "owner", "group"))
|
assert all(x in data for x in ("mode", "owner", "group"))
|
||||||
|
|
||||||
results = spack.verify.check_entry(link, data)
|
results = spack.verify.check_entry(link, data)
|
||||||
assert not results.has_errors()
|
assert not results.has_errors()
|
||||||
|
|
||||||
data["type"] = "garbage"
|
|
||||||
|
|
||||||
results = spack.verify.check_entry(link, data)
|
|
||||||
assert results.has_errors()
|
|
||||||
assert link in results.errors
|
|
||||||
assert results.errors[link] == ["type"]
|
|
||||||
|
|
||||||
data["type"] = "link"
|
|
||||||
|
|
||||||
file2 = str(tmpdir.join("file2"))
|
file2 = str(tmpdir.join("file2"))
|
||||||
open(file2, "a").close()
|
open(file2, "a").close()
|
||||||
os.remove(link)
|
os.remove(link)
|
||||||
@ -64,18 +55,18 @@ def test_dir_manifest_entry(tmpdir):
|
|||||||
fs.mkdirp(dirent)
|
fs.mkdirp(dirent)
|
||||||
|
|
||||||
data = spack.verify.create_manifest_entry(dirent)
|
data = spack.verify.create_manifest_entry(dirent)
|
||||||
assert data["type"] == "dir"
|
assert stat.S_ISDIR(data["mode"])
|
||||||
assert all(x in data for x in ("mode", "owner", "group"))
|
assert all(x in data for x in ("mode", "owner", "group"))
|
||||||
|
|
||||||
results = spack.verify.check_entry(dirent, data)
|
results = spack.verify.check_entry(dirent, data)
|
||||||
assert not results.has_errors()
|
assert not results.has_errors()
|
||||||
|
|
||||||
data["type"] = "garbage"
|
data["mode"] = "garbage"
|
||||||
|
|
||||||
results = spack.verify.check_entry(dirent, data)
|
results = spack.verify.check_entry(dirent, data)
|
||||||
assert results.has_errors()
|
assert results.has_errors()
|
||||||
assert dirent in results.errors
|
assert dirent in results.errors
|
||||||
assert results.errors[dirent] == ["type"]
|
assert results.errors[dirent] == ["mode"]
|
||||||
|
|
||||||
|
|
||||||
def test_file_manifest_entry(tmpdir):
|
def test_file_manifest_entry(tmpdir):
|
||||||
@ -89,25 +80,25 @@ def test_file_manifest_entry(tmpdir):
|
|||||||
f.write(orig_str)
|
f.write(orig_str)
|
||||||
|
|
||||||
data = spack.verify.create_manifest_entry(file)
|
data = spack.verify.create_manifest_entry(file)
|
||||||
assert data["type"] == "file"
|
assert stat.S_ISREG(data["mode"])
|
||||||
assert data["size"] == len(orig_str)
|
assert data["size"] == len(orig_str)
|
||||||
assert all(x in data for x in ("mode", "owner", "group"))
|
assert all(x in data for x in ("owner", "group"))
|
||||||
|
|
||||||
results = spack.verify.check_entry(file, data)
|
results = spack.verify.check_entry(file, data)
|
||||||
assert not results.has_errors()
|
assert not results.has_errors()
|
||||||
|
|
||||||
data["type"] = "garbage"
|
data["mode"] = 0x99999
|
||||||
|
|
||||||
results = spack.verify.check_entry(file, data)
|
results = spack.verify.check_entry(file, data)
|
||||||
assert results.has_errors()
|
assert results.has_errors()
|
||||||
assert file in results.errors
|
assert file in results.errors
|
||||||
assert results.errors[file] == ["type"]
|
assert results.errors[file] == ["mode"]
|
||||||
|
|
||||||
data["type"] = "file"
|
|
||||||
|
|
||||||
with open(file, "w") as f:
|
with open(file, "w") as f:
|
||||||
f.write(new_str)
|
f.write(new_str)
|
||||||
|
|
||||||
|
data["mode"] = os.stat(file).st_mode
|
||||||
|
|
||||||
results = spack.verify.check_entry(file, data)
|
results = spack.verify.check_entry(file, data)
|
||||||
|
|
||||||
expected = ["size", "hash"]
|
expected = ["size", "hash"]
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
import base64
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
import stat
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
import llnl.util.tty as tty
|
import llnl.util.tty as tty
|
||||||
|
|
||||||
@ -14,35 +16,33 @@
|
|||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
|
|
||||||
|
|
||||||
def compute_hash(path):
|
def compute_hash(path: str, block_size: int = 1048576) -> str:
|
||||||
with open(path, "rb") as f:
|
# why is this not using spack.util.crypto.checksum...
|
||||||
sha1 = hashlib.sha1(f.read()).digest()
|
hasher = hashlib.sha1()
|
||||||
b32 = base64.b32encode(sha1)
|
with open(path, "rb") as file:
|
||||||
return b32.decode()
|
while True:
|
||||||
|
data = file.read(block_size)
|
||||||
|
if not data:
|
||||||
|
break
|
||||||
|
hasher.update(data)
|
||||||
|
return base64.b32encode(hasher.digest()).decode()
|
||||||
|
|
||||||
|
|
||||||
def create_manifest_entry(path):
|
def create_manifest_entry(path: str) -> Dict[str, Any]:
|
||||||
data = {}
|
try:
|
||||||
|
s = os.lstat(path)
|
||||||
|
except OSError:
|
||||||
|
return {}
|
||||||
|
|
||||||
if os.path.exists(path):
|
data: Dict[str, Any] = {"mode": s.st_mode, "owner": s.st_uid, "group": s.st_gid}
|
||||||
stat = os.stat(path)
|
|
||||||
|
|
||||||
data["mode"] = stat.st_mode
|
if stat.S_ISLNK(s.st_mode):
|
||||||
data["owner"] = stat.st_uid
|
|
||||||
data["group"] = stat.st_gid
|
|
||||||
|
|
||||||
if os.path.islink(path):
|
|
||||||
data["type"] = "link"
|
|
||||||
data["dest"] = os.readlink(path)
|
data["dest"] = os.readlink(path)
|
||||||
|
|
||||||
elif os.path.isdir(path):
|
elif stat.S_ISREG(s.st_mode):
|
||||||
data["type"] = "dir"
|
|
||||||
|
|
||||||
else:
|
|
||||||
data["type"] = "file"
|
|
||||||
data["hash"] = compute_hash(path)
|
data["hash"] = compute_hash(path)
|
||||||
data["time"] = stat.st_mtime
|
data["time"] = s.st_mtime
|
||||||
data["size"] = stat.st_size
|
data["size"] = s.st_size
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@ -75,38 +75,28 @@ def check_entry(path, data):
|
|||||||
res.add_error(path, "added")
|
res.add_error(path, "added")
|
||||||
return res
|
return res
|
||||||
|
|
||||||
stat = os.stat(path)
|
s = os.lstat(path)
|
||||||
|
|
||||||
# Check for all entries
|
# Check for all entries
|
||||||
if stat.st_mode != data["mode"]:
|
if s.st_uid != data["owner"]:
|
||||||
res.add_error(path, "mode")
|
|
||||||
if stat.st_uid != data["owner"]:
|
|
||||||
res.add_error(path, "owner")
|
res.add_error(path, "owner")
|
||||||
if stat.st_gid != data["group"]:
|
if s.st_gid != data["group"]:
|
||||||
res.add_error(path, "group")
|
res.add_error(path, "group")
|
||||||
|
|
||||||
# Check for symlink targets and listed as symlink
|
# In the past, `stat(...).st_mode` was stored
|
||||||
if os.path.islink(path):
|
# instead of `lstat(...).st_mode`. So, ignore mode errors for symlinks.
|
||||||
if data["type"] != "link":
|
if not stat.S_ISLNK(s.st_mode) and s.st_mode != data["mode"]:
|
||||||
res.add_error(path, "type")
|
res.add_error(path, "mode")
|
||||||
if os.readlink(path) != data.get("dest", ""):
|
elif stat.S_ISLNK(s.st_mode) and os.readlink(path) != data.get("dest"):
|
||||||
res.add_error(path, "link")
|
res.add_error(path, "link")
|
||||||
|
elif stat.S_ISREG(s.st_mode):
|
||||||
# Check directories are listed as directory
|
|
||||||
elif os.path.isdir(path):
|
|
||||||
if data["type"] != "dir":
|
|
||||||
res.add_error(path, "type")
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Check file contents against hash and listed as file
|
# Check file contents against hash and listed as file
|
||||||
# Check mtime and size as well
|
# Check mtime and size as well
|
||||||
if stat.st_size != data["size"]:
|
if s.st_size != data["size"]:
|
||||||
res.add_error(path, "size")
|
res.add_error(path, "size")
|
||||||
if stat.st_mtime != data["time"]:
|
if s.st_mtime != data["time"]:
|
||||||
res.add_error(path, "mtime")
|
res.add_error(path, "mtime")
|
||||||
if data["type"] != "file":
|
if compute_hash(path) != data.get("hash"):
|
||||||
res.add_error(path, "type")
|
|
||||||
if compute_hash(path) != data.get("hash", ""):
|
|
||||||
res.add_error(path, "hash")
|
res.add_error(path, "hash")
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
Loading…
Reference in New Issue
Block a user