file_cache.py: allow read transaction on uninitialized cache (#47212)

This allows the following

```python
cache.init_entry("my/cache")
with cache.read_transaction("my/cache") as f:
    data = f.read() if f is not None else None
```

mirroring `write_transaction`, which returns a tuple `(old, new)` where
`old` is `None` if the cache file did not exist yet.

The alternative that requires less defensive programming on the call
site would be to create the "old" file upon first read, but I did not
want to think about how to safely atomically create the file, and it's
not unthinkable that an empty file is an invalid format (for instance
the call site may expect a json file, which requires at least {} bytes).
This commit is contained in:
Harmen Stoppels 2024-10-25 17:10:14 +02:00 committed by GitHub
parent 7319408bc7
commit e86a3b68f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 39 deletions

View File

@ -31,6 +31,11 @@ def test_write_and_read_cache_file(file_cache):
assert text == "foobar\n" assert text == "foobar\n"
def test_read_before_init(file_cache):
with file_cache.read_transaction("test.yaml") as stream:
assert stream is None
@pytest.mark.not_on_windows("Locks not supported on Windows") @pytest.mark.not_on_windows("Locks not supported on Windows")
def test_failed_write_and_read_cache_file(file_cache): def test_failed_write_and_read_cache_file(file_cache):
"""Test failing to write then attempting to read a cached file.""" """Test failing to write then attempting to read a cached file."""
@ -46,11 +51,6 @@ def test_failed_write_and_read_cache_file(file_cache):
# File does not exist # File does not exist
assert not file_cache.init_entry("test.yaml") assert not file_cache.init_entry("test.yaml")
# Attempting to read will cause a FileNotFoundError
with pytest.raises(FileNotFoundError, match=r"test\.yaml"):
with file_cache.read_transaction("test.yaml"):
pass
def test_write_and_remove_cache_file(file_cache): def test_write_and_remove_cache_file(file_cache):
"""Test two write transactions on a cached file. Then try to remove an """Test two write transactions on a cached file. Then try to remove an

View File

@ -7,6 +7,7 @@
import math import math
import os import os
import shutil import shutil
from typing import IO, Optional, Tuple
from llnl.util.filesystem import mkdirp, rename from llnl.util.filesystem import mkdirp, rename
@ -14,6 +15,51 @@
from spack.util.lock import Lock, ReadTransaction, WriteTransaction from spack.util.lock import Lock, ReadTransaction, WriteTransaction
def _maybe_open(path: str) -> Optional[IO[str]]:
try:
return open(path, "r")
except OSError as e:
if e.errno != errno.ENOENT:
raise
return None
class ReadContextManager:
def __init__(self, path: str) -> None:
self.path = path
def __enter__(self) -> Optional[IO[str]]:
"""Return a file object for the cache if it exists."""
self.cache_file = _maybe_open(self.path)
return self.cache_file
def __exit__(self, type, value, traceback):
if self.cache_file:
self.cache_file.close()
class WriteContextManager:
def __init__(self, path: str) -> None:
self.path = path
self.tmp_path = f"{self.path}.tmp"
def __enter__(self) -> Tuple[Optional[IO[str]], IO[str]]:
"""Return (old_file, new_file) file objects, where old_file is optional."""
self.old_file = _maybe_open(self.path)
self.new_file = open(self.tmp_path, "w")
return self.old_file, self.new_file
def __exit__(self, type, value, traceback):
if self.old_file:
self.old_file.close()
self.new_file.close()
if value:
os.remove(self.tmp_path)
else:
rename(self.tmp_path, self.path)
class FileCache: class FileCache:
"""This class manages cached data in the filesystem. """This class manages cached data in the filesystem.
@ -107,7 +153,8 @@ def read_transaction(self, key):
cache_file.read() cache_file.read()
""" """
return ReadTransaction(self._get_lock(key), acquire=lambda: open(self.cache_path(key))) path = self.cache_path(key)
return ReadTransaction(self._get_lock(key), acquire=lambda: ReadContextManager(path))
def write_transaction(self, key): def write_transaction(self, key):
"""Get a write transaction on a file cache item. """Get a write transaction on a file cache item.
@ -117,40 +164,11 @@ def write_transaction(self, key):
moves the file into place on top of the old file atomically. moves the file into place on top of the old file atomically.
""" """
filename = self.cache_path(key) path = self.cache_path(key)
if os.path.exists(filename) and not os.access(filename, os.W_OK): if os.path.exists(path) and not os.access(path, os.W_OK):
raise CacheError( raise CacheError(f"Insufficient permissions to write to file cache at {path}")
"Insufficient permissions to write to file cache at {0}".format(filename)
)
# TODO: this nested context manager adds a lot of complexity and return WriteTransaction(self._get_lock(key), acquire=lambda: WriteContextManager(path))
# TODO: is pretty hard to reason about in llnl.util.lock. At some
# TODO: point we should just replace it with functions and simplify
# TODO: the locking code.
class WriteContextManager:
def __enter__(cm):
cm.orig_filename = self.cache_path(key)
cm.orig_file = None
if os.path.exists(cm.orig_filename):
cm.orig_file = open(cm.orig_filename, "r")
cm.tmp_filename = self.cache_path(key) + ".tmp"
cm.tmp_file = open(cm.tmp_filename, "w")
return cm.orig_file, cm.tmp_file
def __exit__(cm, type, value, traceback):
if cm.orig_file:
cm.orig_file.close()
cm.tmp_file.close()
if value:
os.remove(cm.tmp_filename)
else:
rename(cm.tmp_filename, cm.orig_filename)
return WriteTransaction(self._get_lock(key), acquire=WriteContextManager)
def mtime(self, key) -> float: def mtime(self, key) -> float:
"""Return modification time of cache file, or -inf if it does not exist. """Return modification time of cache file, or -inf if it does not exist.