file_cache.py: idempotent remove without races (#31477)

There's a race condition in `remove()` as the lockfile is removed after
releasing the lock, which is a problem when another process acquires a
write lock during deletion.

Also simplify life a bit in multiprocessing when a file is possibly
removed multiple times, which currently is an error on the second
deletion, so the proposed fix is to make remove(...) idempotent and not
error when deleting non-existing cache entries.

Don't tests for existence of lockfile, cause windows/linux behavior is different
This commit is contained in:
Harmen Stoppels 2022-07-12 17:03:48 +02:00 committed by GitHub
parent 74e2625dcf
commit ea91e8f4ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 4 deletions

View File

@ -55,9 +55,12 @@ def test_write_and_remove_cache_file(file_cache):
file_cache.remove('test.yaml')
# After removal both the file and the lock file should not exist
# After removal the file should not exist
assert not os.path.exists(file_cache.cache_path('test.yaml'))
assert not os.path.exists(file_cache._lock_path('test.yaml'))
# Whether the lock file exists is more of an implementation detail, on Linux they
# continue to exist, on Windows they don't.
# assert os.path.exists(file_cache._lock_path('test.yaml'))
@pytest.mark.skipif(sys.platform == 'win32',
@ -94,3 +97,10 @@ def test_cache_write_readonly_cache_fails(file_cache):
with pytest.raises(CacheError, match='Insufficient permissions to write'):
file_cache.write_transaction(filename)
@pytest.mark.regression('31475')
def test_delete_is_idempotent(file_cache):
"""Deleting a non-existent key should be idempotent, to simplify life when
running delete with multiple processes"""
file_cache.remove('test.yaml')

View File

@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import errno
import os
import shutil
@ -170,13 +171,17 @@ def mtime(self, key):
return sinfo.st_mtime
def remove(self, key):
file = self.cache_path(key)
lock = self._get_lock(key)
try:
lock.acquire_write()
os.unlink(self.cache_path(key))
os.unlink(file)
except OSError as e:
# File not found is OK, so remove is idempotent.
if e.errno != errno.ENOENT:
raise
finally:
lock.release_write()
lock.cleanup()
class CacheError(SpackError):