filter_file: fix various bugs (#48038)
* `f.tell` on a `TextIOWrapper` does not return the offset in bytes, but an opaque integer that can only be used for `f.seek` on the same object. Spack assumed it was a byte offset.
* Do not open files in a locale-dependent way; assume utf-8 (and allow users to override that).
* Use tempfile to generate a backup/temporary file in a safe way.
* Comparison between None and str is valid and on purpose.
This commit is contained in:
parent
bee2132c04
commit
f124409d8a
@ -301,35 +301,32 @@ def filter_file(
|
|||||||
ignore_absent: bool = False,
|
ignore_absent: bool = False,
|
||||||
start_at: Optional[str] = None,
|
start_at: Optional[str] = None,
|
||||||
stop_at: Optional[str] = None,
|
stop_at: Optional[str] = None,
|
||||||
|
encoding: Optional[str] = "utf-8",
|
||||||
) -> None:
|
) -> None:
|
||||||
r"""Like sed, but uses python regular expressions.
|
r"""Like sed, but uses python regular expressions.
|
||||||
|
|
||||||
Filters every line of each file through regex and replaces the file
|
Filters every line of each file through regex and replaces the file with a filtered version.
|
||||||
with a filtered version. Preserves mode of filtered files.
|
Preserves mode of filtered files.
|
||||||
|
|
||||||
As with re.sub, ``repl`` can be either a string or a callable.
|
As with re.sub, ``repl`` can be either a string or a callable. If it is a callable, it is
|
||||||
If it is a callable, it is passed the match object and should
|
passed the match object and should return a suitable replacement string. If it is a string, it
|
||||||
return a suitable replacement string. If it is a string, it
|
can contain ``\1``, ``\2``, etc. to represent back-substitution as sed would allow.
|
||||||
can contain ``\1``, ``\2``, etc. to represent back-substitution
|
|
||||||
as sed would allow.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
regex (str): The regular expression to search for
|
regex: The regular expression to search for
|
||||||
repl (str): The string to replace matches with
|
repl: The string to replace matches with
|
||||||
*filenames: One or more files to search and replace
|
*filenames: One or more files to search and replace
|
||||||
string (bool): Treat regex as a plain string. Default is False
|
string: Treat regex as a plain string. Default is False
backup: Make backup file(s) suffixed with ``~``. Default is False
|
||||||
backup (bool): Make backup file(s) suffixed with ``~``. Default is False
|
ignore_absent: Ignore any files that don't exist. Default is False
|
||||||
ignore_absent (bool): Ignore any files that don't exist.
|
start_at: Marker used to start applying the replacements. If a text line matches this
|
||||||
Default is False
|
marker filtering is started at the next line. All contents before the marker and the
|
||||||
start_at (str): Marker used to start applying the replacements. If a
|
marker itself are copied verbatim. Default is to start filtering from the first line of
|
||||||
text line matches this marker filtering is started at the next line.
|
the file.
|
||||||
All contents before the marker and the marker itself are copied
|
stop_at: Marker used to stop scanning the file further. If a text line matches this marker
|
||||||
verbatim. Default is to start filtering from the first line of the
|
filtering is stopped and the rest of the file is copied verbatim. Default is to filter
|
||||||
file.
|
until the end of the file.
|
||||||
stop_at (str): Marker used to stop scanning the file further. If a text
|
encoding: The encoding to use when reading and writing the files. Default is ``"utf-8"``;
|
||||||
line matches this marker filtering is stopped and the rest of the
|
pass None to use the system's default encoding.
|
||||||
file is copied verbatim. Default is to filter until the end of the
|
|
||||||
file.
|
|
||||||
"""
|
"""
|
||||||
# Allow strings to use \1, \2, etc. for replacement, like sed
|
# Allow strings to use \1, \2, etc. for replacement, like sed
|
||||||
if not callable(repl):
|
if not callable(repl):
|
||||||
@ -345,72 +342,54 @@ def groupid_to_group(x):
|
|||||||
|
|
||||||
if string:
|
if string:
|
||||||
regex = re.escape(regex)
|
regex = re.escape(regex)
|
||||||
for filename in path_to_os_path(*filenames):
|
regex_compiled = re.compile(regex)
|
||||||
msg = 'FILTER FILE: {0} [replacing "{1}"]'
|
for path in path_to_os_path(*filenames):
|
||||||
tty.debug(msg.format(filename, regex))
|
fd, temp_path = tempfile.mkstemp(prefix=os.path.basename(path), dir=os.path.dirname(path))
|
||||||
|
os.close(fd)
|
||||||
|
|
||||||
backup_filename = filename + "~"
|
if ignore_absent and not os.path.exists(path):
|
||||||
tmp_filename = filename + ".spack~"
|
tty.debug(f'FILTER FILE: file "{path}" not found. Skipping to next file.')
|
||||||
|
|
||||||
if ignore_absent and not os.path.exists(filename):
|
|
||||||
msg = 'FILTER FILE: file "{0}" not found. Skipping to next file.'
|
|
||||||
tty.debug(msg.format(filename))
|
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
|
tty.debug(f'FILTER FILE: {path} [replacing "{regex}"]')
|
||||||
|
|
||||||
# Create backup file. Don't overwrite an existing backup
|
shutil.copy(path, temp_path)
|
||||||
# file in case this file is being filtered multiple times.
|
errored = False
|
||||||
if not os.path.exists(backup_filename):
|
|
||||||
shutil.copy(filename, backup_filename)
|
|
||||||
|
|
||||||
# Create a temporary file to read from. We cannot use backup_filename
|
|
||||||
# in case filter_file is invoked multiple times on the same file.
|
|
||||||
shutil.copy(filename, tmp_filename)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Open as a text file and filter until the end of the file is
|
# Open as a text file and filter until the end of the file is reached, or we found a
|
||||||
# reached, or we found a marker in the line if it was specified
|
# marker in the line if it was specified. To avoid translating line endings (\n to
|
||||||
#
|
# \r\n and vice-versa) use newline="".
|
||||||
# To avoid translating line endings (\n to \r\n and vice-versa)
|
with open(
|
||||||
# we force os.open to ignore translations and use the line endings
|
temp_path, mode="r", errors="surrogateescape", newline="", encoding=encoding
|
||||||
# the file comes with
|
) as input_file, open(
|
||||||
with open(tmp_filename, mode="r", errors="surrogateescape", newline="") as input_file:
|
path, mode="w", errors="surrogateescape", newline="", encoding=encoding
|
||||||
with open(filename, mode="w", errors="surrogateescape", newline="") as output_file:
|
) as output_file:
|
||||||
do_filtering = start_at is None
|
if start_at is None and stop_at is None: # common case, avoids branching in loop
|
||||||
# Using iter and readline is a workaround needed not to
|
for line in input_file:
|
||||||
# disable input_file.tell(), which will happen if we call
|
output_file.write(re.sub(regex_compiled, repl, line))
|
||||||
# input_file.next() implicitly via the for loop
|
else:
|
||||||
for line in iter(input_file.readline, ""):
|
# state is -1 before start_at; 0 between; 1 after stop_at
|
||||||
if stop_at is not None:
|
state = 0 if start_at is None else -1
|
||||||
current_position = input_file.tell()
|
for line in input_file:
|
||||||
|
if state == 0:
|
||||||
if stop_at == line.strip():
|
if stop_at == line.strip():
|
||||||
output_file.write(line)
|
state = 1
|
||||||
break
|
else:
|
||||||
if do_filtering:
|
line = re.sub(regex_compiled, repl, line)
|
||||||
filtered_line = re.sub(regex, repl, line)
|
elif state == -1 and start_at == line.strip():
|
||||||
output_file.write(filtered_line)
|
state = 0
|
||||||
else:
|
output_file.write(line)
|
||||||
do_filtering = start_at == line.strip()
|
|
||||||
output_file.write(line)
|
|
||||||
else:
|
|
||||||
current_position = None
|
|
||||||
|
|
||||||
# If we stopped filtering at some point, reopen the file in
|
|
||||||
# binary mode and copy verbatim the remaining part
|
|
||||||
if current_position and stop_at:
|
|
||||||
with open(tmp_filename, mode="rb") as input_binary_buffer:
|
|
||||||
input_binary_buffer.seek(current_position)
|
|
||||||
with open(filename, mode="ab") as output_binary_buffer:
|
|
||||||
output_binary_buffer.writelines(input_binary_buffer.readlines())
|
|
||||||
|
|
||||||
except BaseException:
|
except BaseException:
|
||||||
# clean up the original file on failure.
|
# restore the original file
|
||||||
shutil.move(backup_filename, filename)
|
os.rename(temp_path, path)
|
||||||
|
errored = True
|
||||||
raise
|
raise
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
os.remove(tmp_filename)
|
if not errored and not backup:
|
||||||
if not backup and os.path.exists(backup_filename):
|
os.unlink(temp_path)
|
||||||
os.remove(backup_filename)
|
|
||||||
|
|
||||||
|
|
||||||
class FileFilter:
|
class FileFilter:
|
||||||
|
Loading…
Reference in New Issue
Block a user