From e9d2732e0072d992736403f61d1f3fc8a09905f8 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Wed, 11 Dec 2024 10:54:17 +0100 Subject: [PATCH] log.py: improve utf-8 handling, and non-utf-8 output (#48005) --- lib/spack/llnl/util/tty/log.py | 22 ++++++++++++++-------- lib/spack/spack/test/llnl/util/tty/log.py | 18 ++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/lib/spack/llnl/util/tty/log.py b/lib/spack/llnl/util/tty/log.py index 534472dbecd..612d9ff380e 100644 --- a/lib/spack/llnl/util/tty/log.py +++ b/lib/spack/llnl/util/tty/log.py @@ -879,10 +879,13 @@ def _writer_daemon( write_fd.close() # 1. Use line buffering (3rd param = 1) since Python 3 has a bug - # that prevents unbuffered text I/O. - # 2. Python 3.x before 3.7 does not open with UTF-8 encoding by default + # that prevents unbuffered text I/O. [needs citation] + # 2. Enforce a UTF-8 interpretation of build process output with errors replaced by '?'. + # The downside is that the log file will not contain the exact output of the build process. # 3. closefd=False because Connection has "ownership" - read_file = os.fdopen(read_fd.fileno(), "r", 1, encoding="utf-8", closefd=False) + read_file = os.fdopen( + read_fd.fileno(), "r", 1, encoding="utf-8", errors="replace", closefd=False + ) if stdin_fd: stdin_file = os.fdopen(stdin_fd.fileno(), closefd=False) @@ -928,11 +931,7 @@ def _writer_daemon( try: while line_count < 100: # Handle output from the calling process. - try: - line = _retry(read_file.readline)() - except UnicodeDecodeError: - # installs like --test=root gpgme produce non-UTF8 logs - line = "\n" + line = _retry(read_file.readline)() if not line: return @@ -946,6 +945,13 @@ def _writer_daemon( output_line = clean_line if filter_fn: output_line = filter_fn(clean_line) + enc = sys.stdout.encoding + if enc != "utf-8": + # On Python 3.6 and 3.7-3.14 with non-{utf-8,C} locale stdout + # may not be able to handle utf-8 output. We do an inefficient + # dance of re-encoding with errors replaced, so stdout.write + # does not raise. + output_line = output_line.encode(enc, "replace").decode(enc) sys.stdout.write(output_line) # Stripped output to log file. diff --git a/lib/spack/spack/test/llnl/util/tty/log.py b/lib/spack/spack/test/llnl/util/tty/log.py index 61879fc4108..1ffcf22453f 100644 --- a/lib/spack/spack/test/llnl/util/tty/log.py +++ b/lib/spack/spack/test/llnl/util/tty/log.py @@ -57,18 +57,16 @@ def test_log_python_output_without_echo(capfd, tmpdir): assert capfd.readouterr()[0] == "" -def test_log_python_output_with_invalid_utf8(capfd, tmpdir): - with tmpdir.as_cwd(): - with log.log_output("foo.txt"): - sys.stdout.buffer.write(b"\xc3\x28\n") +def test_log_python_output_with_invalid_utf8(capfd, tmp_path): + tmp_file = str(tmp_path / "foo.txt") + with log.log_output(tmp_file, echo=True): + sys.stdout.buffer.write(b"\xc3helloworld\n") - expected = b"\n" - with open("foo.txt", "rb") as f: - written = f.read() - assert written == expected + # we should be able to read this as valid utf-8 + with open(tmp_file, "r", encoding="utf-8") as f: + assert f.read() == "�helloworld\n" - # nothing on stdout or stderr - assert capfd.readouterr()[0] == "" + assert capfd.readouterr().out == "�helloworld\n" def test_log_python_output_and_echo_output(capfd, tmpdir):