Handle byte sequences which are not encoded as UTF8 while logging. (#21447)

Fix builds which produce lines with non-UTF-8 output while logging.
The alternative is to read in binary mode, and then decode while
ignoring errors.
This commit is contained in:
Paul Ferrell 2021-11-29 05:27:02 -07:00 committed by GitHub
parent bdde70c9d3
commit c0edb17b93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 1 deletion

View File

@ -780,7 +780,12 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo,
try:
while line_count < 100:
# Handle output from the calling process.
line = _retry(in_pipe.readline)()
try:
line = _retry(in_pipe.readline)()
except UnicodeDecodeError:
# installs like --test=root gpgme produce non-UTF8 logs
line = '<line lost: output was not encoded as UTF-8>\n'
if not line:
return
line_count += 1

View File

@ -62,6 +62,24 @@ def test_log_python_output_without_echo(capfd, tmpdir):
assert capfd.readouterr()[0] == ''
def test_log_python_output_with_invalid_utf8(capfd, tmpdir):
    """Invalid UTF-8 written to stdout must be logged (or replaced with a
    placeholder line) rather than crash the log writer."""
    with tmpdir.as_cwd():
        with log_output('foo.txt'):
            # Emit a byte sequence that is not valid UTF-8.
            sys.stdout.buffer.write(b'\xc3\x28\n')

        # python2 and 3 treat invalid UTF-8 differently
        expected = (b'\xc3(\n'
                    if sys.version_info.major == 2
                    else b'<line lost: output was not encoded as UTF-8>\n')

        with open('foo.txt', 'rb') as f:
            assert f.read() == expected

        # nothing on stdout or stderr
        assert capfd.readouterr()[0] == ''
def test_log_python_output_and_echo_output(capfd, tmpdir):
with tmpdir.as_cwd():
# echo two lines