The HTML number conversion regex, when operating against a byte string, will
only convert individual bytes, and therefore incorrectly converts UTF-8 encoded
characters. Decoding byte strings to unicode objects results in correct HTML
number encodings.
This commit is contained in:
Peter Scheibel 2015-10-23 20:56:06 -07:00
parent d76c923623
commit cc0ee3dc29

View File

@ -27,6 +27,7 @@
import itertools
import re
import os
import codecs
import llnl.util.tty as tty
from llnl.util.filesystem import *
@ -107,7 +108,7 @@ def __eq__(self, other):
def fetch_log(path):
if not os.path.exists(path):
return list()
with open(path, 'rb') as F:
with codecs.open(path, 'rb', 'utf-8') as F:
return list(line.strip() for line in F.readlines())