Refactor to improve spec format speed (#43712)

When looking at where we spend our time in solver setup, I noticed a fair bit of time is spent
in `Spec.format()`, and `Spec.format()` is a pretty old, slow, convoluted method.

This PR does a number of things:
- [x] Consolidate most of what was being done manually with a character loop and several
      regexes into a single regex.
- [x] Precompile regexes where we keep them 
- [x] Remove the `transform=` argument to `Spec.format()` which was only used in one 
      place in the code (modules) to uppercase env var names, but added a lot of complexity
- [x] Avoid escaping and colorizing specs unless necessary
- [x] Refactor a lot of the colorization logic to avoid unnecessary object construction
- [x] Add type hints and remove some spots in the code where we were using nonexistent
      arguments to `format()`.
- [x] Add trivial cases to `__str__` in `VariantMap` and `VersionList` to avoid sorting
- [x] Avoid calling `isinstance()` in the main loop of `Spec.format()`
- [x] Don't bother constructing a `string` representation for the result of `_prev_version`
      as it is only used for comparisons.

In my timings (on all the specs formatted in a solve of `hdf5`), this is over 2.67x faster than the 
original `format()`, and it seems to reduce setup time by around a second (for `hdf5`).
This commit is contained in:
Todd Gamblin
2024-04-23 10:52:15 -07:00
committed by GitHub
parent 978c20f35a
commit aa0825d642
12 changed files with 226 additions and 270 deletions

View File

@@ -12,7 +12,7 @@
import traceback
from datetime import datetime
from sys import platform as _platform
from typing import NoReturn
from typing import Any, NoReturn
if _platform != "win32":
import fcntl
@@ -158,21 +158,22 @@ def get_timestamp(force=False):
return ""
def msg(message, *args, **kwargs):
def msg(message: Any, *args: Any, newline: bool = True) -> None:
if not msg_enabled():
return
if isinstance(message, Exception):
message = "%s: %s" % (message.__class__.__name__, str(message))
message = f"{message.__class__.__name__}: {message}"
else:
message = str(message)
newline = kwargs.get("newline", True)
st_text = ""
if _stacktrace:
st_text = process_stacktrace(2)
if newline:
cprint("@*b{%s==>} %s%s" % (st_text, get_timestamp(), cescape(_output_filter(message))))
else:
cwrite("@*b{%s==>} %s%s" % (st_text, get_timestamp(), cescape(_output_filter(message))))
nl = "\n" if newline else ""
cwrite(f"@*b{{{st_text}==>}} {get_timestamp()}{cescape(_output_filter(message))}{nl}")
for arg in args:
print(indent + _output_filter(str(arg)))

View File

@@ -62,6 +62,7 @@
import re
import sys
from contextlib import contextmanager
from typing import Optional
class ColorParseError(Exception):
@@ -95,7 +96,7 @@ def __init__(self, message):
} # white
# Regex to be used for color formatting
color_re = r"@(?:@|\.|([*_])?([a-zA-Z])?(?:{((?:[^}]|}})*)})?)"
COLOR_RE = re.compile(r"@(?:(@)|(\.)|([*_])?([a-zA-Z])?(?:{((?:[^}]|}})*)})?)")
# Mapping from color arguments to values for tty.set_color
color_when_values = {"always": True, "auto": None, "never": False}
@@ -203,77 +204,64 @@ def color_when(value):
set_color_when(old_value)
class match_to_ansi:
def __init__(self, color=True, enclose=False, zsh=False):
self.color = _color_when_value(color)
self.enclose = enclose
self.zsh = zsh
def escape(self, s):
"""Returns a TTY escape sequence for a color"""
if self.color:
if self.zsh:
result = rf"\e[0;{s}m"
else:
result = f"\033[{s}m"
if self.enclose:
result = rf"\[{result}\]"
return result
def _escape(s: str, color: bool, enclose: bool, zsh: bool) -> str:
"""Returns a TTY escape sequence for a color"""
if color:
if zsh:
result = rf"\e[0;{s}m"
else:
return ""
result = f"\033[{s}m"
def __call__(self, match):
"""Convert a match object generated by ``color_re`` into an ansi
color code. This can be used as a handler in ``re.sub``.
"""
style, color, text = match.groups()
m = match.group(0)
if enclose:
result = rf"\[{result}\]"
if m == "@@":
return "@"
elif m == "@.":
return self.escape(0)
elif m == "@":
raise ColorParseError("Incomplete color format: '%s' in %s" % (m, match.string))
string = styles[style]
if color:
if color not in colors:
raise ColorParseError(
"Invalid color specifier: '%s' in '%s'" % (color, match.string)
)
string += ";" + str(colors[color])
colored_text = ""
if text:
colored_text = text + self.escape(0)
return self.escape(string) + colored_text
return result
else:
return ""
def colorize(string, **kwargs):
def colorize(
string: str, color: Optional[bool] = None, enclose: bool = False, zsh: bool = False
) -> str:
"""Replace all color expressions in a string with ANSI control codes.
Args:
string (str): The string to replace
string: The string to replace
Returns:
str: The filtered string
The filtered string
Keyword Arguments:
color (bool): If False, output will be plain text without control
codes, for output to non-console devices.
enclose (bool): If True, enclose ansi color sequences with
color: If False, output will be plain text without control codes, for output to
non-console devices (default: automatically choose color or not)
enclose: If True, enclose ansi color sequences with
square brackets to prevent misestimation of terminal width.
zsh (bool): If True, use zsh ansi codes instead of bash ones (for variables like PS1)
zsh: If True, use zsh ansi codes instead of bash ones (for variables like PS1)
"""
color = _color_when_value(kwargs.get("color", get_color_when()))
zsh = kwargs.get("zsh", False)
string = re.sub(color_re, match_to_ansi(color, kwargs.get("enclose")), string, zsh)
string = string.replace("}}", "}")
return string
color = color if color is not None else get_color_when()
def match_to_ansi(match):
"""Convert a match object generated by ``COLOR_RE`` into an ansi
color code. This can be used as a handler in ``re.sub``.
"""
escaped_at, dot, style, color_code, text = match.groups()
if escaped_at:
return "@"
elif dot:
return _escape(0, color, enclose, zsh)
elif not (style or color_code):
raise ColorParseError(
f"Incomplete color format: '{match.group(0)}' in '{match.string}'"
)
ansi_code = _escape(f"{styles[style]};{colors.get(color_code, '')}", color, enclose, zsh)
if text:
return f"{ansi_code}{text}{_escape(0, color, enclose, zsh)}"
else:
return ansi_code
return COLOR_RE.sub(match_to_ansi, string).replace("}}", "}")
def clen(string):
@@ -305,7 +293,7 @@ def cprint(string, stream=None, color=None):
cwrite(string + "\n", stream, color)
def cescape(string):
def cescape(string: str) -> str:
"""Escapes special characters needed for color codes.
Replaces the following symbols with their equivalent literal forms:
@@ -321,10 +309,7 @@ def cescape(string):
Returns:
(str): the string with color codes escaped
"""
string = str(string)
string = string.replace("@", "@@")
string = string.replace("}", "}}")
return string
return string.replace("@", "@@").replace("}", "}}")
class ColorStream: