Refactor to improve spec format speed (#43712)

When looking at where we spend our time in solver setup, I noticed a fair bit of time is spent in `Spec.format()`, and `Spec.format()` is a pretty old, slow, convoluted method.

This PR does a number of things:

- [x] Consolidate most of what was being done manually with a character loop and several regexes into a single regex.
- [x] Precompile regexes where we keep them.
- [x] Remove the `transform=` argument to `Spec.format()`, which was only used in one place in the code (modules) to uppercase env var names, but added a lot of complexity.
- [x] Avoid escaping and colorizing specs unless necessary.
- [x] Refactor a lot of the colorization logic to avoid unnecessary object construction.
- [x] Add type hints and remove some spots in the code where we were using nonexistent arguments to `format()`.
- [x] Add trivial cases to `__str__` in `VariantMap` and `VersionList` to avoid sorting.
- [x] Avoid calling `isinstance()` in the main loop of `Spec.format()`.
- [x] Don't bother constructing a `string` representation for the result of `_prev_version`, as it is only used for comparisons.

In my timings (on all the specs formatted in a solve of `hdf5`), this is over 2.67x faster than the original `format()`, and it seems to reduce setup time by around a second (for `hdf5`).
2024-04-23 10:52:15 -07:00
parent 978c20f35a
commit aa0825d642
12 changed files with 226 additions and 270 deletions
--- a/lib/spack/llnl/util/tty/__init__.py
+++ b/lib/spack/llnl/util/tty/__init__.py
@@ -12,7 +12,7 @@
 import traceback
 from datetime import datetime
 from sys import platform as _platform
-from typing import NoReturn
+from typing import Any, NoReturn

 if _platform != "win32":
    import fcntl
@@ -158,21 +158,22 @@ def get_timestamp(force=False):
        return ""


-def msg(message, *args, **kwargs):
+def msg(message: Any, *args: Any, newline: bool = True) -> None:
    if not msg_enabled():
        return

    if isinstance(message, Exception):
-        message = "%s: %s" % (message.__class__.__name__, str(message))
+        message = f"{message.__class__.__name__}: {message}"
+    else:
+        message = str(message)

-    newline = kwargs.get("newline", True)
    st_text = ""
    if _stacktrace:
        st_text = process_stacktrace(2)
-    if newline:
-        cprint("@*b{%s==>} %s%s" % (st_text, get_timestamp(), cescape(_output_filter(message))))
-    else:
-        cwrite("@*b{%s==>} %s%s" % (st_text, get_timestamp(), cescape(_output_filter(message))))
+
+    nl = "\n" if newline else ""
+    cwrite(f"@*b{{{st_text}==>}} {get_timestamp()}{cescape(_output_filter(message))}{nl}")
+
    for arg in args:
        print(indent + _output_filter(str(arg)))

--- a/lib/spack/llnl/util/tty/color.py
+++ b/lib/spack/llnl/util/tty/color.py
@@ -62,6 +62,7 @@
 import re
 import sys
 from contextlib import contextmanager
+from typing import Optional


 class ColorParseError(Exception):
@@ -95,7 +96,7 @@ def __init__(self, message):
 }  # white

 # Regex to be used for color formatting
-color_re = r"@(?:@|\.|([*_])?([a-zA-Z])?(?:{((?:[^}]|}})*)})?)"
+COLOR_RE = re.compile(r"@(?:(@)|(\.)|([*_])?([a-zA-Z])?(?:{((?:[^}]|}})*)})?)")

 # Mapping from color arguments to values for tty.set_color
 color_when_values = {"always": True, "auto": None, "never": False}
@@ -203,77 +204,64 @@ def color_when(value):
    set_color_when(old_value)


-class match_to_ansi:
-    def __init__(self, color=True, enclose=False, zsh=False):
-        self.color = _color_when_value(color)
-        self.enclose = enclose
-        self.zsh = zsh
-
-    def escape(self, s):
-        """Returns a TTY escape sequence for a color"""
-        if self.color:
-            if self.zsh:
-                result = rf"\e[0;{s}m"
-            else:
-                result = f"\033[{s}m"
-
-            if self.enclose:
-                result = rf"\[{result}\]"
-
-            return result
+def _escape(s: str, color: bool, enclose: bool, zsh: bool) -> str:
+    """Returns a TTY escape sequence for a color"""
+    if color:
+        if zsh:
+            result = rf"\e[0;{s}m"
        else:
-            return ""
+            result = f"\033[{s}m"

-    def __call__(self, match):
-        """Convert a match object generated by ``color_re`` into an ansi
-        color code. This can be used as a handler in ``re.sub``.
-        """
-        style, color, text = match.groups()
-        m = match.group(0)
+        if enclose:
+            result = rf"\[{result}\]"

-        if m == "@@":
-            return "@"
-        elif m == "@.":
-            return self.escape(0)
-        elif m == "@":
-            raise ColorParseError("Incomplete color format: '%s' in %s" % (m, match.string))
-
-        string = styles[style]
-        if color:
-            if color not in colors:
-                raise ColorParseError(
-                    "Invalid color specifier: '%s' in '%s'" % (color, match.string)
-                )
-            string += ";" + str(colors[color])
-
-        colored_text = ""
-        if text:
-            colored_text = text + self.escape(0)
-
-        return self.escape(string) + colored_text
+        return result
+    else:
+        return ""


-def colorize(string, **kwargs):
+def colorize(
+    string: str, color: Optional[bool] = None, enclose: bool = False, zsh: bool = False
+) -> str:
    """Replace all color expressions in a string with ANSI control codes.

    Args:
-        string (str): The string to replace
+        string: The string to replace

    Returns:
-        str: The filtered string
+        The filtered string

    Keyword Arguments:
-        color (bool): If False, output will be plain text without control
-            codes, for output to non-console devices.
-        enclose (bool): If True, enclose ansi color sequences with
+        color: If False, output will be plain text without control codes, for output to
+            non-console devices (default: automatically choose color or not)
+        enclose: If True, enclose ansi color sequences with
            square brackets to prevent misestimation of terminal width.
-        zsh (bool): If True, use zsh ansi codes instead of bash ones (for variables like PS1)
+        zsh: If True, use zsh ansi codes instead of bash ones (for variables like PS1)
    """
-    color = _color_when_value(kwargs.get("color", get_color_when()))
-    zsh = kwargs.get("zsh", False)
-    string = re.sub(color_re, match_to_ansi(color, kwargs.get("enclose")), string, zsh)
-    string = string.replace("}}", "}")
-    return string
+    color = color if color is not None else get_color_when()
+
+    def match_to_ansi(match):
+        """Convert a match object generated by ``COLOR_RE`` into an ansi
+        color code. This can be used as a handler in ``re.sub``.
+        """
+        escaped_at, dot, style, color_code, text = match.groups()
+
+        if escaped_at:
+            return "@"
+        elif dot:
+            return _escape(0, color, enclose, zsh)
+        elif not (style or color_code):
+            raise ColorParseError(
+                f"Incomplete color format: '{match.group(0)}' in '{match.string}'"
+            )
+
+        ansi_code = _escape(f"{styles[style]};{colors.get(color_code, '')}", color, enclose, zsh)
+        if text:
+            return f"{ansi_code}{text}{_escape(0, color, enclose, zsh)}"
+        else:
+            return ansi_code
+
+    return COLOR_RE.sub(match_to_ansi, string).replace("}}", "}")


 def clen(string):
@@ -305,7 +293,7 @@ def cprint(string, stream=None, color=None):
    cwrite(string + "\n", stream, color)


-def cescape(string):
+def cescape(string: str) -> str:
    """Escapes special characters needed for color codes.

    Replaces the following symbols with their equivalent literal forms:
@@ -321,10 +309,7 @@ def cescape(string):
    Returns:
        (str): the string with color codes escaped
    """
-    string = str(string)
-    string = string.replace("@", "@@")
-    string = string.replace("}", "}}")
-    return string
+    return string.replace("@", "@@").replace("}", "}}")


 class ColorStream: