CI Timing Statistics (#38598)
* Write timing information for installs from cache * CI: aggregate and upload install_times.json to artifacts * CI: Don't change root directory for artifact generation * Flat event based timer variation Event based timer allows for easily starting and stopping timers without wiping sub-timer data. It also requires less branching logic when tracking time. The json output is non-hierarchical in this version and hierarchy is less rigidly enforced between starting and stopping. * Add and write timers for top level install * Update completion * remove unused subtimer api * Fix unit tests * Suppress timing summary option * Save timers summaries to user_data artifacts * Remove completion from fish * Move spack python to script section * Write timer correctly for non-cache installs * Re-add hash to timer file * Fish completion updates * Fix null timer yield value * fix type hints * Remove timer-summary-file option * Add "." in front of non-package timer name --------- Co-authored-by: Harmen Stoppels <harmenstoppels@gmail.com> Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
This commit is contained in:
parent
c5fc794d77
commit
8ec1657136
@ -48,6 +48,7 @@
|
|||||||
import spack.util.gpg
|
import spack.util.gpg
|
||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
import spack.util.spack_yaml as syaml
|
import spack.util.spack_yaml as syaml
|
||||||
|
import spack.util.timer as timer
|
||||||
import spack.util.url as url_util
|
import spack.util.url as url_util
|
||||||
import spack.util.web as web_util
|
import spack.util.web as web_util
|
||||||
from spack.caches import misc_cache_location
|
from spack.caches import misc_cache_location
|
||||||
@ -1798,10 +1799,11 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
|
|||||||
m.linkname = m.linkname[result.end() :]
|
m.linkname = m.linkname[result.end() :]
|
||||||
|
|
||||||
|
|
||||||
def extract_tarball(spec, download_result, unsigned=False, force=False):
|
def extract_tarball(spec, download_result, unsigned=False, force=False, timer=timer.NULL_TIMER):
|
||||||
"""
|
"""
|
||||||
extract binary tarball for given package into install area
|
extract binary tarball for given package into install area
|
||||||
"""
|
"""
|
||||||
|
timer.start("extract")
|
||||||
if os.path.exists(spec.prefix):
|
if os.path.exists(spec.prefix):
|
||||||
if force:
|
if force:
|
||||||
shutil.rmtree(spec.prefix)
|
shutil.rmtree(spec.prefix)
|
||||||
@ -1881,7 +1883,9 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
|
|||||||
|
|
||||||
os.remove(tarfile_path)
|
os.remove(tarfile_path)
|
||||||
os.remove(specfile_path)
|
os.remove(specfile_path)
|
||||||
|
timer.stop("extract")
|
||||||
|
|
||||||
|
timer.start("relocate")
|
||||||
try:
|
try:
|
||||||
relocate_package(spec)
|
relocate_package(spec)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -1902,6 +1906,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False):
|
|||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
os.remove(filename)
|
os.remove(filename)
|
||||||
_delete_staged_downloads(download_result)
|
_delete_staged_downloads(download_result)
|
||||||
|
timer.stop("relocate")
|
||||||
|
|
||||||
|
|
||||||
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
|
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
import spack.hash_types as ht
|
import spack.hash_types as ht
|
||||||
import spack.mirror
|
import spack.mirror
|
||||||
import spack.util.gpg as gpg_util
|
import spack.util.gpg as gpg_util
|
||||||
|
import spack.util.timer as timer
|
||||||
import spack.util.url as url_util
|
import spack.util.url as url_util
|
||||||
import spack.util.web as web_util
|
import spack.util.web as web_util
|
||||||
|
|
||||||
@ -253,6 +254,8 @@ def ci_rebuild(args):
|
|||||||
check a single spec against the remote mirror, and rebuild it from source if the mirror does
|
check a single spec against the remote mirror, and rebuild it from source if the mirror does
|
||||||
not contain the hash
|
not contain the hash
|
||||||
"""
|
"""
|
||||||
|
rebuild_timer = timer.Timer()
|
||||||
|
|
||||||
env = spack.cmd.require_active_env(cmd_name="ci rebuild")
|
env = spack.cmd.require_active_env(cmd_name="ci rebuild")
|
||||||
|
|
||||||
# Make sure the environment is "gitlab-enabled", or else there's nothing
|
# Make sure the environment is "gitlab-enabled", or else there's nothing
|
||||||
@ -736,6 +739,14 @@ def ci_rebuild(args):
|
|||||||
|
|
||||||
print(reproduce_msg)
|
print(reproduce_msg)
|
||||||
|
|
||||||
|
rebuild_timer.stop()
|
||||||
|
try:
|
||||||
|
with open("install_timers.json", "w") as timelog:
|
||||||
|
extra_attributes = {"name": ".ci-rebuild"}
|
||||||
|
rebuild_timer.write_json(timelog, extra_attributes=extra_attributes)
|
||||||
|
except Exception as e:
|
||||||
|
tty.debug(str(e))
|
||||||
|
|
||||||
# Tie job success/failure to the success/failure of building the spec
|
# Tie job success/failure to the success/failure of building the spec
|
||||||
return install_exit_code
|
return install_exit_code
|
||||||
|
|
||||||
|
@ -90,6 +90,16 @@
|
|||||||
STATUS_REMOVED = "removed"
|
STATUS_REMOVED = "removed"
|
||||||
|
|
||||||
|
|
||||||
|
def _write_timer_json(pkg, timer, cache):
|
||||||
|
extra_attributes = {"name": pkg.name, "cache": cache, "hash": pkg.spec.dag_hash()}
|
||||||
|
try:
|
||||||
|
with open(pkg.times_log_path, "w") as timelog:
|
||||||
|
timer.write_json(timelog, extra_attributes=extra_attributes)
|
||||||
|
except Exception as e:
|
||||||
|
tty.debug(str(e))
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
class InstallAction:
|
class InstallAction:
|
||||||
#: Don't perform an install
|
#: Don't perform an install
|
||||||
NONE = 0
|
NONE = 0
|
||||||
@ -399,6 +409,8 @@ def _install_from_cache(
|
|||||||
return False
|
return False
|
||||||
t.stop()
|
t.stop()
|
||||||
tty.debug("Successfully extracted {0} from binary cache".format(pkg_id))
|
tty.debug("Successfully extracted {0} from binary cache".format(pkg_id))
|
||||||
|
|
||||||
|
_write_timer_json(pkg, t, True)
|
||||||
_print_timer(pre=_log_prefix(pkg.name), pkg_id=pkg_id, timer=t)
|
_print_timer(pre=_log_prefix(pkg.name), pkg_id=pkg_id, timer=t)
|
||||||
_print_installed_pkg(pkg.spec.prefix)
|
_print_installed_pkg(pkg.spec.prefix)
|
||||||
spack.hooks.post_install(pkg.spec, explicit)
|
spack.hooks.post_install(pkg.spec, explicit)
|
||||||
@ -481,7 +493,7 @@ def _process_binary_cache_tarball(
|
|||||||
|
|
||||||
with timer.measure("install"), spack.util.path.filter_padding():
|
with timer.measure("install"), spack.util.path.filter_padding():
|
||||||
binary_distribution.extract_tarball(
|
binary_distribution.extract_tarball(
|
||||||
pkg.spec, download_result, unsigned=unsigned, force=False
|
pkg.spec, download_result, unsigned=unsigned, force=False, timer=timer
|
||||||
)
|
)
|
||||||
|
|
||||||
pkg.installed_from_binary_cache = True
|
pkg.installed_from_binary_cache = True
|
||||||
@ -2093,7 +2105,6 @@ def install(self) -> None:
|
|||||||
# another process has a write lock so must be (un)installing
|
# another process has a write lock so must be (un)installing
|
||||||
# the spec (or that process is hung).
|
# the spec (or that process is hung).
|
||||||
ltype, lock = self._ensure_locked("read", pkg)
|
ltype, lock = self._ensure_locked("read", pkg)
|
||||||
|
|
||||||
# Requeue the spec if we cannot get at least a read lock so we
|
# Requeue the spec if we cannot get at least a read lock so we
|
||||||
# can check the status presumably established by another process
|
# can check the status presumably established by another process
|
||||||
# -- failed, installed, or uninstalled -- on the next pass.
|
# -- failed, installed, or uninstalled -- on the next pass.
|
||||||
@ -2373,8 +2384,7 @@ def run(self) -> bool:
|
|||||||
|
|
||||||
# Stop the timer and save results
|
# Stop the timer and save results
|
||||||
self.timer.stop()
|
self.timer.stop()
|
||||||
with open(self.pkg.times_log_path, "w") as timelog:
|
_write_timer_json(self.pkg, self.timer, False)
|
||||||
self.timer.write_json(timelog)
|
|
||||||
|
|
||||||
print_install_test_log(self.pkg)
|
print_install_test_log(self.pkg)
|
||||||
_print_timer(pre=self.pre, pkg_id=self.pkg_id, timer=self.timer)
|
_print_timer(pre=self.pre, pkg_id=self.pkg_id, timer=self.timer)
|
||||||
|
@ -126,8 +126,8 @@ def test_timer_write():
|
|||||||
|
|
||||||
deserialized = json.loads(json_buffer.getvalue())
|
deserialized = json.loads(json_buffer.getvalue())
|
||||||
assert deserialized == {
|
assert deserialized == {
|
||||||
"phases": [{"name": "timer", "seconds": 1.0}],
|
"phases": [{"name": "timer", "path": "timer", "seconds": 1.0, "count": 1}],
|
||||||
"total": {"seconds": 3.0},
|
"total": 3.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,31 +13,32 @@
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from typing import Dict
|
from typing import Callable, Dict, List
|
||||||
|
|
||||||
from llnl.util.lang import pretty_seconds_formatter
|
from llnl.util.lang import pretty_seconds_formatter
|
||||||
|
|
||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
|
|
||||||
Interval = collections.namedtuple("Interval", ("begin", "end"))
|
TimerEvent = collections.namedtuple("TimerEvent", ("time", "running", "label"))
|
||||||
|
TimeTracker = collections.namedtuple("TimeTracker", ("total", "start", "count", "path"))
|
||||||
|
|
||||||
#: name for the global timer (used in start(), stop(), duration() without arguments)
|
#: name for the global timer (used in start(), stop(), duration() without arguments)
|
||||||
global_timer_name = "_global"
|
global_timer_name = "_global"
|
||||||
|
|
||||||
|
|
||||||
class BaseTimer:
|
class BaseTimer:
|
||||||
def start(self, name=global_timer_name):
|
def start(self, name=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def stop(self, name=global_timer_name):
|
def stop(self, name=None):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def duration(self, name=global_timer_name):
|
def duration(self, name=None):
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def measure(self, name):
|
def measure(self, name):
|
||||||
yield
|
yield self
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def phases(self):
|
def phases(self):
|
||||||
@ -60,16 +61,18 @@ class NullTimer(BaseTimer):
|
|||||||
class Timer(BaseTimer):
|
class Timer(BaseTimer):
|
||||||
"""Simple interval timer"""
|
"""Simple interval timer"""
|
||||||
|
|
||||||
def __init__(self, now=time.time):
|
def __init__(self, now: Callable[[], float] = time.time):
|
||||||
"""
|
"""
|
||||||
Arguments:
|
Arguments:
|
||||||
now: function that gives the seconds since e.g. epoch
|
now: function that gives the seconds since e.g. epoch
|
||||||
"""
|
"""
|
||||||
self._now = now
|
self._now = now
|
||||||
self._timers: Dict[str, Interval] = collections.OrderedDict()
|
self._timers: Dict[str, TimeTracker] = {}
|
||||||
|
self._timer_stack: List[str] = []
|
||||||
|
|
||||||
# _global is the overal timer since the instance was created
|
self._events: List[TimerEvent] = []
|
||||||
self._timers[global_timer_name] = Interval(self._now(), end=None)
|
# Push start event
|
||||||
|
self._events.append(TimerEvent(self._now(), True, global_timer_name))
|
||||||
|
|
||||||
def start(self, name=global_timer_name):
|
def start(self, name=global_timer_name):
|
||||||
"""
|
"""
|
||||||
@ -79,7 +82,7 @@ def start(self, name=global_timer_name):
|
|||||||
name (str): Optional name of the timer. When no name is passed, the
|
name (str): Optional name of the timer. When no name is passed, the
|
||||||
global timer is started.
|
global timer is started.
|
||||||
"""
|
"""
|
||||||
self._timers[name] = Interval(self._now(), None)
|
self._events.append(TimerEvent(self._now(), True, name))
|
||||||
|
|
||||||
def stop(self, name=global_timer_name):
|
def stop(self, name=global_timer_name):
|
||||||
"""
|
"""
|
||||||
@ -90,10 +93,7 @@ def stop(self, name=global_timer_name):
|
|||||||
name (str): Optional name of the timer. When no name is passed, all
|
name (str): Optional name of the timer. When no name is passed, all
|
||||||
timers are stopped.
|
timers are stopped.
|
||||||
"""
|
"""
|
||||||
interval = self._timers.get(name, None)
|
self._events.append(TimerEvent(self._now(), False, name))
|
||||||
if not interval:
|
|
||||||
return
|
|
||||||
self._timers[name] = Interval(interval.begin, self._now())
|
|
||||||
|
|
||||||
def duration(self, name=global_timer_name):
|
def duration(self, name=global_timer_name):
|
||||||
"""
|
"""
|
||||||
@ -107,13 +107,13 @@ def duration(self, name=global_timer_name):
|
|||||||
Returns:
|
Returns:
|
||||||
float: duration of timer.
|
float: duration of timer.
|
||||||
"""
|
"""
|
||||||
try:
|
self._flatten()
|
||||||
interval = self._timers[name]
|
if name in self._timers:
|
||||||
except KeyError:
|
if name in self._timer_stack:
|
||||||
|
return self._timers[name].total + (self._now() - self._timers[name].start)
|
||||||
|
return self._timers[name].total
|
||||||
|
else:
|
||||||
return 0.0
|
return 0.0
|
||||||
# Take either the interval end, the global timer, or now.
|
|
||||||
end = interval.end or self._timers[global_timer_name].end or self._now()
|
|
||||||
return end - interval.begin
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def measure(self, name):
|
def measure(self, name):
|
||||||
@ -123,23 +123,72 @@ def measure(self, name):
|
|||||||
Arguments:
|
Arguments:
|
||||||
name (str): Name of the timer
|
name (str): Name of the timer
|
||||||
"""
|
"""
|
||||||
begin = self._now()
|
self.start(name)
|
||||||
yield
|
yield self
|
||||||
self._timers[name] = Interval(begin, self._now())
|
self.stop(name)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def phases(self):
|
def phases(self):
|
||||||
"""Get all named timers (excluding the global/total timer)"""
|
"""Get all named timers (excluding the global/total timer)"""
|
||||||
return [k for k in self._timers.keys() if k != global_timer_name]
|
self._flatten()
|
||||||
|
return [k for k in self._timers.keys() if not k == global_timer_name]
|
||||||
|
|
||||||
def write_json(self, out=sys.stdout):
|
def _flatten(self):
|
||||||
|
for event in self._events:
|
||||||
|
if event.running:
|
||||||
|
if event.label not in self._timer_stack:
|
||||||
|
self._timer_stack.append(event.label)
|
||||||
|
# Only start the timer if it is on top of the stack
|
||||||
|
# restart doesn't work after a subtimer is started
|
||||||
|
if event.label == self._timer_stack[-1]:
|
||||||
|
timer_path = "/".join(self._timer_stack[1:])
|
||||||
|
tracker = self._timers.get(
|
||||||
|
event.label, TimeTracker(0.0, event.time, 0, timer_path)
|
||||||
|
)
|
||||||
|
assert tracker.path == timer_path
|
||||||
|
self._timers[event.label] = TimeTracker(
|
||||||
|
tracker.total, event.time, tracker.count, tracker.path
|
||||||
|
)
|
||||||
|
else: # if not event.running:
|
||||||
|
if event.label in self._timer_stack:
|
||||||
|
index = self._timer_stack.index(event.label)
|
||||||
|
for label in self._timer_stack[index:]:
|
||||||
|
tracker = self._timers[label]
|
||||||
|
self._timers[label] = TimeTracker(
|
||||||
|
tracker.total + (event.time - tracker.start),
|
||||||
|
None,
|
||||||
|
tracker.count + 1,
|
||||||
|
tracker.path,
|
||||||
|
)
|
||||||
|
self._timer_stack = self._timer_stack[: max(0, index)]
|
||||||
|
# clear events
|
||||||
|
self._events = []
|
||||||
|
|
||||||
|
def write_json(self, out=sys.stdout, extra_attributes={}):
|
||||||
"""Write a json object with times to file"""
|
"""Write a json object with times to file"""
|
||||||
phases = [{"name": p, "seconds": self.duration(p)} for p in self.phases]
|
self._flatten()
|
||||||
times = {"phases": phases, "total": {"seconds": self.duration()}}
|
data = {
|
||||||
out.write(sjson.dump(times))
|
"total": self._timers[global_timer_name].total,
|
||||||
|
"phases": [
|
||||||
|
{
|
||||||
|
"name": phase,
|
||||||
|
"path": self._timers[phase].path,
|
||||||
|
"seconds": self._timers[phase].total,
|
||||||
|
"count": self._timers[phase].count,
|
||||||
|
}
|
||||||
|
for phase in self.phases
|
||||||
|
],
|
||||||
|
}
|
||||||
|
if extra_attributes:
|
||||||
|
data.update(extra_attributes)
|
||||||
|
if out:
|
||||||
|
out.write(sjson.dump(data))
|
||||||
|
else:
|
||||||
|
return data
|
||||||
|
|
||||||
def write_tty(self, out=sys.stdout):
|
def write_tty(self, out=sys.stdout):
|
||||||
"""Write a human-readable summary of timings"""
|
"""Write a human-readable summary of timings (depth is 1)"""
|
||||||
|
self._flatten()
|
||||||
|
|
||||||
times = [self.duration(p) for p in self.phases]
|
times = [self.duration(p) for p in self.phases]
|
||||||
|
|
||||||
|
@ -20,7 +20,11 @@ ci:
|
|||||||
- k=$CI_GPG_KEY_ROOT/intermediate_ci_signing_key.gpg; [[ -r $k ]] && spack gpg trust $k
|
- k=$CI_GPG_KEY_ROOT/intermediate_ci_signing_key.gpg; [[ -r $k ]] && spack gpg trust $k
|
||||||
- k=$CI_GPG_KEY_ROOT/spack_public_key.gpg; [[ -r $k ]] && spack gpg trust $k
|
- k=$CI_GPG_KEY_ROOT/spack_public_key.gpg; [[ -r $k ]] && spack gpg trust $k
|
||||||
script::
|
script::
|
||||||
- spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
|
- - spack --color=always --backtrace ci rebuild --tests > >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_out.txt) 2> >(tee ${SPACK_ARTIFACTS_ROOT}/user_data/pipeline_err.txt >&2)
|
||||||
|
- - spack python ${CI_PROJECT_DIR}/share/spack/gitlab/cloud_pipelines/scripts/common/aggregate_package_logs.spack.py
|
||||||
|
--prefix /home/software/spack:${CI_PROJECT_DIR}
|
||||||
|
--log install_times.json
|
||||||
|
${SPACK_ARTIFACTS_ROOT}/user_data/install_times.json
|
||||||
after_script:
|
after_script:
|
||||||
- - cat /proc/loadavg || true
|
- - cat /proc/loadavg || true
|
||||||
variables:
|
variables:
|
||||||
|
@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env spack-python
|
||||||
|
"""
|
||||||
|
This script is meant to be run using:
|
||||||
|
`spack python aggregate_logs.spack.py`
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def find_logs(prefix, filename):
|
||||||
|
for root, _, files in os.walk(prefix):
|
||||||
|
if filename in files:
|
||||||
|
yield os.path.join(root, filename)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import json
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
parser = ArgumentParser("aggregate_logs")
|
||||||
|
parser.add_argument("output_file")
|
||||||
|
parser.add_argument("--log", default="install_times.json")
|
||||||
|
parser.add_argument("--prefix", required=True)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
prefixes = [p for p in args.prefix.split(":") if os.path.exists(p)]
|
||||||
|
|
||||||
|
# Aggregate the install timers into a single json
|
||||||
|
data = []
|
||||||
|
for prefix in prefixes:
|
||||||
|
time_logs = find_logs(prefix, args.log)
|
||||||
|
for log in time_logs:
|
||||||
|
with open(log) as fd:
|
||||||
|
data.append(json.load(fd))
|
||||||
|
|
||||||
|
with open(args.output_file, "w") as fd:
|
||||||
|
json.dump(data, fd)
|
Loading…
Reference in New Issue
Block a user