test refactor: refactoring of .github/integration-test.py

2025-12-18 21:54:05 +08:00 · 2023-06-06 16:53:16 +02:00
parent 8f4cee1e46
commit 835c6a1154
6 changed files with 173 additions and 182 deletions
--- a/.github/integration-test.py
+++ b/.github/integration-test.py
@@ -1,154 +1,160 @@
 #!/usr/bin/env python3
 import argparse
+import functools
 import os
 import subprocess
 import time
 from shutil import which

+GIT_REPO_PATH = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+TEST_IMAGE_NAME = "test-systemd"

-def container_runtime():
+
+@functools.lru_cache()
+def _get_container_runtime_cli():
    runtimes = ["docker", "podman"]
    for runtime in runtimes:
        if which(runtime):
            return runtime
-    raise RuntimeError(f"No container runtime found, tried: {' '.join(runtimes)}")
+    raise RuntimeError(f"No container runtime CLI found, tried: {' '.join(runtimes)}")


-def container_check_output(*args, **kwargs):
-    cmd = [container_runtime()] + list(*args)
-    print(f"Running {cmd} {kwargs}")
-    return subprocess.check_output(cmd, **kwargs)
+def _cli(args, log_failure=True):
+    cmd = [_get_container_runtime_cli(), *args]
+    try:
+        return subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError:
+        if log_failure:
+            print(f"{cmd} failed!")
+        raise


-def container_run(*args, **kwargs):
-    cmd = [container_runtime()] + list(*args)
-    print(f"Running {cmd} {kwargs}")
-    return subprocess.run(cmd, **kwargs)
-
-
-def build_systemd_image(image_name, source_path, build_args=None):
+def _await_container_startup(container_name, timeout=60):
    """
-    Build docker image with systemd at source_path.
-
-    Built image is tagged with image_name
+    Await container to become ready, as checked by attempting to run a basic
+    command (id) inside it.
    """
-    cmd = ["build", f"-t={image_name}", source_path]
-    if build_args:
-        cmd.extend([f"--build-arg={ba}" for ba in build_args])
-    container_check_output(cmd)
-
-
-def check_container_ready(container_name, timeout=60):
-    """
-    Check if container is ready to run tests
-    """
-    now = time.time()
+    start = time.time()
    while True:
        try:
-            out = container_check_output(["exec", "-t", container_name, "id"])
-            print(out.decode())
+            _cli(["exec", "-t", container_name, "id"], log_failure=False)
            return
-        except subprocess.CalledProcessError as e:
-            print(e)
-            try:
-                out = container_check_output(["inspect", container_name])
-                print(out.decode())
-            except subprocess.CalledProcessError as e:
-                print(e)
-            try:
-                out = container_check_output(["logs", container_name])
-                print(out.decode())
-            except subprocess.CalledProcessError as e:
-                print(e)
-            if time.time() - now > timeout:
-                raise RuntimeError(f"Container {container_name} hasn't started")
-            time.sleep(5)
+        except subprocess.CalledProcessError:
+            if time.time() - start > timeout:
+                inspect = ""
+                logs = ""
+                try:
+                    inspect = _cli(["inspect", container_name], log_failure=False)
+                except subprocess.CalledProcessError as e:
+                    inspect = e.output
+                try:
+                    logs = _cli(["logs", container_name], log_failure=False)
+                except subprocess.CalledProcessError as e:
+                    logs = e.output
+                raise RuntimeError(
+                    f"Container {container_name} failed to start! Debugging info follows...\n\n"
+                    f"> docker inspect {container_name}\n"
+                    "----------------------------------------\n"
+                    f"{inspect}\n"
+                    f"> docker logs {container_name}\n"
+                    "----------------------------------------\n"
+                    f"{logs}\n"
+                )
+            time.sleep(1)


-def run_systemd_image(image_name, container_name, bootstrap_pip_spec):
+def build_image(build_args=None):
    """
-    Run docker image with systemd
+    Build Dockerfile with systemd in the integration-tests folder to run tests
+    from.
+    """
+    cmd = [
+        _get_container_runtime_cli(),
+        "build",
+        f"--tag={TEST_IMAGE_NAME}",
+        "integration-tests",
+    ]
+    if build_args:
+        cmd.extend([f"--build-arg={ba}" for ba in build_args])

-    Image named image_name should be built with build_systemd_image.
+    subprocess.run(cmd, check=True, text=True)

-    Container named container_name will be started.
+
+def start_container(container_name, bootstrap_pip_spec):
+    """
+    Starts a container based on an image expected to start systemd.
    """
    cmd = [
        "run",
-        "--privileged",
+        "--rm",
        "--detach",
+        "--privileged",
        f"--name={container_name}",
        # A bit less than 1GB to ensure TLJH runs on 1GB VMs.
        # If this is changed all docs references to the required memory must be changed too.
        "--memory=900m",
    ]
-
    if bootstrap_pip_spec:
-        cmd.append("-e")
-        cmd.append(f"TLJH_BOOTSTRAP_PIP_SPEC={bootstrap_pip_spec}")
+        cmd.append(f"--env=TLJH_BOOTSTRAP_PIP_SPEC={bootstrap_pip_spec}")
+    else:
+        cmd.append("--env=TLJH_BOOTSTRAP_DEV=yes")
+        cmd.append("--env=TLJH_BOOTSTRAP_PIP_SPEC=/srv/src")
+    cmd.append(TEST_IMAGE_NAME)

-    cmd.append(image_name)
-
-    container_check_output(cmd)
+    return _cli(cmd)


 def stop_container(container_name):
    """
-    Stop & remove docker container if it exists.
+    Stop and remove docker container if it exists.
    """
    try:
-        container_check_output(["inspect", container_name], stderr=subprocess.STDOUT)
+        return _cli(["rm", "--force", container_name], log_failure=False)
    except subprocess.CalledProcessError:
-        # No such container exists, nothing to do
-        return
-    container_check_output(["rm", "-f", container_name])
+        pass


-def run_container_command(container_name, cmd):
+def run_command(container_name, cmd):
    """
-    Run cmd in a running container with a bash shell
+    Run a bash command in a running container and error if it fails
    """
-    proc = container_run(
-        ["exec", "-t", container_name, "/bin/bash", "-c", cmd],
-        check=True,
-    )
+    cmd = [
+        _get_container_runtime_cli(),
+        "exec",
+        "-t",
+        container_name,
+        "/bin/bash",
+        "-c",
+        cmd,
+    ]
+    subprocess.run(cmd, check=True, text=True)


 def copy_to_container(container_name, src_path, dest_path):
    """
-    Copy files from src_path to dest_path inside container_name
+    Copy files from a path on the local file system to a destination in a
+    running container
    """
-    container_check_output(["cp", src_path, f"{container_name}:{dest_path}"])
+    _cli(["cp", src_path, f"{container_name}:{dest_path}"])


 def run_test(
-    image_name,
-    test_name,
+    container_name,
    bootstrap_pip_spec,
    test_files,
    upgrade_from,
    installer_args,
 ):
    """
-    Starts a new container based on image_name, runs the bootstrap script to
-    setup tljh with installer_args, and runs test_name.
+    (Re-)starts a named container with given (Systemd based) image, then runs
+    the bootstrap script inside it to setup tljh with installer_args.
+
+    Thereafter, source files are copied to the container and
    """
-    stop_container(test_name)
-    run_systemd_image(image_name, test_name, bootstrap_pip_spec)
-
-    check_container_ready(test_name)
-
-    source_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
-
-    copy_to_container(test_name, os.path.join(source_path, "bootstrap/."), "/srv/src")
-    copy_to_container(
-        test_name, os.path.join(source_path, "integration-tests/"), "/srv/src"
-    )
-
-    # These logs can be very relevant to debug a container startup failure
-    print(f"--- Start of logs from the container: {test_name}")
-    print(container_check_output(["logs", test_name]).decode())
-    print(f"--- End of logs from the container: {test_name}")
+    stop_container(container_name)
+    start_container(container_name, bootstrap_pip_spec)
+    _await_container_startup(container_name)
+    copy_to_container(container_name, GIT_REPO_PATH, "/srv/src")

    # To test upgrades, we run a bootstrap.py script two times instead of one,
    # where the initial run first installs some older version.
@@ -156,38 +162,32 @@ def run_test(
    # We want to support testing a PR by upgrading from "main", "latest" (latest
    # released version), and from a previous major-like version.
    #
-    # FIXME: We currently always rely on the main branch's bootstrap.py script.
-    #        Realistically, we should run previous versions of the bootstrap
-    #        script which also installs previous versions of TLJH.
-    #
-    #        2023-04-15 Erik observed that https://tljh.jupyter.org/bootstrap.py
-    #        is referencing to the master (now main) branch which didn't seem
-    #        obvious, thinking it could have been the latest released version
-    #        also.
-    #
    if upgrade_from:
-        run_container_command(
-            test_name,
-            f"curl -L https://tljh.jupyter.org/bootstrap.py | python3 - --version={upgrade_from}",
+        run_command(
+            container_name,
+            f"python3 /srv/src/bootstrap/bootstrap.py --version={upgrade_from}",
        )
-    run_container_command(test_name, f"python3 /srv/src/bootstrap.py {installer_args}")
+    run_command(
+        container_name, f"python3 /srv/src/bootstrap/bootstrap.py {installer_args}"
+    )

    # Install pkgs from requirements in hub's pip, where
    # the bootstrap script installed the others
-    run_container_command(
-        test_name,
+    run_command(
+        container_name,
        "/opt/tljh/hub/bin/python3 -m pip install -r /srv/src/integration-tests/requirements.txt",
    )

    # show environment
-    run_container_command(
-        test_name,
+    run_command(
+        container_name,
        "/opt/tljh/hub/bin/python3 -m pip freeze",
    )

-    run_container_command(
-        test_name,
-        "/opt/tljh/hub/bin/python3 -m pytest --capture=no {}".format(
+    # run tests
+    run_command(
+        container_name,
+        "/opt/tljh/hub/bin/python3 -m pytest {}".format(
            " ".join(
                [os.path.join("/srv/src/integration-tests/", f) for f in test_files]
            )
@@ -197,12 +197,20 @@ def run_test(

 def show_logs(container_name):
    """
-    Print logs from inside container to stdout
+    Print jupyterhub and traefik status and logs from both.
+
+    tljh logs ref: https://tljh.jupyter.org/en/latest/troubleshooting/logs.html
    """
-    run_container_command(container_name, "journalctl --no-pager")
-    run_container_command(
+    systemctl = run_command(
        container_name, "systemctl --no-pager status jupyterhub traefik"
    )
+    print(systemctl)
+
+    jupyterhub_logs = run_command(container_name, "journalctl --no-pager -u jupyterhub")
+    print(jupyterhub_logs)
+
+    traefik_logs = run_command(container_name, "journalctl --no-pager -u traefik")
+    print(traefik_logs)


 def main():
@@ -210,18 +218,14 @@ def main():
    subparsers = argparser.add_subparsers(dest="action")

    build_image_parser = subparsers.add_parser("build-image")
-    build_image_parser.add_argument(
-        "--build-arg",
-        action="append",
-        dest="build_args",
-    )
-
-    stop_container_parser = subparsers.add_parser("stop-container")
-    stop_container_parser.add_argument("container_name")
+    build_image_parser.add_argument("--build-arg", action="append", dest="build_args")

    start_container_parser = subparsers.add_parser("start-container")
    start_container_parser.add_argument("container_name")

+    stop_container_parser = subparsers.add_parser("stop-container")
+    stop_container_parser.add_argument("container_name")
+
    run_parser = subparsers.add_parser("run")
    run_parser.add_argument("container_name")
    run_parser.add_argument("command")
@@ -234,10 +238,8 @@ def main():
    run_test_parser = subparsers.add_parser("run-test")
    run_test_parser.add_argument("--installer-args", default="")
    run_test_parser.add_argument("--upgrade-from", default="")
-    run_test_parser.add_argument(
-        "--bootstrap-pip-spec", nargs="?", default="", type=str
-    )
-    run_test_parser.add_argument("test_name")
+    run_test_parser.add_argument("--bootstrap-pip-spec", default="/srv/src")
+    run_test_parser.add_argument("container_name")
    run_test_parser.add_argument("test_files", nargs="+")

    show_logs_parser = subparsers.add_parser("show-logs")
@@ -245,12 +247,19 @@ def main():

    args = argparser.parse_args()

-    image_name = "tljh-systemd"
-
-    if args.action == "run-test":
+    if args.action == "build-image":
+        build_image(args.build_args)
+    elif args.action == "start-container":
+        start_container(args.container_name, args.bootstrap_pip_spec)
+    elif args.action == "stop-container":
+        stop_container(args.container_name)
+    elif args.action == "run":
+        run_command(args.container_name, args.command)
+    elif args.action == "copy":
+        copy_to_container(args.container_name, args.src, args.dest)
+    elif args.action == "run-test":
        run_test(
-            image_name,
-            args.test_name,
+            args.container_name,
            args.bootstrap_pip_spec,
            args.test_files,
            args.upgrade_from,
@@ -258,16 +267,6 @@ def main():
        )
    elif args.action == "show-logs":
        show_logs(args.container_name)
-    elif args.action == "run":
-        run_container_command(args.container_name, args.command)
-    elif args.action == "copy":
-        copy_to_container(args.container_name, args.src, args.dest)
-    elif args.action == "start-container":
-        run_systemd_image(image_name, args.container_name, args.bootstrap_pip_spec)
-    elif args.action == "stop-container":
-        stop_container(args.container_name)
-    elif args.action == "build-image":
-        build_systemd_image(image_name, "integration-tests", args.build_args)


 if __name__ == "__main__":