diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 4c04c11..1b7939e 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -69,7 +69,7 @@ jobs: run: | pytest --verbose --maxfail=2 --color=yes --durations=10 --capture=no \ integration-tests/test_bootstrap.py - timeout-minutes: 15 + timeout-minutes: 20 env: # integration-tests/test_bootstrap.py will build and start containers # based on this environment variable. This is similar to how diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index e93c0ce..338cc9c 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -59,6 +59,7 @@ jobs: apt-get update apt-get install --yes \ python3-venv \ + bzip2 \ git python3 -m venv /srv/venv diff --git a/dev-requirements.txt b/dev-requirements.txt index e1e3068..9f14bc3 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,4 @@ +packaging pytest pytest-cov pytest-mock diff --git a/integration-tests/Dockerfile b/integration-tests/Dockerfile index 447bcb7..22cd2d4 100644 --- a/integration-tests/Dockerfile +++ b/integration-tests/Dockerfile @@ -8,6 +8,7 @@ RUN export DEBIAN_FRONTEND=noninteractive \ && apt-get update \ && apt-get install --yes \ systemd \ + bzip2 \ curl \ git \ sudo \ diff --git a/tests/test_conda.py b/tests/test_conda.py index a13ab39..46b7cac 100644 --- a/tests/test_conda.py +++ b/tests/test_conda.py @@ -2,6 +2,7 @@ Test conda commandline wrappers """ from tljh import conda +from tljh import installer import os import pytest import subprocess @@ -13,25 +14,13 @@ def prefix(): """ Provide a temporary directory with a mambaforge conda environment """ - # see https://github.com/conda-forge/miniforge/releases - mambaforge_version = "4.10.3-7" - if os.uname().machine == "aarch64": - installer_sha256 = ( - "ac95f137b287b3408e4f67f07a284357b1119ee157373b788b34e770ef2392b2" - ) - elif os.uname().machine == "x86_64": - installer_sha256 = ( - "fc872522ec427fcab10167a93e802efaf251024b58cc27b084b915a9a73c4474" - ) - installer_url = "https://github.com/conda-forge/miniforge/releases/download/{v}/Mambaforge-{v}-Linux-{arch}.sh".format( - v=mambaforge_version, arch=os.uname().machine - ) + machine = os.uname().machine + installer_url, checksum = installer._mambaforge_url() with tempfile.TemporaryDirectory() as tmpdir: with conda.download_miniconda_installer( - installer_url, installer_sha256 + installer_url, checksum ) as installer_path: conda.install_miniconda(installer_path, tmpdir) - conda.ensure_conda_packages(tmpdir, ["conda==4.10.3"]) yield tmpdir diff --git a/tests/test_installer.py b/tests/test_installer.py index 9b42d70..7675bd0 100644 --- a/tests/test_installer.py +++ b/tests/test_installer.py @@ -1,9 +1,16 @@ """ Unit test functions in installer.py """ +import json import os +from unittest import mock +from subprocess import run, PIPE + +from packaging.version import parse as V +from packaging.specifiers import SpecifierSet import pytest +from tljh import conda from tljh import installer from tljh.yaml import yaml @@ -36,3 +43,194 @@ def test_ensure_admins(tljh_dir, admins, expected_config): # verify the list was flattened assert config["users"]["admin"] == expected_config + + +def setup_conda(distro, version, prefix): + """Install mambaforge or miniconda in a prefix""" + if distro == "mambaforge": + installer_url, _ = installer._mambaforge_url(version) + elif distro == "miniforge": + installer_url, _ = installer._mambaforge_url(version) + installer_url = installer_url.replace("Mambaforge", "Miniforge3") + elif distro == "miniconda": + arch = os.uname().machine + installer_url = ( + f"https://repo.anaconda.com/miniconda/Miniconda3-{version}-Linux-{arch}.sh" + ) + else: + raise ValueError( + f"{distro=} must be 'miniconda' or 'mambaforge' or 'miniforge'" + ) + with conda.download_miniconda_installer(installer_url, None) as installer_path: + conda.install_miniconda(installer_path, str(prefix)) + # avoid auto-updating conda when we install other packages + run( + [ + str(prefix / "bin/conda"), + "config", + "--system", + "--set", + "auto_update_conda", + "false", + ], + input="", + check=True, + ) + + +@pytest.fixture +def user_env_prefix(tmp_path): + user_env_prefix = tmp_path / "user_env" + with mock.patch.object(installer, "USER_ENV_PREFIX", str(user_env_prefix)): + yield user_env_prefix + + +def _specifier(version): + """Convert version string to SpecifierSet + + If just a version number, add == to make it a specifier + + Any missing fields are replaced with .* + + If it's already a specifier string, pass it directly to SpecifierSet + + e.g. + + - 3.7 -> ==3.7.* + - 1.2.3 -> ==1.2.3 + """ + if version[0].isdigit(): + # it's a version number, not a specifier + if version.count(".") < 2: + # pad missing fields + version += ".*" + version = f"=={version}" + return SpecifierSet(version) + + +@pytest.mark.parametrize( + "distro, distro_version, expected_versions", + [ + # No previous install, start fresh + ( + None, + None, + { + "python": "3.10.*", + "conda": "23.1.0", + "mamba": "1.4.1", + }, + ), + # previous install, 1.0 + ( + "mambaforge", + "23.1.0-1", + { + "python": "3.10.*", + "conda": "23.1.0", + "mamba": "1.4.1", + }, + ), + # 0.2 install, no upgrade needed + ( + "mambaforge", + "4.10.3-7", + { + "conda": "4.10.3", + "mamba": "0.16.0", + "python": "3.9.*", + }, + ), + # simulate missing mamba + # will be installed but not pinned + # to avoid conflicts + ( + "miniforge", + "4.10.3-7", + { + "conda": "4.10.3", + "mamba": ">=1.1.0", + "python": "3.9.*", + }, + ), + # too-old Python (3.7), abort + ( + "miniconda", + "4.7.10", + ValueError, + ), + ], +) +def test_ensure_user_environment( + user_env_prefix, + distro, + distro_version, + expected_versions, +): + if ( + distro_version + and V(distro_version) < V("4.10.1") + and os.uname().machine == "aarch64" + ): + pytest.skip(f"{distro} {distro_version} not available for aarch64") + canary_file = user_env_prefix / "test-file.txt" + canary_package = "types-backports_abc" + if distro: + setup_conda(distro, distro_version, user_env_prefix) + # install a noarch: python package that won't be used otherwise + # should depend on Python, so it will interact with possible upgrades + pkgs = [canary_package] + run( + [ + str(user_env_prefix / "bin/conda"), + "install", + "-S", + "-y", + "-c", + "conda-forge", + ] + + pkgs, + input="", + check=True, + ) + + # make a file not managed by conda, to check for wipeouts + with canary_file.open("w") as f: + f.write("I'm here\n") + + if isinstance(expected_versions, type) and issubclass(expected_versions, Exception): + exc_class = expected_versions + with pytest.raises(exc_class): + installer.ensure_user_environment("") + return + else: + installer.ensure_user_environment("") + + p = run( + [str(user_env_prefix / "bin/conda"), "list", "--json"], + stdout=PIPE, + text=True, + check=True, + ) + package_list = json.loads(p.stdout) + packages = {package["name"]: package for package in package_list} + + if distro: + # make sure we didn't wipe out files + assert canary_file.exists() + # make sure we didn't delete the installed package + assert canary_package in packages + + for pkg, version in expected_versions.items(): + assert pkg in packages + assert V(packages[pkg]["version"]) in _specifier(version) + + +def test_ensure_user_environment_no_clobber(user_env_prefix): + # don't clobber existing user-env dir if it's non-empty and not a conda install + user_env_prefix.mkdir() + canary_file = user_env_prefix / "test-file.txt" + with canary_file.open("w") as f: + pass + with pytest.raises(OSError): + installer.ensure_user_environment("") diff --git a/tljh/conda.py b/tljh/conda.py index 88923f6..206e4df 100644 --- a/tljh/conda.py +++ b/tljh/conda.py @@ -6,9 +6,12 @@ import subprocess import json import hashlib import contextlib +import logging import tempfile +import time + import requests -from distutils.version import LooseVersion as V + from tljh import utils @@ -25,23 +28,21 @@ def sha256_file(fname): return hash_sha256.hexdigest() -def check_miniconda_version(prefix, version): - """ - Return true if a miniconda install with version exists at prefix - """ +def get_conda_package_versions(prefix): + """Get conda package versions, via `conda list --json`""" + versions = {} try: - installed_version = ( - subprocess.check_output( - [os.path.join(prefix, "bin", "conda"), "-V"], stderr=subprocess.STDOUT - ) - .decode() - .strip() - .split()[1] + out = subprocess.check_output( + [os.path.join(prefix, "bin", "conda"), "list", "--json"], + text=True, ) - return V(installed_version) >= V(version) except (subprocess.CalledProcessError, FileNotFoundError): - # Conda doesn't exist - return False + return versions + + packages = json.loads(out) + for package in packages: + versions[package["name"]] = package["version"] + return versions @contextlib.contextmanager @@ -53,14 +54,21 @@ def download_miniconda_installer(installer_url, sha256sum): of given version, verifies the sha256sum & provides path to it to the `with` block to run. """ - with tempfile.NamedTemporaryFile("wb") as f: - f.write(requests.get(installer_url).content) + logger = logging.getLogger("tljh") + logger.info(f"Downloading conda installer {installer_url}") + with tempfile.NamedTemporaryFile("wb", suffix=".sh") as f: + tic = time.perf_counter() + r = requests.get(installer_url) + r.raise_for_status() + f.write(r.content) # Remain in the NamedTemporaryFile context, but flush changes, see: # https://docs.python.org/3/library/os.html#os.fsync f.flush() os.fsync(f.fileno()) + t = time.perf_counter() - tic + logger.info(f"Downloaded conda installer {installer_url} in {t:.1f}s") - if sha256_file(f.name) != sha256sum: + if sha256sum and sha256_file(f.name) != sha256sum: raise Exception("sha256sum hash mismatch! Downloaded file corrupted") yield f.name @@ -97,39 +105,26 @@ def ensure_conda_packages(prefix, packages): Note that conda seem to update dependencies by default, so there is probably no need to have a update parameter exposed for this function. """ - conda_executable = [os.path.join(prefix, "bin", "mamba")] + conda_executable = os.path.join(prefix, "bin", "mamba") + if not os.path.isfile(conda_executable): + # fallback on conda if mamba is not present (e.g. for mamba to install itself) + conda_executable = os.path.join(prefix, "bin", "conda") + abspath = os.path.abspath(prefix) - # Let subprocess errors propagate - # Explicitly do *not* capture stderr, since that's not always JSON! - # Scripting conda is a PITA! - # FIXME: raise different exception when using - raw_output = subprocess.check_output( - conda_executable - + [ + + utils.run_subprocess( + [ + conda_executable, "install", + "-y", "-c", "conda-forge", # Make customizable if we ever need to - "--json", "--prefix", abspath, ] - + packages - ).decode() - # `conda install` outputs JSON lines for fetch updates, - # and a undelimited output at the end. There is no reasonable way to - # parse this outside of this kludge. - filtered_output = "\n".join( - [ - l - for l in raw_output.split("\n") - # Sometimes the JSON messages start with a \x00. The lstrip removes these. - # conda messages seem to randomly throw \x00 in places for no reason - if not l.lstrip("\x00").startswith('{"fetch"') - ] + + packages, + input="", ) - output = json.loads(filtered_output.lstrip("\x00")) - if "success" in output and output["success"] == True: - return fix_permissions(prefix) diff --git a/tljh/installer.py b/tljh/installer.py index ac49086..61d4d9c 100644 --- a/tljh/installer.py +++ b/tljh/installer.py @@ -26,6 +26,7 @@ from tljh import ( traefik, user, ) + from .config import ( CONFIG_DIR, CONFIG_FILE, @@ -34,6 +35,7 @@ from .config import ( STATE_DIR, USER_ENV_PREFIX, ) +from .utils import parse_version as V from .yaml import yaml HERE = os.path.abspath(os.path.dirname(__file__)) @@ -153,60 +155,115 @@ def ensure_usergroups(): f.write("Defaults exempt_group = jupyterhub-admins\n") +# Install mambaforge using an installer from +# https://github.com/conda-forge/miniforge/releases +MAMBAFORGE_VERSION = "23.1.0-1" +# sha256 checksums +MAMBAFORGE_CHECKSUMS = { + "aarch64": "d9d89c9e349369702171008d9ee7c5ce80ed420e5af60bd150a3db4bf674443a", + "x86_64": "cfb16c47dc2d115c8b114280aa605e322173f029fdb847a45348bf4bd23c62ab", +} + +# minimum versions of packages +MINIMUM_VERSIONS = { + # if conda/mamba are lower than this, upgrade them before installing the user packages + "mamba": "0.16.0", + "conda": "4.10", + # minimum Python version (if not matched, abort to avoid big disruptive updates) + "python": "3.9", +} + + +def _mambaforge_url(version=MAMBAFORGE_VERSION, arch=None): + """Return (URL, checksum) for mambaforge download for a given version and arch + + Default values provided for both version and arch + """ + if arch is None: + arch = os.uname().machine + installer_url = "https://github.com/conda-forge/miniforge/releases/download/{v}/Mambaforge-{v}-Linux-{arch}.sh".format( + v=version, + arch=arch, + ) + # Check system architecture, set appropriate installer checksum + checksum = MAMBAFORGE_CHECKSUMS.get(arch) + if not checksum: + raise ValueError( + f"Unsupported architecture: {arch}. TLJH only supports {','.join(MAMBAFORGE_CHECKSUMS.keys())}" + ) + return installer_url, checksum + + def ensure_user_environment(user_requirements_txt_file): """ Set up user conda environment with required packages """ logger.info("Setting up user environment...") - - miniconda_old_version = "4.5.4" - miniconda_new_version = "4.7.10" - # Install mambaforge using an installer from - # https://github.com/conda-forge/miniforge/releases - mambaforge_new_version = "4.10.3-7" - # Check system architecture, set appropriate installer checksum - if os.uname().machine == "aarch64": - installer_sha256 = ( - "ac95f137b287b3408e4f67f07a284357b1119ee157373b788b34e770ef2392b2" - ) - elif os.uname().machine == "x86_64": - installer_sha256 = ( - "fc872522ec427fcab10167a93e802efaf251024b58cc27b084b915a9a73c4474" - ) # Check OS, set appropriate string for conda installer path if os.uname().sysname != "Linux": raise OSError("TLJH is only supported on Linux platforms.") - # Then run `mamba --version` to get the conda and mamba versions - # Keep these in sync with tests/test_conda.py::prefix - mambaforge_conda_new_version = "4.10.3" - mambaforge_mamba_version = "0.16.0" - if conda.check_miniconda_version(USER_ENV_PREFIX, mambaforge_conda_new_version): - conda_version = "4.10.3" - elif conda.check_miniconda_version(USER_ENV_PREFIX, miniconda_new_version): - conda_version = "4.8.1" - elif conda.check_miniconda_version(USER_ENV_PREFIX, miniconda_old_version): - conda_version = "4.5.8" - # If no prior miniconda installation is found, we can install a newer version - else: + # Check the existing environment for what to do + package_versions = conda.get_conda_package_versions(USER_ENV_PREFIX) + + # Case 1: no existing environment + if not package_versions: + # 1a. no environment, but prefix exists. + # Abort to avoid clobbering something we don't recognize + if os.path.exists(USER_ENV_PREFIX) and os.listdir(USER_ENV_PREFIX): + msg = f"Found non-empty directory that is not a conda install in {USER_ENV_PREFIX}. Please remove it (or rename it to preserve files) and run tljh again." + logger.error(msg) + raise OSError(msg) + + # 1b. No environment, directory empty or doesn't exist + # start fresh install logger.info("Downloading & setting up user environment...") - installer_url = "https://github.com/conda-forge/miniforge/releases/download/{v}/Mambaforge-{v}-Linux-{arch}.sh".format( - v=mambaforge_new_version, arch=os.uname().machine - ) + installer_url, installer_sha256 = _mambaforge_url() with conda.download_miniconda_installer( installer_url, installer_sha256 ) as installer_path: conda.install_miniconda(installer_path, USER_ENV_PREFIX) - conda_version = "4.10.3" + package_versions = conda.get_conda_package_versions(USER_ENV_PREFIX) + # quick sanity check: we should have conda and mamba! + assert "conda" in package_versions + assert "mamba" in package_versions - conda.ensure_conda_packages( - USER_ENV_PREFIX, - [ - # Conda's latest version is on conda much more so than on PyPI. - "conda==" + conda_version, - "mamba==" + mambaforge_mamba_version, - ], - ) + # next, check Python + python_version = package_versions["python"] + logger.debug(f"Found python={python_version} in {USER_ENV_PREFIX}") + if V(python_version) < V(MINIMUM_VERSIONS["python"]): + msg = ( + f"TLJH requires Python >={MINIMUM_VERSIONS['python']}, found python={python_version} in {USER_ENV_PREFIX}." + f"\nPlease upgrade Python (may be highly disruptive!), or remove/rename {USER_ENV_PREFIX} to allow TLJH to make a fresh install." + f"\nYou can use `{USER_ENV_PREFIX}/bin/conda list` to save your current list of packages." + ) + logger.error(msg) + raise ValueError(msg) + + # at this point, we know we have an env ready with conda and are going to start installing + # first, check if we should upgrade/install conda and/or mamba + to_upgrade = [] + for pkg in ("conda", "mamba"): + version = package_versions.get(pkg) + min_version = MINIMUM_VERSIONS[pkg] + if not version: + logger.warning(f"{USER_ENV_PREFIX} is missing {pkg}, installing it...") + to_upgrade.append(pkg) + else: + logger.debug(f"Found {pkg}=={version} in {USER_ENV_PREFIX}") + if V(version) < V(min_version): + logger.info( + f"{USER_ENV_PREFIX} has {pkg}=={version}, it will be upgraded to {pkg}>={min_version}" + ) + to_upgrade.append(pkg) + + if to_upgrade: + conda.ensure_conda_packages( + USER_ENV_PREFIX, + # we _could_ explicitly pin Python here, + # but conda already does this by default + to_upgrade, + ) conda.ensure_pip_requirements( USER_ENV_PREFIX, diff --git a/tljh/utils.py b/tljh/utils.py index 0b61da6..8ab1ca8 100644 --- a/tljh/utils.py +++ b/tljh/utils.py @@ -2,6 +2,7 @@ Miscellaneous functions useful in at least two places unrelated to each other """ import logging +import re import subprocess # Copied into bootstrap/bootstrap.py. Make sure these two copies are exactly the same! @@ -24,10 +25,11 @@ def run_subprocess(cmd, *args, **kwargs): and failed output directly to the user's screen """ logger = logging.getLogger("tljh") + printable_command = " ".join(cmd) + logger.debug("Running %s", printable_command) proc = subprocess.run( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, *args, **kwargs ) - printable_command = " ".join(cmd) if proc.returncode != 0: # Our process failed! Show output to the user logger.error( @@ -59,3 +61,14 @@ def get_plugin_manager(): pm.load_setuptools_entrypoints("tljh") return pm + + +def parse_version(version_string): + """Parse version string to tuple + + Finds all numbers and returns a tuple of ints + _very_ loose version parsing, like the old distutils.version.LooseVersion + """ + # return a tuple of all the numbers in the version string + # always succeeds, even if passed nonsense + return tuple(int(part) for part in re.findall(r"\d+", version_string))