mirror of
https://github.com/jupyterhub/the-littlest-jupyterhub.git
synced 2025-12-18 21:54:05 +08:00
Use idle culler from jupyterhub-idle-culler package
The idle culler lives as a script in at least 3 different places: - In the JupyterHub repo, as an example: d126baa443/examples/cull-idle - In the TLJH repo, as a core part of the service: 01ba34857d/tljh/cull_idle_servers.py. This is an import from a specific version of the JupyterHub repo, and has had a couple of changes made to it since. - In the z2jh repo, as a core part of the service: c3f3be25f8/jupyterhub/files/hub/cull_idle_servers.py. This is also an import from a specific version of the JupyterHub repo, but has had a lot more work done on it. Most of that had been synced back to the JupyterHub repo, but some had not been. See 9c15a42b12/images/hub/cull_idle_servers.py and https://github.com/jupyterhub/zero-to-jupyterhub-k8s/commits/master/jupyterhub/files/hub/cull_idle_servers.py The idle culler is a core, integral part of every JupyterHub deployment these days. It would be great if it was maintained separately on its own, without being split across multiple repos. The latest changes had been to the version in the JupyterHub repo, so I copied it (while preserving commit history, because credit is important) to a new repository: https://github.com/yuvipanda/jupyterhub-idle-culler I looked through the z2jh and tljh copies, and cherry-picked the following changes manually: ae80fb5163, 836f19a4c7, a0787c64f1, b230ef8156, 20374db7c6 (diff-f00cd100e9f673285208aaa6fc0c3212). There were a few from 9c15a42b12/images/hub/cull_idle_servers.py I could not apply, but mostly because those features had been re-implemented already. Right now, the package is a direct port of the code we had. Once this settles in, I am hopeful we can iterate faster and make cool new changes.
This commit is contained in:
@@ -218,7 +218,7 @@ def set_cull_idle_service(config):
|
|||||||
Set Idle Culler service
|
Set Idle Culler service
|
||||||
"""
|
"""
|
||||||
cull_cmd = [
|
cull_cmd = [
|
||||||
sys.executable, '-m', 'tljh.cull_idle_servers'
|
sys.executable, '-m', 'jupyterhub_idle_culler'
|
||||||
]
|
]
|
||||||
cull_config = config['services']['cull']
|
cull_config = config['services']['cull']
|
||||||
print()
|
print()
|
||||||
|
|||||||
@@ -1,342 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""script to monitor and cull idle single-user servers
|
|
||||||
|
|
||||||
Imported from https://github.com/jupyterhub/jupyterhub/blob/6b1046697/examples/cull-idle/cull_idle_servers.py
|
|
||||||
|
|
||||||
Caveats:
|
|
||||||
|
|
||||||
last_activity is not updated with high frequency,
|
|
||||||
so cull timeout should be greater than the sum of:
|
|
||||||
|
|
||||||
- single-user websocket ping interval (default: 30s)
|
|
||||||
- JupyterHub.last_activity_interval (default: 5 minutes)
|
|
||||||
|
|
||||||
You can run this as a service managed by JupyterHub with this in your config::
|
|
||||||
|
|
||||||
|
|
||||||
c.JupyterHub.services = [
|
|
||||||
{
|
|
||||||
'name': 'cull-idle',
|
|
||||||
'admin': True,
|
|
||||||
'command': 'python cull_idle_servers.py --timeout=3600'.split(),
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
|
|
||||||
|
|
||||||
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
|
|
||||||
python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
|
|
||||||
"""
|
|
||||||
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from functools import partial
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
try:
|
|
||||||
from urllib.parse import quote
|
|
||||||
except ImportError:
|
|
||||||
from urllib import quote
|
|
||||||
|
|
||||||
import dateutil.parser
|
|
||||||
|
|
||||||
from tornado.gen import coroutine, multi
|
|
||||||
from tornado.locks import Semaphore
|
|
||||||
from tornado.log import app_log
|
|
||||||
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
|
|
||||||
from tornado.ioloop import IOLoop, PeriodicCallback
|
|
||||||
from tornado.options import define, options, parse_command_line
|
|
||||||
|
|
||||||
|
|
||||||
def parse_date(date_string):
    """Parse *date_string* into a timezone-aware datetime.

    Naive timestamps (no tzinfo) are interpreted as UTC, so the
    returned datetime object is always timezone-aware.
    """
    parsed = dateutil.parser.parse(date_string)
    if parsed.tzinfo:
        return parsed
    # No timezone attached: treat the naive timestamp as UTC.
    return parsed.replace(tzinfo=timezone.utc)
|
|
||||||
def format_td(td):
    """Render a timedelta as an HH:MM:SS string.

    ``None`` becomes ``"unknown"`` and strings pass through untouched,
    so callers can log values that may not be real timedeltas.
    """
    if td is None:
        return "unknown"
    if isinstance(td, str):
        return td
    total = int(td.total_seconds())
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
|
||||||
|
|
||||||
@coroutine
def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10):
    """Shutdown idle single-user servers

    Fetches all users from the Hub API, stops any server whose last
    activity is older than `inactive_limit` seconds (or whose age exceeds
    `max_age`, if given), and, if `cull_users` is set, deletes inactive
    *users* as well once all their servers are stopped.

    Parameters
    ----------
    url:
        Base URL of the JupyterHub API (e.g. http://127.0.0.1:8081/hub/api)
    api_token:
        JupyterHub API token (admin access required)
    inactive_limit:
        Inactivity threshold, in seconds
    cull_users:
        If True, inactive users are deleted as well
    max_age:
        If nonzero, cull servers older than this (seconds) even when active
    concurrency:
        Max concurrent requests to the Hub; 0 disables limiting
    """
    auth_header = {
        'Authorization': 'token %s' % api_token,
    }
    req = HTTPRequest(
        url=url + '/users',
        headers=auth_header,
    )
    now = datetime.now(timezone.utc)
    client = AsyncHTTPClient()

    if concurrency:
        semaphore = Semaphore(concurrency)

        @coroutine
        def fetch(req):
            """client.fetch wrapped in a semaphore to limit concurrency"""
            yield semaphore.acquire()
            try:
                return (yield client.fetch(req))
            finally:
                yield semaphore.release()
    else:
        fetch = client.fetch

    resp = yield fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []

    @coroutine
    def handle_server(user, server_name, server):
        """Handle (maybe) culling a single server

        Returns True if server is now stopped (user removable),
        False otherwise.
        """
        log_name = user['name']
        if server_name:
            log_name = '%s/%s' % (user['name'], server_name)
        if server.get('pending'):
            app_log.warning(
                "Not culling server %s with pending %s",
                log_name, server['pending'])
            return False

        if server.get('started'):
            age = now - parse_date(server['started'])
        else:
            # started may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if server['last_activity']:
            inactive = now - parse_date(server['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'started' field which is never None
            # for running servers
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling server %s (inactive for %s)",
                log_name, format_td(inactive))

        if max_age and not should_cull:
            # only check started if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling server %s (age: %s, inactive for %s)",
                    log_name, format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling server %s (age: %s, inactive for %s)",
                log_name, format_td(age), format_td(inactive))
            return False

        # Bugfix: named servers must be deleted via their own endpoint
        # (/users/:name/servers/:server_name); the bare /users/:name/server
        # endpoint only targets the default server, so named servers were
        # never actually stopped.
        if server_name:
            delete_url = url + '/users/%s/servers/%s' % (
                quote(user['name']), quote(server_name))
        else:
            delete_url = url + '/users/%s/server' % quote(user['name'])
        req = HTTPRequest(
            url=delete_url,
            method='DELETE',
            headers=auth_header,
        )
        resp = yield fetch(req)
        if resp.code == 202:
            app_log.warning(
                "Server %s is slow to stop",
                log_name,
            )
            # return False to prevent culling user with pending shutdowns
            return False
        return True

    @coroutine
    def handle_user(user):
        """Handle one user.

        Create a list of their servers, and async exec them. Wait for
        that to be done, and if all servers are stopped, possibly cull
        the user.
        """
        # shutdown servers first.
        # Hub doesn't allow deleting users with running servers.
        # named servers contain the 'servers' dict
        if 'servers' in user:
            servers = user['servers']
        # Otherwise, server data is intermingled in with the user
        # model
        else:
            servers = {}
            if user['server']:
                servers[''] = {
                    'started': user.get('started'),
                    'last_activity': user['last_activity'],
                    'pending': user['pending'],
                    'url': user['server'],
                }
        server_futures = [
            handle_server(user, server_name, server)
            for server_name, server in servers.items()
        ]
        results = yield multi(server_futures)
        if not cull_users:
            return
        # some servers are still running, cannot cull users
        still_alive = len(results) - sum(results)
        if still_alive:
            app_log.debug(
                "Not culling user %s with %i servers still alive",
                user['name'], still_alive)
            return False

        should_cull = False
        if user.get('created'):
            age = now - parse_date(user['created'])
        else:
            # created may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if user['last_activity']:
            inactive = now - parse_date(user['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'created' field which is never None
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling user %s (inactive for %s)",
                user['name'], inactive)

        if max_age and not should_cull:
            # only check created if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling user %s (age: %s, inactive for %s)",
                    user['name'], format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling user %s (created: %s, last active: %s)",
                user['name'], format_td(age), format_td(inactive))
            return False

        req = HTTPRequest(
            url=url + '/users/%s' % user['name'],
            method='DELETE',
            headers=auth_header,
        )
        yield fetch(req)
        return True

    for user in users:
        futures.append((user['name'], handle_user(user)))

    for (name, f) in futures:
        try:
            result = yield f
        except Exception:
            app_log.exception("Error processing %s", name)
        else:
            if result:
                app_log.debug("Finished culling %s", name)
||||||
if __name__ == '__main__':
    # Script entry point: declare CLI options (tornado.options), then run
    # cull_idle immediately and on a periodic schedule forever.
    define(
        'url',
        default=os.environ.get('JUPYTERHUB_API_URL'),
        help="The JupyterHub API URL",
    )
    define('timeout', type=int, default=600, help="The idle timeout (in seconds)")
    define('cull_every', type=int, default=0,
           help="The interval (in seconds) for checking for idle servers to cull")
    define('max_age', type=int, default=0,
           help="The maximum age (in seconds) of servers that should be culled even if they are active")
    define('cull_users', type=bool, default=False,
           help="""Cull users in addition to servers.
                This is for use in temporary-user cases such as tmpnb.""",
           )
    define('concurrency', type=int, default=10,
           help="""Limit the number of concurrent requests made to the Hub.

                Deleting a lot of users at the same time can slow down the Hub,
                so limit the number of API requests we have outstanding at any given time.
                """
           )

    parse_command_line()
    # Default the check interval to half the idle timeout.
    if not options.cull_every:
        options.cull_every = options.timeout // 2
    # Provided in the environment by the Hub when run as a managed service,
    # or exported manually (see module docstring); KeyError if missing.
    api_token = os.environ['JUPYTERHUB_API_TOKEN']

    try:
        # Prefer the pycurl-backed client; falls back to the default client
        # when pycurl is not importable.
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as e:
        app_log.warning(
            "Could not load pycurl: %s\n"
            "pycurl is recommended if you have a large number of users.",
            e)

    loop = IOLoop.current()
    # Bind all CLI options into a no-argument callable for the scheduler.
    cull = partial(
        cull_idle,
        url=options.url,
        api_token=api_token,
        inactive_limit=options.timeout,
        cull_users=options.cull_users,
        max_age=options.max_age,
        concurrency=options.concurrency,
    )
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    loop.add_callback(cull)
    # schedule periodic cull (PeriodicCallback's interval is in milliseconds)
    pc = PeriodicCallback(cull, 1e3 * options.cull_every)
    pc.start()
    try:
        loop.start()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C.
        pass
@@ -225,6 +225,7 @@ def ensure_jupyterhub_package(prefix):
|
|||||||
'jupyterhub-ldapauthenticator==1.3.0',
|
'jupyterhub-ldapauthenticator==1.3.0',
|
||||||
'jupyterhub-tmpauthenticator==0.6',
|
'jupyterhub-tmpauthenticator==0.6',
|
||||||
'oauthenticator==0.10.0',
|
'oauthenticator==0.10.0',
|
||||||
|
'git+https://github.com/yuvipanda/jupyterhub-idle-culler@4e710c0f45d57a7435d9ae79055a4ce499052a1c'
|
||||||
])
|
])
|
||||||
traefik.ensure_traefik_binary(prefix)
|
traefik.ensure_traefik_binary(prefix)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user