Merge pull request #559 from yuvipanda/feat/upstream-culler

Use idle culler from jupyterhub-idle-culler package
This commit is contained in:
Erik Sundell
2020-05-05 00:16:49 +02:00
committed by GitHub
5 changed files with 6 additions and 347 deletions

View File

@@ -32,7 +32,7 @@ def run_systemd_image(image_name, container_name, bootstrap_pip_spec):
# This is the minimum VM size we support. JupyterLab extensions seem # This is the minimum VM size we support. JupyterLab extensions seem
# to need at least this much RAM to build. Boo? # to need at least this much RAM to build. Boo?
# If we change this, need to change all other references to this number. # If we change this, need to change all other references to this number.
'--memory', '1G', '--memory', '1.0G',
] ]
if bootstrap_pip_spec: if bootstrap_pip_spec:

View File

@@ -213,7 +213,7 @@ def test_cull_service_default():
c = apply_mock_config({}) c = apply_mock_config({})
cull_cmd = [ cull_cmd = [
sys.executable, '-m', 'tljh.cull_idle_servers', sys.executable, '-m', 'jupyterhub_idle_culler',
'--timeout=600', '--cull-every=60', '--concurrency=5', '--timeout=600', '--cull-every=60', '--concurrency=5',
'--max-age=0' '--max-age=0'
] ]
@@ -238,7 +238,7 @@ def test_set_cull_service():
} }
}) })
cull_cmd = [ cull_cmd = [
sys.executable, '-m', 'tljh.cull_idle_servers', sys.executable, '-m', 'jupyterhub_idle_culler',
'--timeout=600', '--cull-every=10', '--concurrency=5', '--timeout=600', '--cull-every=10', '--concurrency=5',
'--max-age=60', '--cull-users' '--max-age=60', '--cull-users'
] ]

View File

@@ -218,7 +218,7 @@ def set_cull_idle_service(config):
Set Idle Culler service Set Idle Culler service
""" """
cull_cmd = [ cull_cmd = [
sys.executable, '-m', 'tljh.cull_idle_servers' sys.executable, '-m', 'jupyterhub_idle_culler'
] ]
cull_config = config['services']['cull'] cull_config = config['services']['cull']
print() print()

View File

@@ -1,342 +0,0 @@
#!/usr/bin/env python3
"""script to monitor and cull idle single-user servers
Imported from https://github.com/jupyterhub/jupyterhub/blob/6b1046697/examples/cull-idle/cull_idle_servers.py
Caveats:
last_activity is not updated with high frequency,
so cull timeout should be greater than the sum of:
- single-user websocket ping interval (default: 30s)
- JupyterHub.last_activity_interval (default: 5 minutes)
You can run this as a service managed by JupyterHub with this in your config::
c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': 'python cull_idle_servers.py --timeout=3600'.split(),
}
]
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
"""
from datetime import datetime, timezone
from functools import partial
import json
import os
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
import dateutil.parser
from tornado.gen import coroutine, multi
from tornado.locks import Semaphore
from tornado.log import app_log
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.ioloop import IOLoop, PeriodicCallback
from tornado.options import define, options, parse_command_line
def parse_date(date_string):
    """Parse a timestamp string into a timezone-aware datetime.

    Timestamps without timezone information are assumed to be UTC,
    so the returned datetime object is always timezone-aware.
    """
    parsed = dateutil.parser.parse(date_string)
    if not parsed.tzinfo:
        # naive timestamps are treated as UTC
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
def format_td(td):
    """Render a timedelta as an HH:MM:SS string.

    Returns "unknown" for None, and passes strings through unchanged
    so already-formatted values are not mangled.
    """
    if td is None:
        return "unknown"
    if isinstance(td, str):
        return td
    total = int(td.total_seconds())
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
@coroutine
def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10):
    """Shutdown idle single-user servers
    If cull_users, inactive *users* will be deleted as well.
    """
    # Token is passed on every Hub API request via this header.
    auth_header = {
        'Authorization': 'token %s' % api_token,
    }
    req = HTTPRequest(
        url=url + '/users',
        headers=auth_header,
    )
    # Single reference time for the whole pass, so all age/inactivity
    # comparisons in this run are consistent.
    now = datetime.now(timezone.utc)
    client = AsyncHTTPClient()
    if concurrency:
        # Bound the number of in-flight Hub API requests with a semaphore.
        semaphore = Semaphore(concurrency)
        @coroutine
        def fetch(req):
            """client.fetch wrapped in a semaphore to limit concurrency"""
            yield semaphore.acquire()
            try:
                return (yield client.fetch(req))
            finally:
                yield semaphore.release()
    else:
        # No limit requested: use the raw client directly.
        fetch = client.fetch
    resp = yield fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []

    @coroutine
    def handle_server(user, server_name, server):
        """Handle (maybe) culling a single server
        Returns True if server is now stopped (user removable),
        False otherwise.
        """
        log_name = user['name']
        if server_name:
            # named server: log as user/servername
            log_name = '%s/%s' % (user['name'], server_name)
        if server.get('pending'):
            # never cull a server mid-spawn/mid-stop
            app_log.warning(
                "Not culling server %s with pending %s",
                log_name, server['pending'])
            return False
        if server.get('started'):
            age = now - parse_date(server['started'])
        else:
            # started may be undefined on jupyterhub < 0.9
            age = None
        # check last activity
        # last_activity can be None in 0.9
        if server['last_activity']:
            inactive = now - parse_date(server['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'started' field which is never None
            # for running servers
            inactive = age
        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling server %s (inactive for %s)",
                log_name, format_td(inactive))
        if max_age and not should_cull:
            # only check started if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling server %s (age: %s, inactive for %s)",
                    log_name, format_td(age), format_td(inactive))
                should_cull = True
        if not should_cull:
            app_log.debug(
                "Not culling server %s (age: %s, inactive for %s)",
                log_name, format_td(age), format_td(inactive))
            return False
        # DELETE the server via the Hub API
        req = HTTPRequest(
            url=url + '/users/%s/server' % quote(user['name']),
            method='DELETE',
            headers=auth_header,
        )
        resp = yield fetch(req)
        if resp.code == 202:
            # 202 means the stop was accepted but has not finished yet
            app_log.warning(
                "Server %s is slow to stop",
                log_name,
            )
            # return False to prevent culling user with pending shutdowns
            return False
        return True

    @coroutine
    def handle_user(user):
        """Handle one user.
        Create a list of their servers, and async exec them. Wait for
        that to be done, and if all servers are stopped, possibly cull
        the user.
        """
        # shutdown servers first.
        # Hub doesn't allow deleting users with running servers.
        # named servers contain the 'servers' dict
        if 'servers' in user:
            servers = user['servers']
        # Otherwise, server data is intermingled in with the user
        # model
        else:
            servers = {}
            if user['server']:
                # synthesize a single default-server entry from the
                # flat (pre-named-servers) user model
                servers[''] = {
                    'started': user.get('started'),
                    'last_activity': user['last_activity'],
                    'pending': user['pending'],
                    'url': user['server'],
                }
        server_futures = [
            handle_server(user, server_name, server)
            for server_name, server in servers.items()
        ]
        # wait for all of this user's servers to be handled concurrently
        results = yield multi(server_futures)
        if not cull_users:
            return
        # some servers are still running, cannot cull users
        still_alive = len(results) - sum(results)
        if still_alive:
            app_log.debug(
                "Not culling user %s with %i servers still alive",
                user['name'], still_alive)
            return False

        should_cull = False
        if user.get('created'):
            age = now - parse_date(user['created'])
        else:
            # created may be undefined on jupyterhub < 0.9
            age = None
        # check last activity
        # last_activity can be None in 0.9
        if user['last_activity']:
            inactive = now - parse_date(user['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'created' field which is never None
            inactive = age
        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling user %s (inactive for %s)",
                user['name'], inactive)
        if max_age and not should_cull:
            # only check created if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling user %s (age: %s, inactive for %s)",
                    user['name'], format_td(age), format_td(inactive))
                should_cull = True
        if not should_cull:
            app_log.debug(
                "Not culling user %s (created: %s, last active: %s)",
                user['name'], format_td(age), format_td(inactive))
            return False
        # delete the (now server-less) user via the Hub API
        req = HTTPRequest(
            url=url + '/users/%s' % user['name'],
            method='DELETE',
            headers=auth_header,
        )
        yield fetch(req)
        return True

    # kick off all users, then await results one by one so a failure
    # on one user doesn't abort the whole pass
    for user in users:
        futures.append((user['name'], handle_user(user)))
    for (name, f) in futures:
        try:
            result = yield f
        except Exception:
            app_log.exception("Error processing %s", name)
        else:
            if result:
                app_log.debug("Finished culling %s", name)
if __name__ == '__main__':
    # Command-line options (tornado.options); defaults pull the Hub API
    # URL from the environment, as set for JupyterHub-managed services.
    define(
        'url',
        default=os.environ.get('JUPYTERHUB_API_URL'),
        help="The JupyterHub API URL",
    )
    define('timeout', type=int, default=600, help="The idle timeout (in seconds)")
    define('cull_every', type=int, default=0,
        help="The interval (in seconds) for checking for idle servers to cull")
    define('max_age', type=int, default=0,
        help="The maximum age (in seconds) of servers that should be culled even if they are active")
    define('cull_users', type=bool, default=False,
        help="""Cull users in addition to servers.
        This is for use in temporary-user cases such as tmpnb.""",
    )
    define('concurrency', type=int, default=10,
        help="""Limit the number of concurrent requests made to the Hub.
        Deleting a lot of users at the same time can slow down the Hub,
        so limit the number of API requests we have outstanding at any given time.
        """
    )
    parse_command_line()
    # default the check interval to half the idle timeout
    if not options.cull_every:
        options.cull_every = options.timeout // 2
    # API token is required; KeyError here is intentional if unset
    api_token = os.environ['JUPYTERHUB_API_TOKEN']

    try:
        # prefer the curl-based client when pycurl is available
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as e:
        app_log.warning(
            "Could not load pycurl: %s\n"
            "pycurl is recommended if you have a large number of users.",
            e)

    loop = IOLoop.current()
    # bind all option values into a zero-arg callable for the callbacks
    cull = partial(
        cull_idle,
        url=options.url,
        api_token=api_token,
        inactive_limit=options.timeout,
        cull_users=options.cull_users,
        max_age=options.max_age,
        concurrency=options.concurrency,
    )
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    loop.add_callback(cull)
    # schedule periodic cull
    # (PeriodicCallback takes the interval in milliseconds)
    pc = PeriodicCallback(cull, 1e3 * options.cull_every)
    pc.start()
    try:
        loop.start()
    except KeyboardInterrupt:
        # clean exit on Ctrl-C
        pass

View File

@@ -225,6 +225,7 @@ def ensure_jupyterhub_package(prefix):
'jupyterhub-ldapauthenticator==1.3.0', 'jupyterhub-ldapauthenticator==1.3.0',
'jupyterhub-tmpauthenticator==0.6', 'jupyterhub-tmpauthenticator==0.6',
'oauthenticator==0.10.0', 'oauthenticator==0.10.0',
'jupyterhub-idle-culler==1.0'
]) ])
traefik.ensure_traefik_binary(prefix) traefik.ensure_traefik_binary(prefix)