Use idle culler from jupyterhub-idle-culler package

The idle culler lives as a script in at least 3 different
places:

- In the JupyterHub repo, as an 'example'
  d126baa443/examples/cull-idle
- In the TLJH repo, as a core part of the service
  01ba34857d/tljh/cull_idle_servers.py.
  This is an import from a specific version of the JupyterHub repo,
  and has had a couple of changes made to it since.
- In the z2jh repo, as a core part of the service
  c3f3be25f8/jupyterhub/files/hub/cull_idle_servers.py
  This is also an import from a specific version of the JupyterHub
  repo, but has had a lot more work done on it. Most had been sync'd
  back to the JupyterHub repo, but some had not been. See
  9c15a42b12/images/hub/cull_idle_servers.py
  and https://github.com/jupyterhub/zero-to-jupyterhub-k8s/commits/master/jupyterhub/files/hub/cull_idle_servers.py

The idle culler is an integral part of every JupyterHub deployment
these days. It would be great if it was maintained separately on
its own, without being split across multiple repos.

The latest changes had been to the version in the JupyterHub repo, so I
copied it (while preserving commit history, because credit is important)
to a new repository: https://github.com/yuvipanda/jupyterhub-idle-culler

I looked through z2jh and tljh copies, and cherry-picked the following
changes manually:

ae80fb5163
836f19a4c7
a0787c64f1
b230ef8156
20374db7c6 (diff-f00cd100e9f673285208aaa6fc0c3212)

There were a few from 9c15a42b12/images/hub/cull_idle_servers.py
I could not apply, but mostly because those features had been
re-implemented already.

Right now, the package is a direct port of the code we had. Once
this settles in, I am hopeful we can iterate faster and make cool
new changes.
This commit is contained in:
YuviPanda
2020-04-27 16:39:03 +05:30
parent 8841bf056c
commit 474005cbd1
3 changed files with 2 additions and 343 deletions

View File

@@ -218,7 +218,7 @@ def set_cull_idle_service(config):
Set Idle Culler service
"""
cull_cmd = [
sys.executable, '-m', 'tljh.cull_idle_servers'
sys.executable, '-m', 'jupyterhub_idle_culler'
]
cull_config = config['services']['cull']
print()

View File

@@ -1,342 +0,0 @@
#!/usr/bin/env python3
"""script to monitor and cull idle single-user servers
Imported from https://github.com/jupyterhub/jupyterhub/blob/6b1046697/examples/cull-idle/cull_idle_servers.py
Caveats:
last_activity is not updated with high frequency,
so cull timeout should be greater than the sum of:
- single-user websocket ping interval (default: 30s)
- JupyterHub.last_activity_interval (default: 5 minutes)
You can run this as a service managed by JupyterHub with this in your config::
c.JupyterHub.services = [
{
'name': 'cull-idle',
'admin': True,
'command': 'python cull_idle_servers.py --timeout=3600'.split(),
}
]
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
"""
from datetime import datetime, timezone
from functools import partial
import json
import os
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
import dateutil.parser
from tornado.gen import coroutine, multi
from tornado.locks import Semaphore
from tornado.log import app_log
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.ioloop import IOLoop, PeriodicCallback
from tornado.options import define, options, parse_command_line
def parse_date(date_string):
    """Turn a timestamp string into a timezone-aware datetime.

    The string is parsed with ``dateutil.parser.parse``. If the parsed
    value carries no timezone information it is treated as UTC, so the
    returned datetime always has a ``tzinfo``.
    """
    parsed = dateutil.parser.parse(date_string)
    if parsed.tzinfo:
        return parsed
    # naive timestamp: assume it was expressed in UTC
    return parsed.replace(tzinfo=timezone.utc)
def format_td(td):
    """Nicely format a timedelta object as HH:MM:SS.

    Args:
        td: a ``datetime.timedelta``, an already-formatted string, or None.

    Returns:
        ``"unknown"`` when ``td`` is None, ``td`` itself when it is already
        a string, otherwise the duration as ``HH:MM:SS`` (hours are not
        capped at two digits). Negative durations are rendered with a
        leading ``-`` followed by the formatted magnitude.
    """
    if td is None:
        return "unknown"
    if isinstance(td, str):
        return td
    total = int(td.total_seconds())
    # Format the magnitude and prefix the sign: floor division on a negative
    # second count would otherwise render e.g. -5s as "-1:59:55".
    sign = "-" if total < 0 else ""
    minutes, seconds = divmod(abs(total), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{sign}{hours:02}:{minutes:02}:{seconds:02}"
@coroutine
def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10):
    """Shut down idle single-user servers.

    Fetches the user list from ``url + '/users'`` and sends a DELETE request
    for every server that has been inactive for at least ``inactive_limit``
    seconds or, when ``max_age`` is non-zero, has been running for at least
    ``max_age`` seconds. If ``cull_users`` is true, users whose servers are
    all stopped and who are themselves inactive (or too old) are deleted too.

    Args:
        url: base URL of the JupyterHub API.
        api_token: token sent in the ``Authorization`` header.
        inactive_limit: inactivity threshold, in seconds.
        cull_users: also delete inactive *users*, not just their servers.
        max_age: age threshold in seconds; 0 disables the age check.
        concurrency: maximum number of simultaneous requests to the Hub;
            a falsy value sends requests through ``client.fetch`` unthrottled.
    """
    auth_header = {
        'Authorization': 'token %s' % api_token,
    }
    req = HTTPRequest(
        url=url + '/users',
        headers=auth_header,
    )
    now = datetime.now(timezone.utc)
    client = AsyncHTTPClient()

    if concurrency:
        semaphore = Semaphore(concurrency)

        @coroutine
        def fetch(req):
            """client.fetch wrapped in a semaphore to limit concurrency"""
            yield semaphore.acquire()
            try:
                return (yield client.fetch(req))
            finally:
                yield semaphore.release()
    else:
        fetch = client.fetch

    resp = yield fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))

    @coroutine
    def handle_server(user, server_name, server):
        """Handle (maybe) culling a single server

        Returns True if server is now stopped (user removable),
        False otherwise.
        """
        log_name = user['name']
        if server_name:
            log_name = '%s/%s' % (user['name'], server_name)
        if server.get('pending'):
            app_log.warning(
                "Not culling server %s with pending %s",
                log_name, server['pending'])
            return False

        if server.get('started'):
            age = now - parse_date(server['started'])
        else:
            # started may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if server['last_activity']:
            inactive = now - parse_date(server['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'started' field which is never None
            # for running servers
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling server %s (inactive for %s)",
                log_name, format_td(inactive))

        if max_age and not should_cull:
            # only check started if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling server %s (age: %s, inactive for %s)",
                    log_name, format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling server %s (age: %s, inactive for %s)",
                log_name, format_td(age), format_td(inactive))
            return False

        # Named servers have their own endpoint; without it the request
        # would stop the user's default server instead of the named one.
        user_url = url + '/users/%s' % quote(user['name'])
        if server_name:
            delete_url = user_url + '/servers/%s' % quote(server_name)
        else:
            delete_url = user_url + '/server'

        req = HTTPRequest(
            url=delete_url,
            method='DELETE',
            headers=auth_header,
        )
        resp = yield fetch(req)
        if resp.code == 202:
            app_log.warning(
                "Server %s is slow to stop",
                log_name,
            )
            # return False to prevent culling user with pending shutdowns
            return False
        return True

    @coroutine
    def handle_user(user):
        """Handle one user.

        Create a list of their servers, and async exec them. Wait for
        that to be done, and if all servers are stopped, possibly cull
        the user.
        """
        # shutdown servers first.
        # Hub doesn't allow deleting users with running servers.
        # named servers contain the 'servers' dict
        if 'servers' in user:
            servers = user['servers']
        # Otherwise, server data is intermingled in with the user
        # model
        else:
            servers = {}
            if user['server']:
                servers[''] = {
                    'started': user.get('started'),
                    'last_activity': user['last_activity'],
                    'pending': user['pending'],
                    'url': user['server'],
                }
        server_futures = [
            handle_server(user, server_name, server)
            for server_name, server in servers.items()
        ]
        results = yield multi(server_futures)
        if not cull_users:
            return
        # some servers are still running, cannot cull users
        still_alive = len(results) - sum(results)
        if still_alive:
            app_log.debug(
                "Not culling user %s with %i servers still alive",
                user['name'], still_alive)
            return False

        should_cull = False
        if user.get('created'):
            age = now - parse_date(user['created'])
        else:
            # created may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if user['last_activity']:
            inactive = now - parse_date(user['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'created' field which is never None
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            # format the duration like every other log line in this function
            app_log.info(
                "Culling user %s (inactive for %s)",
                user['name'], format_td(inactive))

        if max_age and not should_cull:
            # only check created if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling user %s (age: %s, inactive for %s)",
                    user['name'], format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling user %s (created: %s, last active: %s)",
                user['name'], format_td(age), format_td(inactive))
            return False

        # quote the user name, as is done for the server URLs above
        req = HTTPRequest(
            url=url + '/users/%s' % quote(user['name']),
            method='DELETE',
            headers=auth_header,
        )
        yield fetch(req)
        return True

    # start processing every user before waiting on any of them
    futures = [(user['name'], handle_user(user)) for user in users]

    for (name, f) in futures:
        try:
            result = yield f
        except Exception:
            # one failing user must not abort the rest of the cull pass
            app_log.exception("Error processing %s", name)
        else:
            if result:
                app_log.debug("Finished culling %s", name)
if __name__ == '__main__':
    # Command-line options; several fall back to values derived below.
    define(
        'url',
        default=os.environ.get('JUPYTERHUB_API_URL'),
        help="The JupyterHub API URL",
    )
    define(
        'timeout',
        type=int,
        default=600,
        help="The idle timeout (in seconds)",
    )
    define(
        'cull_every',
        type=int,
        default=0,
        help="The interval (in seconds) for checking for idle servers to cull",
    )
    define(
        'max_age',
        type=int,
        default=0,
        help="The maximum age (in seconds) of servers that should be culled even if they are active",
    )
    define(
        'cull_users',
        type=bool,
        default=False,
        help="""Cull users in addition to servers.
This is for use in temporary-user cases such as tmpnb.""",
    )
    define(
        'concurrency',
        type=int,
        default=10,
        help="""Limit the number of concurrent requests made to the Hub.
Deleting a lot of users at the same time can slow down the Hub,
so limit the number of API requests we have outstanding at any given time.
""",
    )

    parse_command_line()

    # When no check interval is given, check twice per timeout period.
    if not options.cull_every:
        options.cull_every = options.timeout // 2

    api_token = os.environ['JUPYTERHUB_API_TOKEN']

    # Try the curl-based HTTP client; fall back to the default with a warning.
    try:
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as err:
        app_log.warning(
            "Could not load pycurl: %s\npycurl is recommended if you have a large number of users.",
            err,
        )

    io_loop = IOLoop.current()
    cull_kwargs = {
        'url': options.url,
        'api_token': api_token,
        'inactive_limit': options.timeout,
        'cull_users': options.cull_users,
        'max_age': options.max_age,
        'concurrency': options.concurrency,
    }
    run_cull = partial(cull_idle, **cull_kwargs)

    # PeriodicCallback doesn't start until the end of the first interval,
    # so queue one cull right away.
    io_loop.add_callback(run_cull)

    # cull_every is in seconds; PeriodicCallback is given milliseconds here
    periodic_cull = PeriodicCallback(run_cull, options.cull_every * 1e3)
    periodic_cull.start()

    try:
        io_loop.start()
    except KeyboardInterrupt:
        pass

View File

@@ -225,6 +225,7 @@ def ensure_jupyterhub_package(prefix):
'jupyterhub-ldapauthenticator==1.3.0',
'jupyterhub-tmpauthenticator==0.6',
'oauthenticator==0.10.0',
'git+https://github.com/yuvipanda/jupyterhub-idle-culler@4e710c0f45d57a7435d9ae79055a4ce499052a1c'
])
traefik.ensure_traefik_binary(prefix)