mirror of
https://github.com/jupyterhub/the-littlest-jupyterhub.git
synced 2025-12-18 21:54:05 +08:00
Merge pull request #366 from GeorgianaElena/addIdleCuller
Add idle culler
This commit is contained in:
@@ -124,6 +124,7 @@ Topic guides provide in-depth explanations of specific topics.
|
|||||||
topic/tljh-config
|
topic/tljh-config
|
||||||
topic/authenticator-configuration
|
topic/authenticator-configuration
|
||||||
topic/escape-hatch
|
topic/escape-hatch
|
||||||
|
topic/idle-culler
|
||||||
|
|
||||||
|
|
||||||
Troubleshooting
|
Troubleshooting
|
||||||
|
|||||||
114
docs/topic/idle-culler.rst
Normal file
114
docs/topic/idle-culler.rst
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
.. _topic/idle-culler:
|
||||||
|
|
||||||
|
=============================
|
||||||
|
Culling idle notebook servers
|
||||||
|
=============================
|
||||||
|
|
||||||
|
The idle culler automatically shuts down user notebook servers when they have
|
||||||
|
not been used for a certain time period, in order to reduce the total resource
|
||||||
|
usage on your JupyterHub.
|
||||||
|
|
||||||
|
|
||||||
|
JupyterHub pings the user's notebook server at certain time intervals. If no response
|
||||||
|
is received from the server during these checks and the timeout expires, the server is
|
||||||
|
considered to be *inactive (idle)* and will be culled.
|
||||||
|
|
||||||
|
|
||||||
|
Default settings
|
||||||
|
================
|
||||||
|
|
||||||
|
By default, JupyterHub will ping the user notebook servers every 60s to check their
|
||||||
|
status. Every server found to be idle for more than 10 minutes will be culled.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
services.cull.every = 60
|
||||||
|
services.cull.timeout = 600
|
||||||
|
|
||||||
|
Because the servers don't have a maximum age set, an active server will not be shut down
|
||||||
|
regardless of how long it has been up and running.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
services.cull.max_age = 0
|
||||||
|
|
||||||
|
If after the culling process, there are users with no active notebook servers, by default,
|
||||||
|
the users will not be culled alongside their notebooks and will continue to exist.
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
services.cull.users = False
|
||||||
|
|
||||||
|
|
||||||
|
Configuring the idle culler
|
||||||
|
===========================
|
||||||
|
|
||||||
|
The available configuration options are:
|
||||||
|
|
||||||
|
Idle timeout
|
||||||
|
------------
|
||||||
|
The idle timeout is the maximum time (in seconds) a server can be inactive before it
|
||||||
|
will be culled. The timeout can be configured using:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.timeout <max-idle-sec-before-server-is-culled>
|
||||||
|
sudo tljh-config reload
|
||||||
|
|
||||||
|
Idle check interval
|
||||||
|
-------------------
|
||||||
|
The idle check interval represents how frequently (in seconds) the Hub will
|
||||||
|
check if there are any idle servers to cull. It can be configured using:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.every <number-of-sec-this-check-is-done>
|
||||||
|
sudo tljh-config reload
|
||||||
|
|
||||||
|
Maximum age
|
||||||
|
-----------
|
||||||
|
The maximum age sets the time (in seconds) a server should be running.
|
||||||
|
Servers that exceed the maximum age will be culled even if they are active.
|
||||||
|
A maximum age of 0 will deactivate this option.
|
||||||
|
The maximum age can be configured using:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.max_age <server-max-age>
|
||||||
|
sudo tljh-config reload
|
||||||
|
|
||||||
|
User culling
|
||||||
|
------------
|
||||||
|
In addition to servers, it is also possible to cull the users. This is usually
|
||||||
|
suited for temporary-user cases such as *tmpnb*.
|
||||||
|
User culling can be activated using the following command:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.users True
|
||||||
|
sudo tljh-config reload
|
||||||
|
|
||||||
|
Concurrency
|
||||||
|
-----------
|
||||||
|
Deleting a lot of users at the same time can slow down the Hub.
|
||||||
|
The number of concurrent requests made to the Hub can be configured using:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.concurrency <number-of-concurrent-hub-requests>
|
||||||
|
sudo tljh-config reload
|
||||||
|
|
||||||
|
Because TLJH is used for a small number of users, the cases that may require you to
|
||||||
|
modify the concurrency limit should be rare.
|
||||||
|
|
||||||
|
|
||||||
|
Disabling the idle culler
|
||||||
|
=========================
|
||||||
|
|
||||||
|
The idle culling service is enabled by default. To disable it, use the following
|
||||||
|
command:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
sudo tljh-config set services.cull.enabled False
|
||||||
|
sudo tljh-config reload
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import requests
|
import requests
|
||||||
from hubtraf.user import User
|
from hubtraf.user import User
|
||||||
from hubtraf.auth.dummy import login_dummy
|
from hubtraf.auth.dummy import login_dummy
|
||||||
|
from jupyterhub.utils import exponential_backoff
|
||||||
import secrets
|
import secrets
|
||||||
import pytest
|
import pytest
|
||||||
from functools import partial
|
from functools import partial
|
||||||
@@ -137,4 +138,99 @@ async def test_long_username():
|
|||||||
'-u', 'jupyterhub',
|
'-u', 'jupyterhub',
|
||||||
'--no-pager'
|
'--no-pager'
|
||||||
])
|
])
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_idle_server_culled():
    """
    User logs in, starts a server & stays idle for 1 min.
    (the user's server should be culled during this period)
    """
    # This *must* be localhost, not an IP
    # aiohttp throws away cookies if we are connecting to an IP!
    hub_url = 'http://localhost'
    username = secrets.token_hex(8)

    # tljh-config settings applied (in this order) before reloading the hub
    settings = (
        ('auth.type', 'dummyauthenticator.DummyAuthenticator'),
        # Check every 10s for idle servers to cull
        ('services.cull.every', "10"),
        # Apart from servers, also cull users
        ('services.cull.users', "True"),
        # Cull servers and users after 60s of activity
        ('services.cull.max_age', "60"),
    )
    for key, value in settings:
        set_proc = await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'set', key, value)
        assert 0 == await set_proc.wait()
    reload_proc = await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'reload')
    assert 0 == await reload_proc.wait()

    async with User(username, hub_url, partial(login_dummy, password='')) as u:
        await u.login()
        # Start user's server
        await u.ensure_server()
        # Assert that the user exists
        assert pwd.getpwnam(f'jupyter-{username}') is not None

        user_api_url = u.hub_url / 'hub/api/users' / username
        request_headers = {'Referer': str(u.hub_url / 'hub/')}

        # Check that we can get to the user's server
        r = await u.session.get(user_api_url, headers=request_headers)
        assert r.status == 200

        async def _check_culling_done():
            # Check that after 60s, the user and server have been culled
            # and are not reachable anymore
            r = await u.session.get(user_api_url, headers=request_headers)
            print(r.status)
            return r.status == 403

        await exponential_backoff(
            _check_culling_done,
            "Server culling failed!",
            timeout=100,
        )
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_active_server_not_culled():
    """
    User logs in, starts a server & keeps it for 30s.

    The culler is configured with a 60s maximum age, so the user's server
    must still be reachable during the whole 30s observation window.
    The test fails if the server stops answering with 200 in that window.
    """
    # This *must* be localhost, not an IP
    # aiohttp throws away cookies if we are connecting to an IP!
    hub_url = 'http://localhost'
    username = secrets.token_hex(8)

    assert 0 == await (await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'set', 'auth.type', 'dummyauthenticator.DummyAuthenticator')).wait()
    # Check every 10s for idle servers to cull
    assert 0 == await (await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'set', 'services.cull.every', "10")).wait()
    # Apart from servers, also cull users
    assert 0 == await (await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'set', 'services.cull.users', "True")).wait()
    # Cull servers and users after 60s of activity
    assert 0 == await (await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'set', 'services.cull.max_age', "60")).wait()
    assert 0 == await (await asyncio.create_subprocess_exec(*TLJH_CONFIG_PATH, 'reload')).wait()

    async with User(username, hub_url, partial(login_dummy, password='')) as u:
        await u.login()
        # Start user's server
        await u.ensure_server()
        # Assert that the user exists
        assert pwd.getpwnam(f'jupyter-{username}') is not None

        # Check that we can get to the user's server
        r = await u.session.get(u.hub_url / 'hub/api/users' / username,
                                headers={'Referer': str(u.hub_url / 'hub/')})
        assert r.status == 200

        async def _check_culling_done():
            # Returns True as soon as the user's server stops answering 200,
            # i.e. as soon as it looks like it has been culled.
            r = await u.session.get(u.hub_url / 'hub/api/users' / username,
                                    headers={'Referer': str(u.hub_url / 'hub/')})
            print(r.status)
            return r.status != 200

        try:
            await exponential_backoff(
                _check_culling_done,
                "User's server is still reacheable!",
                timeout=30,
            )
        except TimeoutError:
            # During the 30s timeout the user's server wasn't culled, which is what we intended.
            pass
        else:
            # exponential_backoff returned normally, so the check saw a
            # non-200 status: the server went away although it should not have.
            pytest.fail("User's server was culled before reaching its maximum age")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ Test configurer
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from tljh import configurer
|
from tljh import configurer
|
||||||
|
|
||||||
@@ -187,6 +188,49 @@ def test_set_traefik_api():
|
|||||||
assert c.TraefikTomlProxy.traefik_api_password == '1234'
|
assert c.TraefikTomlProxy.traefik_api_password == '1234'
|
||||||
|
|
||||||
|
|
||||||
|
def test_cull_service_default():
    """
    With no overrides, the cull-idle service must be registered with the
    default timeout, interval, concurrency and max-age flags.
    """
    c = apply_mock_config({})

    expected_service = {
        'name': 'cull-idle',
        'admin': True,
        'command': [
            sys.executable,
            '/srv/src/tljh/cull_idle_servers.py',
            '--timeout=600',
            '--cull-every=60',
            '--concurrency=5',
            '--max-age=0',
        ],
    }
    assert c.JupyterHub.services == [expected_service]
|
||||||
|
|
||||||
|
|
||||||
|
def test_set_cull_service():
    """
    Overridden cull options must show up as flags on the cull-idle
    service command, with untouched options keeping their defaults.
    """
    cull_overrides = {
        'every': 10,
        'users': True,
        'max_age': 60,
    }
    c = apply_mock_config({'services': {'cull': cull_overrides}})

    expected_service = {
        'name': 'cull-idle',
        'admin': True,
        'command': [
            sys.executable,
            '/srv/src/tljh/cull_idle_servers.py',
            '--timeout=600',
            '--cull-every=10',
            '--concurrency=5',
            '--max-age=60',
            '--cull-users',
        ],
    }
    assert c.JupyterHub.services == [expected_service]
|
||||||
|
|
||||||
|
|
||||||
def test_load_secrets(tljh_dir):
|
def test_load_secrets(tljh_dir):
|
||||||
"""
|
"""
|
||||||
Test loading secret files
|
Test loading secret files
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ FIXME: A strong feeling that JSON Schema should be involved somehow.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
from .config import CONFIG_FILE, STATE_DIR
|
from .config import CONFIG_FILE, STATE_DIR
|
||||||
from .yaml import yaml
|
from .yaml import yaml
|
||||||
@@ -55,6 +56,16 @@ default = {
|
|||||||
'user_environment': {
|
'user_environment': {
|
||||||
'default_app': 'classic',
|
'default_app': 'classic',
|
||||||
},
|
},
|
||||||
|
'services': {
|
||||||
|
'cull': {
|
||||||
|
'enabled': True,
|
||||||
|
'timeout': 600,
|
||||||
|
'every': 60,
|
||||||
|
'concurrency': 5,
|
||||||
|
'users': False,
|
||||||
|
'max_age': 0
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def load_config(config_file=CONFIG_FILE):
|
def load_config(config_file=CONFIG_FILE):
|
||||||
@@ -86,6 +97,7 @@ def apply_config(config_overrides, c):
|
|||||||
update_user_environment(c, tljh_config)
|
update_user_environment(c, tljh_config)
|
||||||
update_user_account_config(c, tljh_config)
|
update_user_account_config(c, tljh_config)
|
||||||
update_traefik_api(c, tljh_config)
|
update_traefik_api(c, tljh_config)
|
||||||
|
update_services(c, tljh_config)
|
||||||
|
|
||||||
|
|
||||||
def set_if_not_none(parent, key, value):
|
def set_if_not_none(parent, key, value):
|
||||||
@@ -191,6 +203,38 @@ def update_traefik_api(c, config):
|
|||||||
c.TraefikTomlProxy.traefik_api_password = config['traefik_api']['password']
|
c.TraefikTomlProxy.traefik_api_password = config['traefik_api']['password']
|
||||||
|
|
||||||
|
|
||||||
|
def set_cull_idle_service(config):
    """
    Build the JupyterHub service definition for the idle culler.

    Reads ``config['services']['cull']`` and translates its ``timeout``,
    ``every``, ``concurrency`` and ``max_age`` values into command-line
    flags for ``cull_idle_servers.py``; ``--cull-users`` is appended only
    when ``users`` is truthy.

    Returns a dict with the ``name``, ``admin`` and ``command`` keys,
    suitable for appending to ``c.JupyterHub.services``.
    """
    cull_config = config['services']['cull']

    # Run the culler with the same interpreter that is running this code.
    cull_cmd = [
        sys.executable, '/srv/src/tljh/cull_idle_servers.py',
        # %d deliberately rejects non-numeric settings instead of passing
        # them through to the culler's command line.
        '--timeout=%d' % cull_config['timeout'],
        '--cull-every=%d' % cull_config['every'],
        '--concurrency=%d' % cull_config['concurrency'],
        '--max-age=%d' % cull_config['max_age'],
    ]
    if cull_config['users']:
        cull_cmd.append('--cull-users')

    return {
        'name': 'cull-idle',
        'admin': True,
        'command': cull_cmd,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def update_services(c, config):
    """
    Reset the hub's service list and register the services enabled in config.

    The idle culler is the only service handled here; it is added when
    ``config['services']['cull']['enabled']`` is truthy.
    """
    c.JupyterHub.services = []
    if not config['services']['cull']['enabled']:
        return
    culler = set_cull_idle_service(config)
    c.JupyterHub.services.append(culler)
|
||||||
|
|
||||||
|
|
||||||
def _merge_dictionaries(a, b, path=None, update=True):
|
def _merge_dictionaries(a, b, path=None, update=True):
|
||||||
"""
|
"""
|
||||||
Merge two dictionaries recursively.
|
Merge two dictionaries recursively.
|
||||||
|
|||||||
342
tljh/cull_idle_servers.py
Normal file
342
tljh/cull_idle_servers.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""script to monitor and cull idle single-user servers
|
||||||
|
|
||||||
|
Imported from https://github.com/jupyterhub/jupyterhub/blob/6b1046697/examples/cull-idle/cull_idle_servers.py
|
||||||
|
|
||||||
|
Caveats:
|
||||||
|
|
||||||
|
last_activity is not updated with high frequency,
|
||||||
|
so cull timeout should be greater than the sum of:
|
||||||
|
|
||||||
|
- single-user websocket ping interval (default: 30s)
|
||||||
|
- JupyterHub.last_activity_interval (default: 5 minutes)
|
||||||
|
|
||||||
|
You can run this as a service managed by JupyterHub with this in your config::
|
||||||
|
|
||||||
|
|
||||||
|
c.JupyterHub.services = [
|
||||||
|
{
|
||||||
|
'name': 'cull-idle',
|
||||||
|
'admin': True,
|
||||||
|
'command': 'python cull_idle_servers.py --timeout=3600'.split(),
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
Or run it manually by generating an API token and storing it in `JUPYTERHUB_API_TOKEN`:
|
||||||
|
|
||||||
|
export JUPYTERHUB_API_TOKEN=`jupyterhub token`
|
||||||
|
python cull_idle_servers.py [--timeout=900] [--url=http://127.0.0.1:8081/hub/api]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from functools import partial
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urllib.parse import quote
|
||||||
|
except ImportError:
|
||||||
|
from urllib import quote
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
|
from tornado.gen import coroutine, multi
|
||||||
|
from tornado.locks import Semaphore
|
||||||
|
from tornado.log import app_log
|
||||||
|
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
|
||||||
|
from tornado.ioloop import IOLoop, PeriodicCallback
|
||||||
|
from tornado.options import define, options, parse_command_line
|
||||||
|
|
||||||
|
|
||||||
|
def parse_date(date_string):
    """Turn a timestamp string into a timezone-aware datetime.

    Timestamps that carry no timezone information are taken to be UTC,
    so the returned datetime is always timezone-aware.
    """
    parsed = dateutil.parser.parse(date_string)
    if parsed.tzinfo:
        return parsed
    # naive timestamp: assume UTC
    return parsed.replace(tzinfo=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
|
def format_td(td):
    """
    Render a timedelta as a zero-padded HH:MM:SS string.

    ``None`` is rendered as "unknown" and strings are passed through
    unchanged. Fractions of a second are truncated.
    """
    if td is None:
        return "unknown"
    if isinstance(td, str):
        return td
    total = int(td.total_seconds())
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02}"
|
||||||
|
|
||||||
|
|
||||||
|
@coroutine
def cull_idle(url, api_token, inactive_limit, cull_users=False, max_age=0, concurrency=10):
    """Shutdown idle single-user servers

    If cull_users, inactive *users* will be deleted as well.

    Parameters:
        url: base URL of the JupyterHub REST API ('/users' etc. are appended).
        api_token: token sent in the Authorization header of every request.
        inactive_limit: idle time (in seconds) after which a server
            (and, with cull_users, a user) is culled.
        cull_users: also delete users once none of their servers remain.
        max_age: age (in seconds) after which servers/users are culled even
            if active; 0 disables the age check.
        concurrency: maximum number of outstanding Hub requests;
            a falsy value disables the limit.
    """
    auth_header = {
        'Authorization': 'token %s' % api_token,
    }
    req = HTTPRequest(
        url=url + '/users',
        headers=auth_header,
    )
    # single reference time so every server/user is judged against the same "now"
    now = datetime.now(timezone.utc)
    client = AsyncHTTPClient()

    if concurrency:
        semaphore = Semaphore(concurrency)

        @coroutine
        def fetch(req):
            """client.fetch wrapped in a semaphore to limit concurrency"""
            yield semaphore.acquire()
            try:
                return (yield client.fetch(req))
            finally:
                # release() is synchronous and returns None: nothing to yield
                semaphore.release()
    else:
        fetch = client.fetch

    resp = yield fetch(req)
    users = json.loads(resp.body.decode('utf8', 'replace'))
    futures = []

    @coroutine
    def handle_server(user, server_name, server):
        """Handle (maybe) culling a single server

        Returns True if server is now stopped (user removable),
        False otherwise.
        """
        log_name = user['name']
        if server_name:
            log_name = '%s/%s' % (user['name'], server_name)
        if server.get('pending'):
            app_log.warning(
                "Not culling server %s with pending %s",
                log_name, server['pending'])
            return False

        if server.get('started'):
            age = now - parse_date(server['started'])
        else:
            # started may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if server['last_activity']:
            inactive = now - parse_date(server['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'started' field which is never None
            # for running servers
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling server %s (inactive for %s)",
                log_name, format_td(inactive))

        if max_age and not should_cull:
            # only check started if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'started' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling server %s (age: %s, inactive for %s)",
                    log_name, format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling server %s (age: %s, inactive for %s)",
                log_name, format_td(age), format_td(inactive))
            return False

        if server_name:
            # named servers have their own endpoint; using the default-server
            # endpoint here would stop the wrong server
            delete_url = url + '/users/%s/servers/%s' % (
                quote(user['name']), quote(server_name))
        else:
            delete_url = url + '/users/%s/server' % quote(user['name'])

        req = HTTPRequest(
            url=delete_url,
            method='DELETE',
            headers=auth_header,
        )
        resp = yield fetch(req)
        if resp.code == 202:
            app_log.warning(
                "Server %s is slow to stop",
                log_name,
            )
            # return False to prevent culling user with pending shutdowns
            return False
        return True

    @coroutine
    def handle_user(user):
        """Handle one user.

        Create a list of their servers, and async exec them.  Wait for
        that to be done, and if all servers are stopped, possibly cull
        the user.

        Returns True if the user was deleted, False otherwise.
        """
        # shutdown servers first.
        # Hub doesn't allow deleting users with running servers.
        # named servers contain the 'servers' dict
        if 'servers' in user:
            servers = user['servers']
        # Otherwise, server data is intermingled in with the user
        # model
        else:
            servers = {}
            if user['server']:
                servers[''] = {
                    'started': user.get('started'),
                    'last_activity': user['last_activity'],
                    'pending': user['pending'],
                    'url': user['server'],
                }
        server_futures = [
            handle_server(user, server_name, server)
            for server_name, server in servers.items()
        ]
        results = yield multi(server_futures)
        if not cull_users:
            return False
        # some servers are still running, cannot cull users
        still_alive = len(results) - sum(results)
        if still_alive:
            app_log.debug(
                "Not culling user %s with %i servers still alive",
                user['name'], still_alive)
            return False

        if user.get('created'):
            age = now - parse_date(user['created'])
        else:
            # created may be undefined on jupyterhub < 0.9
            age = None

        # check last activity
        # last_activity can be None in 0.9
        if user['last_activity']:
            inactive = now - parse_date(user['last_activity'])
        else:
            # no activity yet, use start date
            # last_activity may be None with jupyterhub 0.9,
            # which introduces the 'created' field which is never None
            inactive = age

        should_cull = (inactive is not None and
                       inactive.total_seconds() >= inactive_limit)
        if should_cull:
            app_log.info(
                "Culling user %s (inactive for %s)",
                user['name'], format_td(inactive))

        if max_age and not should_cull:
            # only check created if max_age is specified
            # so that we can still be compatible with jupyterhub 0.8
            # which doesn't define the 'created' field
            if age is not None and age.total_seconds() >= max_age:
                app_log.info(
                    "Culling user %s (age: %s, inactive for %s)",
                    user['name'], format_td(age), format_td(inactive))
                should_cull = True

        if not should_cull:
            app_log.debug(
                "Not culling user %s (created: %s, last active: %s)",
                user['name'], format_td(age), format_td(inactive))
            return False

        req = HTTPRequest(
            # quote the name, as is done for the server endpoints
            url=url + '/users/%s' % quote(user['name']),
            method='DELETE',
            headers=auth_header,
        )
        yield fetch(req)
        return True

    # start handling every user before waiting on any of them
    for user in users:
        futures.append((user['name'], handle_user(user)))

    for (name, f) in futures:
        try:
            result = yield f
        except Exception:
            # one failing user must not abort the rest of the cull run
            app_log.exception("Error processing %s", name)
        else:
            if result:
                app_log.debug("Finished culling %s", name)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # --- command-line options -------------------------------------------
    define(
        'url',
        default=os.environ.get('JUPYTERHUB_API_URL'),
        help="The JupyterHub API URL",
    )
    define('timeout', type=int, default=600, help="The idle timeout (in seconds)")
    define(
        'cull_every',
        type=int,
        default=0,
        help="The interval (in seconds) for checking for idle servers to cull",
    )
    define(
        'max_age',
        type=int,
        default=0,
        help="The maximum age (in seconds) of servers that should be culled even if they are active",
    )
    define(
        'cull_users',
        type=bool,
        default=False,
        help="""Cull users in addition to servers.
                This is for use in temporary-user cases such as tmpnb.""",
    )
    define(
        'concurrency',
        type=int,
        default=10,
        help="""Limit the number of concurrent requests made to the Hub.

                Deleting a lot of users at the same time can slow down the Hub,
                so limit the number of API requests we have outstanding at any given time.
                """,
    )

    parse_command_line()
    # no explicit interval given: check twice per idle-timeout period
    if not options.cull_every:
        options.cull_every = options.timeout // 2
    api_token = os.environ['JUPYTERHUB_API_TOKEN']

    # --- HTTP client ----------------------------------------------------
    try:
        AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
    except ImportError as e:
        app_log.warning(
            "Could not load pycurl: %s\n"
            "pycurl is recommended if you have a large number of users.",
            e)

    # --- scheduling -----------------------------------------------------
    io_loop = IOLoop.current()
    cull_once = partial(
        cull_idle,
        url=options.url,
        api_token=api_token,
        inactive_limit=options.timeout,
        cull_users=options.cull_users,
        max_age=options.max_age,
        concurrency=options.concurrency,
    )
    # schedule first cull immediately
    # because PeriodicCallback doesn't start until the end of the first interval
    io_loop.add_callback(cull_once)
    # schedule periodic cull (cull_every is in seconds, scaled by 1e3 for PeriodicCallback)
    periodic_cull = PeriodicCallback(cull_once, 1e3 * options.cull_every)
    periodic_cull.start()
    try:
        io_loop.start()
    except KeyboardInterrupt:
        pass
|
||||||
Reference in New Issue
Block a user