Files
the-littlest-jupyterhub/tljh/configurer.py

269 lines
7.1 KiB
Python
Raw Normal View History

"""
Parse YAML config file & update JupyterHub config.
Config should never append or mutate, only set. Functions here could
be called many times per lifetime of a jupyterhub.
Traitlets that modify the startup of JupyterHub should not be here.
FIXME: A strong feeling that JSON Schema should be involved somehow.
"""
import copy
import os
import sys

from .config import CONFIG_FILE, STATE_DIR
from .yaml import yaml
# Baseline tljh configuration.
# Values supplied by the user are layered on top of this mapping.
default = {
    # Authenticator class to use, plus per-authenticator settings
    'auth': {
        'type': 'firstuseauthenticator.FirstUseAuthenticator',
        'FirstUseAuthenticator': {
            'create_users': False,
        },
    },
    # User lists and extra group membership
    'users': {
        'allowed': [],
        'banned': [],
        'admin': [],
        'extra_user_groups': {},
    },
    # Per-user server resource limits (None leaves the limit unset)
    'limits': {
        'memory': None,
        'cpu': None,
    },
    'http': {
        'port': 80,
    },
    'https': {
        'enabled': False,
        'port': 443,
        'tls': {
            'cert': '',
            'key': '',
        },
        'letsencrypt': {
            'email': '',
            'domains': [],
        },
    },
    # Traefik API endpoint and credentials
    'traefik_api': {
        'ip': '127.0.0.1',
        'port': 8099,
        'username': 'api_admin',
        'password': '',
    },
    'user_environment': {
        'default_app': 'classic',
    },
    # Hub-managed services
    'services': {
        'cull': {
            'enabled': True,
            'timeout': 600,
            'every': 60,
            'concurrency': 5,
            'users': False,
            'max_age': 0,
        },
    },
}
def load_config(config_file=CONFIG_FILE):
    """
    Load the current config as a dictionary.

    Layers are applied lowest priority first: the built-in ``default``
    mapping, then the secrets returned by ``load_secrets()``, then the
    overrides read from ``config_file``.

    config_file: path to the YAML file holding user overrides. A missing
    file is treated as having no overrides.

    Returns a new dictionary; the module-level ``default`` is not modified.
    """
    if os.path.exists(config_file):
        with open(config_file) as f:
            # A file with no YAML content may load as None rather than a
            # dict; treat that as "no overrides" instead of failing in the
            # merge below.
            config_overrides = yaml.load(f) or {}
    else:
        config_overrides = {}

    secrets = load_secrets()
    # _merge_dictionaries mutates its first argument recursively. A shallow
    # dict(default) would share the nested dicts with the module-level
    # defaults, so secrets and overrides would leak into ``default`` and
    # persist across calls. Merge into a deep copy instead.
    config = _merge_dictionaries(copy.deepcopy(default), secrets)
    config = _merge_dictionaries(config, config_overrides)
    return config
def apply_config(config_overrides, c):
    """
    Merge config_overrides with config defaults & apply to JupyterHub config c

    config_overrides: dictionary of settings layered on top of ``default``.
    c: JupyterHub config object that the update_* functions set values on.

    The module-level ``default`` is not modified, so repeated calls always
    start from the same defaults.
    """
    # _merge_dictionaries mutates its first argument recursively; a shallow
    # dict(default) would let overrides be written into the nested dicts of
    # the shared defaults. Work on a deep copy.
    tljh_config = _merge_dictionaries(copy.deepcopy(default), config_overrides)

    update_auth(c, tljh_config)
    update_userlists(c, tljh_config)
    update_usergroups(c, tljh_config)
    update_limits(c, tljh_config)
    update_user_environment(c, tljh_config)
    update_user_account_config(c, tljh_config)
    update_traefik_api(c, tljh_config)
    update_services(c, tljh_config)
def set_if_not_none(parent, key, value):
    """
    Assign value to the attribute named key on parent, unless value is None

    A None value leaves parent untouched.
    """
    if value is None:
        return
    setattr(parent, key, value)
def load_traefik_api_credentials():
    """
    Read the traefik api password from the 'traefik-api.secret' file

    The file is looked up inside STATE_DIR. Its full contents are used as
    the password, returned nested under the 'traefik_api' key so the result
    can be merged into the config. An empty dict is returned when the file
    does not exist.
    """
    secret_file = os.path.join(STATE_DIR, 'traefik-api.secret')
    if os.path.exists(secret_file):
        with open(secret_file, 'r') as secret:
            return {'traefik_api': {'password': secret.read()}}
    return {}
2019-02-11 09:24:16 +02:00
def load_secrets():
    """
    Collect every secret value stored on disk

    Returns a dict shaped like the config, to be merged in during load.
    Currently the only source is load_traefik_api_credentials().
    """
    return _merge_dictionaries({}, load_traefik_api_credentials())
2019-02-11 09:24:16 +02:00
def update_auth(c, config):
    """
    Apply the authenticator settings from the YAML config

    auth.type names the authenticator class to use. Every entry found under
    auth.<ClassName> is set on that authenticator's config section, except
    entries whose value is None.
    """
    auth_config = config.get('auth')

    # FIXME: Make sure this is something importable.
    # FIXME: SECURITY: Class must inherit from Authenticator, to prevent us being
    # used to set arbitrary properties on arbitrary types of objects!
    class_path = auth_config['type']

    # Config lives under the bare class name, even when auth.type is fully
    # qualified: dummyauthenticator.DummyAuthenticator is configured through
    # c.DummyAuthenticator and the auth.DummyAuthenticator section.
    class_name = class_path.split('.')[-1]

    c.JupyterHub.authenticator_class = class_path

    authenticator_section = getattr(c, class_name)
    for option, setting in auth_config.get(class_name, {}).items():
        set_if_not_none(authenticator_section, option, setting)
def update_userlists(c, config):
    """
    Copy the allowed, banned & admin user lists onto the Authenticator config

    Each list is converted to a set before being assigned.
    """
    user_config = config['users']
    # (Authenticator attribute, key under 'users') in assignment order
    targets = (
        ('whitelist', 'allowed'),
        ('blacklist', 'banned'),
        ('admin_users', 'admin'),
    )
    for attribute, source_key in targets:
        setattr(c.Authenticator, attribute, set(user_config[source_key]))
2019-06-20 21:54:51 +03:00
def update_usergroups(c, config):
    """
    Pass the users.extra_user_groups setting on to UserCreatingSpawner
    """
    extra_groups = config['users']['extra_user_groups']
    c.UserCreatingSpawner.user_groups = extra_groups
2019-06-20 21:54:51 +03:00
def update_limits(c, config):
    """
    Apply the memory & cpu limits for user servers to the Spawner config

    Values are assigned as found in the config, including None.
    """
    limit_config = config['limits']
    # (Spawner attribute, key under 'limits') in assignment order
    for attribute, source_key in (('mem_limit', 'memory'), ('cpu_limit', 'cpu')):
        setattr(c.Spawner, attribute, limit_config[source_key])
def update_user_environment(c, config):
    """
    Apply user environment settings to the Spawner config

    Only user_environment.default_app is handled: 'jupyterlab' and 'nteract'
    select the URL users are launched into. Any other value leaves
    c.Spawner.default_url unset.
    """
    chosen_app = config['user_environment']['default_app']

    # Landing URL for each application that needs one
    landing_urls = (
        ('jupyterlab', '/lab'),
        ('nteract', '/nteract'),
    )
    for app_name, url in landing_urls:
        if chosen_app == app_name:
            c.Spawner.default_url = url
            break
def update_user_account_config(c, config):
    """
    Set the username template used by SystemdSpawner

    The template is fixed; nothing is read from config.
    """
    systemd_spawner = c.SystemdSpawner
    systemd_spawner.username_template = 'jupyter-{USERNAME}'
def update_traefik_api(c, config):
    """
    Copy the traefik api username & password onto the TraefikTomlProxy config
    """
    api_config = config['traefik_api']
    c.TraefikTomlProxy.traefik_api_username = api_config['username']
    c.TraefikTomlProxy.traefik_api_password = api_config['password']
2019-02-11 09:24:16 +02:00
def set_cull_idle_service(config):
    """
    Build the Idle Culler service definition

    config: merged tljh config. The 'timeout', 'every', 'concurrency',
    'max_age' and 'users' entries of config['services']['cull'] are read.

    Returns a dict with 'name', 'admin' and 'command' keys. The command runs
    the jupyterhub_idle_culler module with the current Python interpreter.
    Nothing is written to stdout.
    """
    cull_config = config['services']['cull']

    cull_cmd = [
        sys.executable, '-m', 'jupyterhub_idle_culler',
        # %d keeps the options integer-formatted
        '--timeout=%d' % cull_config['timeout'],
        '--cull-every=%d' % cull_config['every'],
        '--concurrency=%d' % cull_config['concurrency'],
        '--max-age=%d' % cull_config['max_age'],
    ]
    # Culling user accounts as well is opt-in
    if cull_config['users']:
        cull_cmd.append('--cull-users')

    return {
        'name': 'cull-idle',
        'admin': True,
        'command': cull_cmd,
    }
def update_services(c, config):
    """
    Reset the list of JupyterHub services and register the enabled ones

    The list always starts out empty. The idle culler service is added when
    services.cull.enabled is truthy.
    """
    c.JupyterHub.services = []

    cull_enabled = config['services']['cull']['enabled']
    if cull_enabled:
        c.JupyterHub.services.append(set_cull_idle_service(config))
2019-06-06 16:52:04 +03:00
def _merge_dictionaries(a, b, path=None, update=True):
"""
Merge two dictionaries recursively.
From https://stackoverflow.com/a/7205107
"""
if path is None:
path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
_merge_dictionaries(a[key], b[key], path + [str(key)])
elif a[key] == b[key]:
pass # same leaf value
elif update:
a[key] = b[key]
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a