Respect --insecure (-k) flag when fetching list_url. (#5178)

* Respect --insecure when fetching list_url.
* Ensure support for Python 2.6, and that urlopen works for Python versions prior to 2.7.9 and between 3.0 and 3.4.3.
Sergey Kosukhin 2017-08-25 11:15:46 +02:00, committed by Todd Gamblin
parent c0d3d33b58
commit fda3249c8b
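The pattern the diff below implements: build an ssl context only on interpreters whose urlopen() accepts one, pass context=None otherwise, and have a small wrapper drop the None before calling urlopen(). A minimal standalone sketch of that pattern, under the stated assumptions (insecure, make_context, and compat_urlopen here are illustrative stand-ins for Spack's spack.insecure flag and its _urlopen helper, not actual Spack code):

    import ssl
    import sys

    try:
        from urllib.request import Request, urlopen   # Python 3
    except ImportError:
        from urllib2 import Request, urlopen          # Python 2

    # Stand-in for Spack's global spack.insecure flag (set by `spack -k`).
    insecure = False

    def make_context():
        # urlopen() only grew a 'context' parameter in 2.7.9 and 3.4.3,
        # so on older interpreters return None and skip the argument.
        if sys.version_info < (2, 7, 9) or \
                (3,) < sys.version_info < (3, 4, 3):
            return None
        if insecure:
            # Skip certificate verification, as -k requests.
            return ssl._create_unverified_context()
        return ssl.create_default_context()

    def compat_urlopen(req, **kwargs):
        # Drop context=None so old urlopen() signatures don't choke on it.
        if kwargs.get('context') is None:
            kwargs.pop('context', None)
        return urlopen(req, **kwargs)

    if __name__ == '__main__':
        req = Request('https://spack.io')
        req.get_method = lambda: "HEAD"   # HEAD first, as the spider does
        resp = compat_urlopen(req, timeout=10, context=make_context())
        print(resp.headers.get('Content-type'))

The same drop-the-kwarg trick is what the new _urlopen() wrapper in the diff does, which is why callers can pass context=context unconditionally.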


@@ -24,6 +24,7 @@
 ##############################################################################
 import re
 import os
+import ssl
 import sys
 import traceback
@@ -105,6 +106,20 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
     root = re.sub('/index.html$', '', root)

     try:
+        context = None
+        if sys.version_info < (2, 7, 9) or \
+                ((3,) < sys.version_info < (3, 4, 3)):
+            if not spack.insecure:
+                tty.warn("Spack will not check SSL certificates. You need to "
+                         "update your Python to enable certificate "
+                         "verification.")
+        else:
+            # We explicitly create default context to avoid error described in
+            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
+            context = ssl._create_unverified_context() \
+                if spack.insecure \
+                else ssl.create_default_context()
+
         # Make a HEAD request first to check the content type. This lets
         # us ignore tarballs and gigantic files.
         # It would be nice to do this with the HTTP Accept header to avoid
@@ -112,7 +127,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=TIMEOUT)
+        resp = _urlopen(req, timeout=TIMEOUT, context=context)

         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
@@ -125,7 +140,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = urlopen(req, timeout=TIMEOUT)
+        response = _urlopen(req, timeout=TIMEOUT, context=context)
         response_url = response.geturl()

         # Read the page and stick it in the map we'll return
@@ -176,6 +191,13 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
     except URLError as e:
         tty.debug(e)

+        if isinstance(e.reason, ssl.SSLError):
+            tty.warn("Spack was unable to fetch url list due to a certificate "
+                     "verification problem. You can try running spack -k, "
+                     "which will not check SSL certificates. Use this at your "
+                     "own risk.")
+
         if raise_on_error:
             raise spack.error.NoNetworkConnectionError(str(e), url)
@@ -202,8 +224,16 @@ def _spider_wrapper(args):
     return _spider(*args)


-def spider(root_url, depth=0):
+def _urlopen(*args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
+    # We don't pass the 'context' parameter to urlopen because it
+    # was introduced only starting with versions 2.7.9 and 3.4.3 of Python.
+    if 'context' in kwargs and kwargs['context'] is None:
+        del kwargs['context']
+    return urlopen(*args, **kwargs)
+
+
+def spider(root_url, depth=0):
     """Gets web pages from a root URL.

     If depth is specified (e.g., depth=2), then this will also follow