Isolate util/web

Douglas Jacobsen 2023-11-09 10:13:28 -07:00
parent 330a5c0010
commit bc06e2bc17
15 changed files with 220 additions and 107 deletions

View File

@ -646,7 +646,9 @@ def _linting_package_file(pkgs, error_cls):
if pkg_cls.homepage.startswith("http://"):
https = re.sub("http", "https", pkg_cls.homepage, 1)
try:
response = urlopen(https)
response = urlopen(https,
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10))
except Exception as e:
msg = 'Error with attempting https for "{0}": '
errors.append(error_cls(msg.format(pkg_cls.name), [str(e)]))
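For orientation: the recurring change in this commit is that call sites resolve verify_ssl and timeout from spack.config themselves and pass them into the web utilities, rather than spack.util.web reading the configuration globally. A minimal sketch of the new call-site shape (the wrapper function below is hypothetical, not part of the diff):

    import spack.config
    import spack.util.web as web_util

    def read_index_text(url):
        # The caller resolves configuration once and hands it to the web layer explicitly.
        _, _, stream = web_util.read_from_url(
            url,
            verify_ssl=spack.config.get("config:verify_ssl", True),
            timeout=spack.config.get("config:connect_timeout", 10),
        )
        return stream.read()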

View File

@ -487,7 +487,10 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
scheme = urllib.parse.urlparse(mirror_url).scheme
if scheme != "oci" and not web_util.url_exists(
url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json")
url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json"),
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
):
return False
@ -978,9 +981,13 @@ def _specs_from_cache_fallback(cache_prefix):
def url_read_method(url):
contents = None
try:
_, _, spec_file = web_util.read_from_url(url)
_, _, spec_file = web_util.read_from_url(
url,
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10)
)
contents = codecs.getreader("utf-8")(spec_file).read()
except (URLError, web_util.SpackWebError) as url_err:
except (URLError, web_util.WebError) as url_err:
tty.error("Error reading specfile: {0}".format(url))
tty.error(url_err)
return contents
@ -1366,8 +1373,18 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
spackfile_path = os.path.join(cache_prefix, tarball_path_name(spec, ".spack"))
remote_spackfile_path = url_util.join(out_url, os.path.relpath(spackfile_path, stage_dir))
fetch_method = spack.config.get('config:url_fetch_method', 'urllib')
verify_ssl = spack.config.get('config:verify_ssl')
timeout = spack.config.get('config:connect_timeout', 10)
url_args = {
'fetch_method': fetch_method,
'verify_ssl': verify_ssl,
'timeout': timeout
}
mkdirp(tarfile_dir)
if web_util.url_exists(remote_spackfile_path):
if web_util.url_exists(remote_spackfile_path, **url_args):
if options.force:
web_util.remove_url(remote_spackfile_path)
else:
@ -1389,12 +1406,12 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
# If force and exists, overwrite. Otherwise raise exception on collision.
if options.force:
if web_util.url_exists(remote_specfile_path):
if web_util.url_exists(remote_specfile_path, **url_args):
web_util.remove_url(remote_specfile_path)
if web_util.url_exists(remote_signed_specfile_path):
if web_util.url_exists(remote_signed_specfile_path, **url_args):
web_util.remove_url(remote_signed_specfile_path)
elif web_util.url_exists(remote_specfile_path) or web_util.url_exists(
remote_signed_specfile_path
elif web_util.url_exists(remote_specfile_path, **url_args) or web_util.url_exists(
remote_signed_specfile_path, **url_args
):
raise NoOverwriteException(url_util.format(remote_specfile_path))
@ -2213,7 +2230,8 @@ def install_root_node(spec, unsigned=False, force=False, sha256=None):
tty.debug("Verified SHA256 checksum of the build cache")
# don't print long padded paths while extracting/relocating binaries
with spack.util.path.filter_padding():
padding = spack.config.get('config:install_tree:padded_length', None)
with spack.util.path.filter_padding(padding=padding):
tty.msg('Installing "{0}" from a buildcache'.format(spec.format()))
extract_tarball(spec, download_result, unsigned, force)
spack.hooks.post_install(spec, False)
@ -2252,12 +2270,20 @@ def try_direct_fetch(spec, mirrors=None):
mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, signed_specfile_name
)
try:
_, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
_, _, fs = web_util.read_from_url(
buildcache_fetch_url_signed_json,
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10)
)
specfile_is_signed = True
except (URLError, web_util.SpackWebError, HTTPError) as url_err:
except (URLError, web_util.WebError, HTTPError) as url_err:
try:
_, _, fs = web_util.read_from_url(buildcache_fetch_url_json)
except (URLError, web_util.SpackWebError, HTTPError) as url_err_x:
_, _, fs = web_util.read_from_url(
buildcache_fetch_url_json,
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10)
)
except (URLError, web_util.WebError, HTTPError) as url_err_x:
tty.debug(
"Did not find {0} on {1}".format(
specfile_name, buildcache_fetch_url_signed_json
@ -2361,10 +2387,19 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
tty.debug("Finding public keys in {0}".format(url_util.format(fetch_url)))
try:
_, _, json_file = web_util.read_from_url(keys_index)
_, _, json_file = web_util.read_from_url(
keys_index,
verify_ssl=spack.config.get('config:verify_ssl', True),
timeout=spack.config.get('config:connect_timeout', 10)
)
json_index = sjson.load(codecs.getreader("utf-8")(json_file))
except (URLError, web_util.SpackWebError) as url_err:
if web_util.url_exists(keys_index):
except (URLError, web_util.WebError) as url_err:
if web_util.url_exists(
keys_index,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
):
err_msg = [
"Unable to find public keys in {0},",
" caught exception attempting to read from {1}.",
@ -2488,7 +2523,12 @@ def needs_rebuild(spec, mirror_url):
# Only check for the presence of the json version of the spec. If the
# mirror only has the json version, or doesn't have the spec at all, we
# need to rebuild.
return not web_util.url_exists(specfile_path)
return not web_util.url_exists(
specfile_path,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
)
def check_specs_against_mirrors(mirrors, specs, output_file=None):
@ -2654,7 +2694,9 @@ def get_remote_hash(self):
# Failure to fetch index.json.hash is not fatal
url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
try:
response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers))
response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers),
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10))
except urllib.error.URLError:
return None
@ -2676,7 +2718,9 @@ def conditional_fetch(self) -> FetchIndexResult:
url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
try:
response = self.urlopen(urllib.request.Request(url_index, headers=self.headers))
response = self.urlopen(urllib.request.Request(url_index, headers=self.headers),
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10))
except urllib.error.URLError as e:
raise FetchIndexError("Could not fetch index from {}".format(url_index), e) from e
@ -2724,7 +2768,9 @@ def conditional_fetch(self) -> FetchIndexResult:
}
try:
response = self.urlopen(urllib.request.Request(url, headers=headers))
response = self.urlopen(urllib.request.Request(url, headers=headers),
verify_ssl=spack.config.get("config:verify_ssl", True),
timeout=spack.config.get("config:connect_timeout", 10))
except urllib.error.HTTPError as e:
if e.getcode() == 304:
# Not modified; that means fresh.
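The index fetchers above now forward the same two settings into self.urlopen, so any injected opener (as in the tests later in this commit) has to accept or absorb those keywords. A rough sketch of a compatible fake opener, assuming the addinfourl shape those tests use:

    import io
    import urllib.request
    import urllib.response

    def fake_urlopen(request: urllib.request.Request, **kwargs):
        # **kwargs absorbs the verify_ssl=... and timeout=... keywords now passed through.
        return urllib.response.addinfourl(
            io.BytesIO(b'{"Hello": "World"}'),
            headers={},  # type: ignore[arg-type]
            url=request.get_full_url(),
            code=200,
        )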

View File

@ -1756,7 +1756,11 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
gpg_path = None
if gpg_url:
gpg_path = web_util.fetch_url_text(gpg_url, dest_dir=os.path.join(work_dir, "_pgp"))
gpg_path = web_util.fetch_url_text(
gpg_url,
dest_dir=os.path.join(work_dir, "_pgp"),
fetch_method=spack.config.get('config:url_fetch_method')
)
rel_gpg_path = gpg_path.replace(work_dir, "").lstrip(os.path.sep)
lock_file = fs.find(work_dir, "spack.lock")[0]
@ -2130,8 +2134,12 @@ def read_broken_spec(broken_spec_url):
object.
"""
try:
_, _, fs = web_util.read_from_url(broken_spec_url)
except (URLError, web_util.SpackWebError, HTTPError):
_, _, fs = web_util.read_from_url(
broken_spec_url,
verify_ssl=cfg.get('config:verify_ssl', True),
timeout=cfg.get('config:connect_timeout', 10)
)
except (URLError, web_util.WebError, HTTPError):
tty.warn("Unable to read broken spec from {0}".format(broken_spec_url))
return None

View File

@ -730,7 +730,10 @@ def ci_rebuild(args):
broken_specs_url = ci_config["broken-specs-url"]
just_built_hash = job_spec.dag_hash()
broken_spec_path = url_util.join(broken_specs_url, just_built_hash)
if web_util.url_exists(broken_spec_path):
if web_util.url_exists(broken_spec_path,
fetch_method=cfg.get('config:url_fetch_method', 'urllib'),
verify_ssl=cfg.get('config:verify_ssl'),
timeout=cfg.get('config:connect_timeout', 10)):
tty.msg("Removing {0} from the list of broken specs".format(broken_spec_path))
try:
web_util.remove_url(broken_spec_path)

View File

@ -1451,7 +1451,11 @@ def fetch_remote_configs(url: str, dest_dir: str, skip_existing: bool = True) ->
def _fetch_file(url):
raw = raw_github_gitlab_url(url)
tty.debug("Reading config from url {0}".format(raw))
return web_util.fetch_url_text(raw, dest_dir=dest_dir)
return web_util.fetch_url_text(
raw,
dest_dir=dest_dir,
fetch_method=CONFIG.get('config:url_fetch_method')
)
if not url:
raise ConfigFileError("Cannot retrieve configuration without a URL")
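fetch_url_text likewise stops consulting the configuration itself; the caller selects the transport. A minimal usage sketch (the URL and destination directory are illustrative, not from the diff):

    import spack.util.web as web_util

    path = web_util.fetch_url_text(
        "https://example.com/packages.yaml",  # example URL
        dest_dir="/tmp/fetched-configs",      # example destination
        fetch_method="urllib",                # or "curl"; previously read from config:url_fetch_method
    )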

View File

@ -301,7 +301,12 @@ def fetch(self):
url = None
errors = []
for url in self.candidate_urls:
if not web_util.url_exists(url):
if not web_util.url_exists(
url,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
):
tty.debug("URL does not exist: " + url)
continue
@ -338,8 +343,12 @@ def _fetch_urllib(self, url):
# Run urllib but grab the mime type from the http headers
try:
url, headers, response = web_util.read_from_url(url)
except web_util.SpackWebError as e:
url, headers, response = web_util.read_from_url(
url,
verify_ssl=spack.config.get('config:verify_ssl', True),
timeout=spack.config.get('config:connect_timeout', 10)
)
except web_util.WebError as e:
# clean up archive on failure.
if self.archive_file:
os.remove(self.archive_file)
@ -385,7 +394,14 @@ def _fetch_curl(self, url):
timeout = self.extra_options.get("timeout")
base_args = web_util.base_curl_fetch_args(url, timeout)
connect_timeout = spack.config.get('config:connect_timeout', 10)
if timeout:
timeout = max(int(timeout), int(connect_timeout))
else:
timeout = int(connect_timeout)
base_args = web_util.base_curl_fetch_args(url, timeout=timeout,
verify_ssl=spack.config.get('config:verify_ssl'))
curl_args = save_args + base_args + cookie_args
# Run curl but grab the mime type from the http headers
@ -403,7 +419,7 @@ def _fetch_curl(self, url):
try:
web_util.check_curl_code(curl.returncode)
except spack.error.FetchError as err:
except web_util.WebError as err:
raise spack.fetch_strategy.FailedDownloadError(url, str(err))
self._check_headers(headers)
@ -1330,7 +1346,11 @@ def fetch(self):
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
_, headers, stream = web_util.read_from_url(
self.url,
verify_ssl=spack.config.get('config:verify_ssl', True),
timeout=spack.config.get('config:connect_timeout', 10)
)
with open(basename, "wb") as f:
shutil.copyfileobj(stream, f)
@ -1377,7 +1397,11 @@ def fetch(self):
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
_, headers, stream = web_util.read_from_url(
self.url,
verify_ssl=spack.config.get('config:verify_ssl', True),
timeout=spack.config.get('config:connect_timeout', 10)
)
with open(basename, "wb") as f:
shutil.copyfileobj(stream, f)
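With the new base_curl_fetch_args signature the curl argument list is built purely from the values passed in. A quick illustrative check (values are examples, assuming Spack is importable):

    import spack.util.web as web_util

    args = web_util.base_curl_fetch_args(
        "https://example.com/archive.tar.gz", verify_ssl=False, timeout=30
    )
    assert "-k" in args                              # SSL verification disabled
    assert args[-2:] == ["--connect-timeout", "30"]  # timeout is passed straight through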

View File

@ -491,7 +491,8 @@ def _process_binary_cache_tarball(
tty.msg(f"Extracting {package_id(pkg)} from binary cache")
with timer.measure("install"), spack.util.path.filter_padding():
padding = spack.config.get('config:install_tree:padded_length', None)
with timer.measure("install"), spack.util.path.filter_padding(padding=padding):
binary_distribution.extract_tarball(
pkg.spec, download_result, unsigned=unsigned, force=False, timer=timer
)
@ -2492,7 +2493,8 @@ def build_process(pkg: "spack.package_base.PackageBase", install_args: dict) ->
installer = BuildProcessInstaller(pkg, install_args)
# don't print long padded paths in executable debug output.
with spack.util.path.filter_padding():
padding = spack.config.get('config:install_tree:padded_length', None)
with spack.util.path.filter_padding(padding=padding):
return installer.run()

View File

@ -986,7 +986,12 @@ def find_valid_url_for_version(self, version):
urls = self.all_urls_for_version(version)
for u in urls:
if spack.util.web.url_exists(u):
if spack.util.web.url_exists(
u,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
):
return u
return None

View File

@ -672,7 +672,7 @@ def test_etag_fetching_304():
# Test conditional fetch with etags. If the remote hasn't modified the file
# it returns 304, which is an HTTPError in urllib-land. That should be
# handled as success, since it means the local cache is up-to-date.
def response_304(request: urllib.request.Request):
def response_304(request: urllib.request.Request, verify_ssl=True, timeout=10):
url = request.get_full_url()
if url == "https://www.example.com/build_cache/index.json":
assert request.get_header("If-none-match") == '"112a8bbc1b3f7f185621c1ee335f0502"'
@ -694,7 +694,7 @@ def response_304(request: urllib.request.Request):
def test_etag_fetching_200():
# Test conditional fetch with etags. The remote has modified the file.
def response_200(request: urllib.request.Request):
def response_200(request: urllib.request.Request, verify_ssl=True, timeout=10):
url = request.get_full_url()
if url == "https://www.example.com/build_cache/index.json":
assert request.get_header("If-none-match") == '"112a8bbc1b3f7f185621c1ee335f0502"'
@ -722,7 +722,7 @@ def response_200(request: urllib.request.Request):
def test_etag_fetching_404():
# Test conditional fetch with etags. The remote has modified the file.
def response_404(request: urllib.request.Request):
def response_404(request: urllib.request.Request, verify_ssl=True, timeout=10):
raise urllib.error.HTTPError(
request.get_full_url(),
404,
@ -745,7 +745,7 @@ def test_default_index_fetch_200():
index_json = '{"Hello": "World"}'
index_json_hash = bindist.compute_hash(index_json)
def urlopen(request: urllib.request.Request):
def urlopen(request: urllib.request.Request, **kwargs):
url = request.get_full_url()
if url.endswith("index.json.hash"):
return urllib.response.addinfourl( # type: ignore[arg-type]
@ -784,7 +784,7 @@ def test_default_index_dont_fetch_index_json_hash_if_no_local_hash():
index_json = '{"Hello": "World"}'
index_json_hash = bindist.compute_hash(index_json)
def urlopen(request: urllib.request.Request):
def urlopen(request: urllib.request.Request, **kwargs):
url = request.get_full_url()
if url.endswith("index.json"):
return urllib.response.addinfourl(
@ -813,7 +813,7 @@ def test_default_index_not_modified():
index_json = '{"Hello": "World"}'
index_json_hash = bindist.compute_hash(index_json)
def urlopen(request: urllib.request.Request):
def urlopen(request: urllib.request.Request, **kwargs):
url = request.get_full_url()
if url.endswith("index.json.hash"):
return urllib.response.addinfourl(
@ -838,7 +838,7 @@ def test_default_index_invalid_hash_file(index_json):
# Test invalid unicode / invalid hash type
index_json_hash = bindist.compute_hash(index_json)
def urlopen(request: urllib.request.Request):
def urlopen(request: urllib.request.Request, **kwargs):
return urllib.response.addinfourl(
io.BytesIO(),
headers={}, # type: ignore[arg-type]
@ -858,7 +858,7 @@ def test_default_index_json_404():
index_json = '{"Hello": "World"}'
index_json_hash = bindist.compute_hash(index_json)
def urlopen(request: urllib.request.Request):
def urlopen(request: urllib.request.Request, **kwargs):
url = request.get_full_url()
if url.endswith("index.json.hash"):
return urllib.response.addinfourl(

View File

@ -904,7 +904,8 @@ def test_env_with_included_config_var_path(tmpdir, packages_file):
spack_yaml = env_path / ev.manifest_name
spack_yaml.write_text(mpileaks_env_config(config_var_path))
config_real_path = substitute_path_variables(config_var_path)
config_real_path = substitute_path_variables(config_var_path,
replacements=spack.paths.path_replacements())
shutil.move(included_file, config_real_path)
assert os.path.exists(config_real_path)

View File

@ -350,8 +350,9 @@ def _which(*args, **kwargs):
def test_url_fetch_text_without_url(tmpdir):
with pytest.raises(spack.error.FetchError, match="URL is required"):
web_util.fetch_url_text(None)
with pytest.raises(web_util.WebError, match="URL is required"):
web_util.fetch_url_text(None,
fetch_method=spack.config.get('config:url_fetch_method'))
def test_url_fetch_text_curl_failures(tmpdir, monkeypatch):
@ -367,18 +368,19 @@ def _which(*args, **kwargs):
monkeypatch.setattr(spack.util.web, "which", _which)
with spack.config.override("config:url_fetch_method", "curl"):
with pytest.raises(spack.error.FetchError, match="Missing required curl"):
web_util.fetch_url_text("https://github.com/")
with pytest.raises(web_util.WebError, match="Missing required curl"):
web_util.fetch_url_text("https://github.com/",
fetch_method=spack.config.get('config:url_fetch_method'))
def test_url_check_curl_errors():
"""Check that standard curl error returncodes raise expected errors."""
# Check returncode 22 (i.e., 404)
with pytest.raises(spack.error.FetchError, match="not found"):
with pytest.raises(web_util.WebError, match="not found"):
web_util.check_curl_code(22)
# Check returncode 60 (certificate error)
with pytest.raises(spack.error.FetchError, match="invalid certificate"):
with pytest.raises(web_util.WebError, match="invalid certificate"):
web_util.check_curl_code(60)
@ -395,8 +397,9 @@ def _which(*args, **kwargs):
monkeypatch.setattr(spack.util.web, "which", _which)
with spack.config.override("config:url_fetch_method", "curl"):
with pytest.raises(spack.error.FetchError, match="Missing required curl"):
web_util.url_exists("https://github.com/")
with pytest.raises(web_util.WebError, match="Missing required curl"):
web_util.url_exists("https://github.com/",
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'))
def test_url_fetch_text_urllib_bad_returncode(tmpdir, monkeypatch):
@ -410,16 +413,18 @@ def _read_from_url(*args, **kwargs):
monkeypatch.setattr(spack.util.web, "read_from_url", _read_from_url)
with spack.config.override("config:url_fetch_method", "urllib"):
with pytest.raises(spack.error.FetchError, match="failed with error code"):
web_util.fetch_url_text("https://github.com/")
with pytest.raises(web_util.WebError, match="failed with error code"):
web_util.fetch_url_text("https://github.com/",
fetch_method=spack.config.get('config:url_fetch_method'))
def test_url_fetch_text_urllib_web_error(tmpdir, monkeypatch):
def _raise_web_error(*args, **kwargs):
raise web_util.SpackWebError("bad url")
raise web_util.WebError("bad url")
monkeypatch.setattr(spack.util.web, "read_from_url", _raise_web_error)
with spack.config.override("config:url_fetch_method", "urllib"):
with pytest.raises(spack.error.FetchError, match="fetch failed to verify"):
web_util.fetch_url_text("https://github.com/")
with pytest.raises(web_util.WebError, match="fetch failed to verify"):
web_util.fetch_url_text("https://github.com/",
fetch_method=spack.config.get('config:url_fetch_method'))
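These tests pin down the exception rename: the web layer now raises its own WebError (a spack.util.error.UtilityError) instead of spack.error.FetchError. A small sketch of what catching code looks like after the change:

    import spack.util.web as web_util

    try:
        web_util.check_curl_code(22)  # curl exit code 22 maps to an HTTP error such as 404
    except web_util.WebError as err:
        print("fetch failed: {0}".format(err))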

View File

@ -79,25 +79,26 @@ def test_output_filtering(self, capfd, install_mockery, mutable_config):
# test filtering when padding is enabled
with spack.config.override("config:install_tree", {"padded_length": 256}):
# tty.msg with filtering on the first argument
with sup.filter_padding():
padding = spack.config.get('config:install_tree:padded_length', None)
with sup.filter_padding(padding=padding):
tty.msg("here is a long path: %s/with/a/suffix" % long_path)
out, err = capfd.readouterr()
assert padding_string in out
# tty.msg with filtering on a later argument
with sup.filter_padding():
with sup.filter_padding(padding=padding):
tty.msg("here is a long path:", "%s/with/a/suffix" % long_path)
out, err = capfd.readouterr()
assert padding_string in out
# tty.error with filtering on the first argument
with sup.filter_padding():
with sup.filter_padding(padding=padding):
tty.error("here is a long path: %s/with/a/suffix" % long_path)
out, err = capfd.readouterr()
assert padding_string in err
# tty.error with filtering on a later argument
with sup.filter_padding():
with sup.filter_padding(padding=padding):
tty.error("here is a long path:", "%s/with/a/suffix" % long_path)
out, err = capfd.readouterr()
assert padding_string in err

View File

@ -332,10 +332,20 @@ def get_s3_session(url, method="fetch"):
monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
fake_s3_url_exists = "s3://my-bucket/subdirectory/my-file"
assert spack.util.web.url_exists(fake_s3_url_exists)
assert spack.util.web.url_exists(
fake_s3_url_exists,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
)
fake_s3_url_does_not_exist = "s3://my-bucket/subdirectory/my-notfound-file"
assert not spack.util.web.url_exists(fake_s3_url_does_not_exist)
assert not spack.util.web.url_exists(
fake_s3_url_does_not_exist,
fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
verify_ssl=spack.config.get('config:verify_ssl'),
timeout=spack.config.get('config:connect_timeout', 10)
)
def test_s3_url_parsing():

View File

@ -284,15 +284,12 @@ def replacer(match):
@contextlib.contextmanager
def filter_padding():
def filter_padding(padding=None):
"""Context manager to safely disable path padding in all Spack output.
This is needed because Spack's debug output gets extremely long when we use a
long padded installation path.
"""
import spack.config
padding = spack.config.get("config:install_tree:padded_length", None)
if padding:
# filter out all padding from the install command output
with tty.output_filter(padding_filter):
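filter_padding no longer imports spack.config; the caller reads the padded length and passes it in, mirroring the installer and test changes above. A minimal sketch of the new usage:

    import llnl.util.tty as tty
    import spack.config
    import spack.util.path as sup

    padding = spack.config.get("config:install_tree:padded_length", None)
    with sup.filter_padding(padding=padding):
        tty.msg("Installing to a long padded prefix ...")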

View File

@ -25,15 +25,27 @@
from llnl.util import lang, tty
from llnl.util.filesystem import mkdirp, rename, working_dir
import spack.config
import spack.error
import spack.util.url as url_util
import spack.util.error
from .executable import CommandNotFoundError, which
from .gcs import GCSBlob, GCSBucket, GCSHandler
from .s3 import UrllibS3Handler, get_s3_session
def build_web_keywords(config_object):
key_map = {
'verify_ssl': {'path': 'config:verify_ssl', 'default': True},
'timeout': {'path': 'config:connect_timeout', 'default': 10},
}
keywords = {}
for key, conf in key_map.items():
keywords.update({key: config_object.get(conf['path'], conf['default'])})
return keywords
class DetailedHTTPError(HTTPError):
def __init__(
self, req: Request, code: int, msg: str, hdrs: email.message.Message, fp: Optional[IO]
@ -75,9 +87,8 @@ def _urlopen():
)
# And dynamically dispatch based on the verify_ssl argument.
def dispatch_open(fullurl, data=None, timeout=None):
opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
timeout = timeout or spack.config.get("config:connect_timeout", 10)
def dispatch_open(fullurl, data=None, timeout=10, verify_ssl=True):
opener = with_ssl if verify_ssl else without_ssl
return opener.open(fullurl, data, timeout)
return dispatch_open
@ -134,7 +145,7 @@ def handle_starttag(self, tag, attrs):
self.base_url = val
def read_from_url(url, accept_content_type=None):
def read_from_url(url, accept_content_type=None, verify_ssl=True, timeout=10, **kwargs):
if isinstance(url, str):
url = urllib.parse.urlparse(url)
@ -142,9 +153,9 @@ def read_from_url(url, accept_content_type=None):
request = Request(url.geturl(), headers={"User-Agent": SPACK_USER_AGENT})
try:
response = urlopen(request)
response = urlopen(request, **kwargs)
except URLError as err:
raise SpackWebError("Download failed: {}".format(str(err)))
raise WebError("Download failed: {}".format(str(err)))
if accept_content_type:
try:
@ -211,23 +222,23 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
)
def base_curl_fetch_args(url, timeout=0):
def base_curl_fetch_args(url, verify_ssl=True, timeout=0):
"""Return the basic fetch arguments typically used in calls to curl.
The arguments include those for ensuring behaviors such as failing on
errors for codes over 400, printing HTTP headers, resolving 3xx redirects,
status or failure handling, and connection timeouts.
It also uses the following configuration option to set an additional
It also uses the following input arguments to set an additional
argument as needed:
* config:connect_timeout (int): connection timeout
* config:verify_ssl (str): Perform SSL verification
* timeout (int): connection timeout
* verify_ssl (bool): Perform SSL verification
Arguments:
url (str): URL whose contents will be fetched
timeout (int): Connection timeout, which is only used if higher than
config:connect_timeout
timeout (int): Connection timeout
verify_ssl (bool): Whether to perform SSL verification
Returns (list): list of argument strings
"""
@ -238,7 +249,7 @@ def base_curl_fetch_args(url, timeout=0):
"-L", # resolve 3xx redirects
url,
]
if not spack.config.get("config:verify_ssl"):
if not verify_ssl:
curl_args.append("-k")
if sys.stdout.isatty() and tty.msg_enabled():
@ -246,11 +257,8 @@ def base_curl_fetch_args(url, timeout=0):
else:
curl_args.append("-sS") # show errors if fail
connect_timeout = spack.config.get("config:connect_timeout", 10)
if timeout:
connect_timeout = max(int(connect_timeout), int(timeout))
if connect_timeout > 0:
curl_args.extend(["--connect-timeout", str(connect_timeout)])
if timeout > 0:
curl_args.extend(["--connect-timeout", str(timeout)])
return curl_args
@ -266,11 +274,11 @@ def check_curl_code(returncode):
if returncode != 0:
if returncode == 22:
# This is a 404. Curl will print the error.
raise spack.error.FetchError("URL was not found!")
raise WebError("URL was not found!")
if returncode == 60:
# This is a certificate error. Suggest spack -k
raise spack.error.FetchError(
raise WebError(
"Curl was unable to fetch due to invalid certificate. "
"This is either an attack, or your cluster's SSL "
"configuration is bad. If you believe your SSL "
@ -279,7 +287,7 @@ def check_curl_code(returncode):
"Use this at your own risk."
)
raise spack.error.FetchError("Curl failed with error {0}".format(returncode))
raise WebError("Curl failed with error {0}".format(returncode))
def _curl(curl=None):
@ -288,11 +296,11 @@ def _curl(curl=None):
curl = which("curl", required=True)
except CommandNotFoundError as exc:
tty.error(str(exc))
raise spack.error.FetchError("Missing required curl fetch method")
raise WebError("Missing required curl fetch method")
return curl
def fetch_url_text(url, curl=None, dest_dir="."):
def fetch_url_text(url, curl=None, dest_dir=".", fetch_method=None):
"""Retrieves text-only URL content using the configured fetch method.
It determines the fetch method from:
@ -316,19 +324,18 @@ def fetch_url_text(url, curl=None, dest_dir="."):
Raises FetchError if the curl returncode indicates failure
"""
if not url:
raise spack.error.FetchError("A URL is required to fetch its text")
raise WebError("A URL is required to fetch its text")
tty.debug("Fetching text at {0}".format(url))
filename = os.path.basename(url)
path = os.path.join(dest_dir, filename)
fetch_method = spack.config.get("config:url_fetch_method")
tty.debug("Using '{0}' to fetch {1} into {2}".format(fetch_method, url, path))
if fetch_method == "curl":
curl_exe = _curl(curl)
if not curl_exe:
raise spack.error.FetchError("Missing required fetch method (curl)")
raise WebError("Missing required fetch method (curl)")
curl_args = ["-O"]
curl_args.extend(base_curl_fetch_args(url))
@ -346,7 +353,7 @@ def fetch_url_text(url, curl=None, dest_dir="."):
returncode = response.getcode()
if returncode and returncode != 200:
raise spack.error.FetchError(
raise WebError(
"Urllib failed with error code {0}".format(returncode)
)
@ -358,13 +365,13 @@ def fetch_url_text(url, curl=None, dest_dir="."):
return path
except SpackWebError as err:
raise spack.error.FetchError("Urllib fetch failed to verify url: {0}".format(str(err)))
except WebError as err:
raise WebError("Urllib fetch failed to verify url: {0}".format(str(err)))
return None
def url_exists(url, curl=None):
def url_exists(url, curl=None, fetch_method=None, verify_ssl=True, timeout=10):
"""Determines whether url exists.
A scheme-specific process is used for Google Storage (`gs`) and Amazon
@ -382,9 +389,7 @@ def url_exists(url, curl=None):
url_result = urllib.parse.urlparse(url)
# Use curl if configured to do so
use_curl = spack.config.get(
"config:url_fetch_method", "urllib"
) == "curl" and url_result.scheme not in ("gs", "s3")
use_curl = fetch_method == "curl" and url_result.scheme not in ("gs", "s3")
if use_curl:
curl_exe = _curl(curl)
if not curl_exe:
@ -393,7 +398,7 @@ def url_exists(url, curl=None):
# Telling curl to fetch the first byte (-r 0-0) is supposed to be
# portable.
curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url]
if not spack.config.get("config:verify_ssl"):
if not verify_ssl:
curl_args.append("-k")
_ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
return curl_exe.returncode == 0
@ -402,7 +407,7 @@ def url_exists(url, curl=None):
try:
urlopen(
Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
timeout=spack.config.get("config:connect_timeout", 10),
timeout=timeout,
)
return True
except URLError as e:
@ -771,11 +776,11 @@ def parse_etag(header_value):
return valid.group(1) if valid else None
class SpackWebError(spack.error.SpackError):
class WebError(spack.util.error.UtilityError):
"""Superclass for Spack web spidering errors."""
class NoNetworkConnectionError(SpackWebError):
class NoNetworkConnectionError(WebError):
"""Raised when an operation can't get an internet connection."""
def __init__(self, message, url):
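The build_web_keywords helper introduced above collects the two recurring keywords from a config object. One plausible call-site pairing, shown purely for illustration (this exact usage is not part of the diff):

    import spack.config
    import spack.util.web as web_util

    web_kwargs = web_util.build_web_keywords(spack.config)  # {'verify_ssl': ..., 'timeout': ...}
    if web_util.url_exists(
        "https://mirror.example.com/build_cache/index.json",  # example URL
        fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
        **web_kwargs,
    ):
        print("index is available")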