isolate util/s3

Gregory Becker 2024-02-20 10:56:40 -08:00
parent ee36214f83
commit 0b92a19620
9 changed files with 153 additions and 106 deletions
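
The pattern applied across the nine files below: spack.util.web and spack.util.s3 stop reading spack.config internally, and callers pass verify_ssl, fetch_method, and timeout down explicitly. A minimal sketch of that calling convention, with a placeholder mirror URL (the keyword arguments mirror the diff; this is an illustration, not code from the commit):

```python
# Minimal sketch of the calling convention this commit introduces.
# The s3:// URL is a placeholder; the keyword arguments mirror the diff.
import spack.config
import spack.util.web as web_util

web_util.remove_url(
    "s3://my-bucket/mirror",  # placeholder mirror URL
    recursive=True,
    # config is now read at the call site and handed to the util layer
    verify_ssl=spack.config.get("config:verify_ssl", True),
)
```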

View File

@@ -488,9 +488,9 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
         if scheme != "oci" and not web_util.url_exists(
             url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json"),
-            fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-            verify_ssl=spack.config.get('config:verify_ssl'),
-            timeout=spack.config.get('config:connect_timeout', 10)
+            fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+            verify_ssl=spack.config.get("config:verify_ssl"),
+            timeout=spack.config.get("config:connect_timeout", 10),
         ):
             return False
@@ -536,8 +536,9 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
 def binary_index_location():
     """Set up a BinaryCacheIndex for remote buildcache dbs in the user's homedir."""
     cache_root = os.path.join(misc_cache_location(), "indices")
-    return spack.util.path.canonicalize_path(cache_root,
-                                             replacements=spack.paths.path_replacements())
+    return spack.util.path.canonicalize_path(
+        cache_root, replacements=spack.paths.path_replacements()
+    )
 
 
 #: Default binary cache index instance
@@ -909,6 +910,7 @@ def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_di
         url_util.join(cache_prefix, "index.json"),
         keep_original=False,
         extra_args={"ContentType": "application/json", "CacheControl": "no-cache"},
+        verify_ssl=spack.config.get("config:verify_ssl", True),
     )
 
     # Push the hash
@@ -917,6 +919,7 @@ def _read_specs_and_push_index(file_list, read_method, cache_prefix, db, temp_di
         url_util.join(cache_prefix, "index.json.hash"),
         keep_original=False,
         extra_args={"ContentType": "text/plain", "CacheControl": "no-cache"},
+        verify_ssl=spack.config.get("config:verify_ssl", True),
     )
@@ -984,7 +987,7 @@ def url_read_method(url):
             _, _, spec_file = web_util.read_from_url(
                 url,
                 verify_ssl=spack.config.get("config:verify_ssl", True),
-                timeout=spack.config.get("config:connect_timeout", 10)
+                timeout=spack.config.get("config:connect_timeout", 10),
             )
             contents = codecs.getreader("utf-8")(spec_file).read()
         except (URLError, web_util.WebError) as url_err:
@@ -995,7 +998,9 @@ def url_read_method(url):
     try:
         file_list = [
             url_util.join(cache_prefix, entry)
-            for entry in web_util.list_url(cache_prefix)
+            for entry in web_util.list_url(
+                cache_prefix, verify_ssl=spack.config.get("config:verify_ssl", True)
+            )
             if entry.endswith("spec.json") or entry.endswith("spec.json.sig")
         ]
         read_fn = url_read_method
@@ -1093,7 +1098,9 @@ def generate_key_index(key_prefix, tmpdir=None):
     try:
         fingerprints = (
             entry[:-4]
-            for entry in web_util.list_url(key_prefix, recursive=False)
+            for entry in web_util.list_url(
+                key_prefix, recursive=False, verify_ssl=spack.config.get("config:verify_ssl", True)
+            )
             if entry.endswith(".pub")
         )
     except KeyError as inst:
@@ -1130,6 +1137,7 @@ def generate_key_index(key_prefix, tmpdir=None):
             url_util.join(key_prefix, "index.json"),
             keep_original=False,
             extra_args={"ContentType": "application/json"},
+            verify_ssl=spack.config.get("config:verify_ssl", True),
         )
     except Exception as err:
         msg = "Encountered problem pushing key index to {0}: {1}".format(key_prefix, err)
@@ -1373,20 +1381,18 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
     spackfile_path = os.path.join(cache_prefix, tarball_path_name(spec, ".spack"))
     remote_spackfile_path = url_util.join(out_url, os.path.relpath(spackfile_path, stage_dir))
 
-    fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-    verify_ssl=spack.config.get('config:verify_ssl'),
-    timeout=spack.config.get('config:connect_timeout', 10)
-    url_args = {
-        'fetch_method': fetch_method,
-        'verify_ssl': verify_ssl,
-        'timeout': timeout
-    }
+    fetch_method = (spack.config.get("config:url_fetch_method", "urllib"),)
+    verify_ssl = (spack.config.get("config:verify_ssl"),)
+    timeout = spack.config.get("config:connect_timeout", 10)
+    url_args = {"fetch_method": fetch_method, "verify_ssl": verify_ssl, "timeout": timeout}
 
     mkdirp(tarfile_dir)
     if web_util.url_exists(remote_spackfile_path, **url_args):
         if options.force:
-            web_util.remove_url(remote_spackfile_path)
+            web_util.remove_url(
+                remote_spackfile_path, verify_ssl=spack.config.get("config:verify_ssl", True)
+            )
         else:
             raise NoOverwriteException(url_util.format(remote_spackfile_path))
@@ -1406,10 +1412,11 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
     # If force and exists, overwrite. Otherwise raise exception on collision.
     if options.force:
+        verify_ssl = spack.config.get("config:verify_ssl", True)
         if web_util.url_exists(remote_specfile_path, **url_args):
-            web_util.remove_url(remote_specfile_path)
+            web_util.remove_url(remote_specfile_path, verify_ssl=verify_ssl)
         if web_util.url_exists(remote_signed_specfile_path, **url_args):
-            web_util.remove_url(remote_signed_specfile_path)
+            web_util.remove_url(remote_signed_specfile_path, verify_ssl=verify_ssl)
     elif web_util.url_exists(remote_specfile_path, **url_args) or web_util.url_exists(
         remote_signed_specfile_path, **url_args
     ):
@@ -1445,11 +1452,17 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
         sign_specfile(key, options.force, specfile_path)
 
     # push tarball and signed spec json to remote mirror
-    web_util.push_to_url(spackfile_path, remote_spackfile_path, keep_original=False)
+    web_util.push_to_url(
+        spackfile_path,
+        remote_spackfile_path,
+        keep_original=False,
+        verify_ssl=spack.config.get("config:verify_ssl", True),
+    )
     web_util.push_to_url(
         signed_specfile_path if not options.unsigned else specfile_path,
         remote_signed_specfile_path if not options.unsigned else remote_specfile_path,
         keep_original=False,
+        verify_ssl=spack.config.get("config:verify_ssl", True),
     )
 
     # push the key to the build cache's _pgp directory so it can be
@@ -2230,7 +2243,7 @@ def install_root_node(spec, unsigned=False, force=False, sha256=None):
         tty.debug("Verified SHA256 checksum of the build cache")
 
     # don't print long padded paths while extracting/relocating binaries
-    padding = spack.config.get('config:install_tree:padded_length', None)
+    padding = spack.config.get("config:install_tree:padded_length", None)
     with spack.util.path.filter_padding(padding=padding):
         tty.msg('Installing "{0}" from a buildcache'.format(spec.format()))
         extract_tarball(spec, download_result, unsigned, force)
@@ -2273,7 +2286,7 @@ def try_direct_fetch(spec, mirrors=None):
             _, _, fs = web_util.read_from_url(
                 buildcache_fetch_url_signed_json,
                 verify_ssl=spack.config.get("config:verify_ssl", True),
-                timeout=spack.config.get("config:connect_timeout", 10)
+                timeout=spack.config.get("config:connect_timeout", 10),
             )
             specfile_is_signed = True
         except (URLError, web_util.WebError, HTTPError) as url_err:
@@ -2281,7 +2294,7 @@ def try_direct_fetch(spec, mirrors=None):
                 _, _, fs = web_util.read_from_url(
                     buildcache_fetch_url_json,
                     verify_ssl=spack.config.get("config:verify_ssl", True),
-                    timeout=spack.config.get("config:connect_timeout", 10)
+                    timeout=spack.config.get("config:connect_timeout", 10),
                 )
             except (URLError, web_util.WebError, HTTPError) as url_err_x:
                 tty.debug(
@@ -2389,17 +2402,17 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
         try:
             _, _, json_file = web_util.read_from_url(
                 keys_index,
-                verify_ssl=spack.config.get('config:verify_ssl', True),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
             )
             json_index = sjson.load(codecs.getreader("utf-8")(json_file))
         except (URLError, web_util.WebError) as url_err:
             if web_util.url_exists(
                 keys_index,
-                fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-                verify_ssl=spack.config.get('config:verify_ssl'),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+                verify_ssl=spack.config.get("config:verify_ssl"),
+                timeout=spack.config.get("config:connect_timeout", 10),
             ):
                 err_msg = [
                     "Unable to find public keys in {0},",
                     " caught exception attempting to read from {1}.",
@@ -2489,7 +2502,10 @@ def push_keys(*mirrors, **kwargs):
                 # uploaded to the mirror.
                 if not keys_local:
                     spack.util.web.push_to_url(
-                        export_target, url_util.join(keys_url, filename), keep_original=False
+                        export_target,
+                        url_util.join(keys_url, filename),
+                        keep_original=False,
+                        verify_ssl=spack.config.get("config:verify_ssl", True),
                     )
 
             if regenerate_index:
@@ -2525,9 +2541,9 @@ def needs_rebuild(spec, mirror_url):
     # need to rebuild.
     return not web_util.url_exists(
         specfile_path,
-        fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-        verify_ssl=spack.config.get('config:verify_ssl'),
-        timeout=spack.config.get('config:connect_timeout', 10)
+        fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+        verify_ssl=spack.config.get("config:verify_ssl"),
+        timeout=spack.config.get("config:connect_timeout", 10),
     )
@@ -2694,9 +2710,11 @@ def get_remote_hash(self):
         # Failure to fetch index.json.hash is not fatal
         url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
         try:
-            response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers),
-                                    verify_ssl=spack.config.get("config:verify_ssl", True),
-                                    timeout=spack.config.get("config:connect_timeout", 10))
+            response = self.urlopen(
+                urllib.request.Request(url_index_hash, headers=self.headers),
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
+            )
         except urllib.error.URLError:
             return None
@@ -2718,9 +2736,11 @@ def conditional_fetch(self) -> FetchIndexResult:
         url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
         try:
-            response = self.urlopen(urllib.request.Request(url_index, headers=self.headers),
-                                    verify_ssl=spack.config.get("config:verify_ssl", True),
-                                    timeout=spack.config.get("config:connect_timeout", 10))
+            response = self.urlopen(
+                urllib.request.Request(url_index, headers=self.headers),
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
+            )
         except urllib.error.URLError as e:
             raise FetchIndexError("Could not fetch index from {}".format(url_index), e) from e
@@ -2768,9 +2788,11 @@ def conditional_fetch(self) -> FetchIndexResult:
         }
 
         try:
-            response = self.urlopen(urllib.request.Request(url, headers=headers),
-                                    verify_ssl=spack.config.get("config:verify_ssl", True),
-                                    timeout=spack.config.get("config:connect_timeout", 10))
+            response = self.urlopen(
+                urllib.request.Request(url, headers=headers),
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
+            )
         except urllib.error.HTTPError as e:
             if e.getcode() == 304:
                 # Not modified; that means fresh.
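
The read-side call sites above all share the same shape; a hedged sketch with a placeholder URL (read_from_url returns the final URL, the response headers, and a readable stream):

```python
# Hedged sketch of the read-side pattern repeated above; the URL is a placeholder.
import spack.config
import spack.util.web as web_util

_, _, stream = web_util.read_from_url(
    "https://cache.example.com/build_cache/index.json",  # placeholder URL
    verify_ssl=spack.config.get("config:verify_ssl", True),
    timeout=spack.config.get("config:connect_timeout", 10),
)
```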

View File

@@ -1759,7 +1759,7 @@ def reproduce_ci_job(url, work_dir, autostart, gpg_url, runtime):
         gpg_path = web_util.fetch_url_text(
             gpg_url,
             dest_dir=os.path.join(work_dir, "_pgp"),
-            fetch_method=spack.config.get('config:url_fetch_method')
+            fetch_method=spack.config.get("config:url_fetch_method"),
         )
         rel_gpg_path = gpg_path.replace(work_dir, "").lstrip(os.path.sep)
@@ -2117,7 +2117,11 @@ def write_broken_spec(url, pkg_name, stack_name, job_url, pipeline_url, spec_dic
         with open(file_path, "w") as fd:
             fd.write(syaml.dump(broken_spec_details))
         web_util.push_to_url(
-            file_path, url, keep_original=False, extra_args={"ContentType": "text/plain"}
+            file_path,
+            url,
+            keep_original=False,
+            extra_args={"ContentType": "text/plain"},
+            verify_ssl=spack.config.get("config:verify_ssl", True),
         )
     except Exception as err:
         # If there is an S3 error (e.g., access denied or connection
@@ -2136,8 +2140,8 @@ def read_broken_spec(broken_spec_url):
     try:
         _, _, fs = web_util.read_from_url(
             broken_spec_url,
-            verify_ssl=cfg.get('config:verify_ssl', True),
-            timeout=cfg.get('config:connect_timeout', 10)
+            verify_ssl=cfg.get("config:verify_ssl", True),
+            timeout=cfg.get("config:connect_timeout", 10),
         )
     except (URLError, web_util.WebError, HTTPError):
         tty.warn("Unable to read broken spec from {0}".format(broken_spec_url))

View File

@@ -918,7 +918,12 @@ def copy_buildcache_file(src_url, dest_url, local_path=None):
         try:
             temp_stage.create()
             temp_stage.fetch()
-            web_util.push_to_url(local_path, dest_url, keep_original=True)
+            web_util.push_to_url(
+                local_path,
+                dest_url,
+                keep_original=True,
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+            )
         except spack.error.FetchError as e:
             # Expected, since we have to try all the possible extensions
             tty.debug("no such file: {0}".format(src_url))

View File

@@ -730,13 +730,17 @@ def ci_rebuild(args):
             broken_specs_url = ci_config["broken-specs-url"]
             just_built_hash = job_spec.dag_hash()
             broken_spec_path = url_util.join(broken_specs_url, just_built_hash)
-            if web_util.url_exists(broken_spec_path,
-                                   fetch_method=cfg.get('config:url_fetch_method', 'urllib'),
-                                   verify_ssl=cfg.get('config:verify_ssl'),
-                                   timeout=cfg.get('config:connect_timeout', 10)):
+            if web_util.url_exists(
+                broken_spec_path,
+                fetch_method=cfg.get("config:url_fetch_method", "urllib"),
+                verify_ssl=cfg.get("config:verify_ssl"),
+                timeout=cfg.get("config:connect_timeout", 10),
+            ):
                 tty.msg("Removing {0} from the list of broken specs".format(broken_spec_path))
                 try:
-                    web_util.remove_url(broken_spec_path)
+                    web_util.remove_url(
+                        broken_spec_path, verify_ssl=cfg.get("config:verify_ssl", True)
+                    )
                 except Exception as err:
                     # If there is an S3 error (e.g., access denied or connection
                     # error), the first non boto-specific class in the exception

View File

@@ -495,7 +495,9 @@ def mirror_destroy(args):
     elif args.mirror_url:
         mirror_url = args.mirror_url
 
-    web_util.remove_url(mirror_url, recursive=True)
+    web_util.remove_url(
+        mirror_url, recursive=True, verify_ssl=spack.config.get("config:verify_ssl", True)
+    )
 
 
 def mirror(parser, args):

View File

@@ -302,11 +302,11 @@ def fetch(self):
         errors = []
         for url in self.candidate_urls:
             if not web_util.url_exists(
                 url,
-                fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-                verify_ssl=spack.config.get('config:verify_ssl'),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+                verify_ssl=spack.config.get("config:verify_ssl"),
+                timeout=spack.config.get("config:connect_timeout", 10),
             ):
                 tty.debug("URL does not exist: " + url)
                 continue
@@ -345,8 +345,8 @@ def _fetch_urllib(self, url):
         try:
             url, headers, response = web_util.read_from_url(
                 url,
-                verify_ssl=spack.config.get('config:verify_ssl', True),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
             )
         except web_util.WebError as e:
             # clean up archive on failure.
@@ -394,14 +394,15 @@ def _fetch_curl(self, url):
         timeout = self.extra_options.get("timeout")
-        connect_timeout = spack.config.get('config:connect_timeout', 10)
+        connect_timeout = spack.config.get("config:connect_timeout", 10)
 
         if timeout:
             timeout = max(int(timeout), int(connect_timeout))
         else:
             timeout = int(connect_timeout)
 
-        base_args = web_util.base_curl_fetch_args(url, timeout=timeout,
-                                                  verify_ssl=spack.config.get('config:verify_ssl'))
+        base_args = web_util.base_curl_fetch_args(
+            url, timeout=timeout, verify_ssl=spack.config.get("config:verify_ssl")
+        )
         curl_args = save_args + base_args + cookie_args
 
         # Run curl but grab the mime type from the http headers
@@ -479,7 +480,10 @@ def archive(self, destination):
             raise NoArchiveFileError("Cannot call archive() before fetching.")
 
         web_util.push_to_url(
-            self.archive_file, url_util.path_to_file_url(destination), keep_original=True
+            self.archive_file,
+            url_util.path_to_file_url(destination),
+            keep_original=True,
+            verify_ssl=spack.config.get("config:verify_ssl", True),
         )
 
     @_needs_stage
@@ -1348,8 +1352,8 @@ def fetch(self):
         with working_dir(self.stage.path):
             _, headers, stream = web_util.read_from_url(
                 self.url,
-                verify_ssl=spack.config.get('config:verify_ssl', True),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
             )
 
             with open(basename, "wb") as f:
@@ -1399,8 +1403,8 @@ def fetch(self):
         with working_dir(self.stage.path):
             _, headers, stream = web_util.read_from_url(
                 self.url,
-                verify_ssl=spack.config.get('config:verify_ssl', True),
-                timeout=spack.config.get('config:connect_timeout', 10)
+                verify_ssl=spack.config.get("config:verify_ssl", True),
+                timeout=spack.config.get("config:connect_timeout", 10),
            )
 
             with open(basename, "wb") as f:

View File

@@ -287,7 +287,7 @@ def test_gather_s3_information(monkeypatch, capfd):
         }
     )
 
-    session_args, client_args = spack.util.s3.get_mirror_s3_connection_info(mirror, "push")
+    session_args, client_args = spack.util.s3.get_mirror_s3_connection_info(mirror, "push", False)
 
     # Session args are used to create the S3 Session object
     assert "aws_session_token" in session_args
@@ -307,7 +307,7 @@ def test_gather_s3_information(monkeypatch, capfd):
 def test_remove_s3_url(monkeypatch, capfd):
     fake_s3_url = "s3://my-bucket/subdirectory/mirror"
 
-    def get_s3_session(url, method="fetch"):
+    def get_s3_session(url, method="fetch", verify_ssl=True):
         return MockS3Client()
 
     monkeypatch.setattr(spack.util.web, "get_s3_session", get_s3_session)
@@ -315,7 +315,9 @@ def get_s3_session(url, method="fetch"):
     current_debug_level = tty.debug_level()
     tty.set_debug(1)
 
-    spack.util.web.remove_url(fake_s3_url, recursive=True)
+    spack.util.web.remove_url(
+        fake_s3_url, recursive=True, verify_ssl=spack.config.get("config:verify_ssl", True)
+    )
     err = capfd.readouterr()[1]
     tty.set_debug(current_debug_level)
@@ -326,26 +328,26 @@ def get_s3_session(url, method="fetch"):
 def test_s3_url_exists(monkeypatch, capfd):
-    def get_s3_session(url, method="fetch"):
+    def get_s3_session(url, method="fetch", verify_ssl=True):
         return MockS3Client()
 
     monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
 
     fake_s3_url_exists = "s3://my-bucket/subdirectory/my-file"
     assert spack.util.web.url_exists(
         fake_s3_url_exists,
-        fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-        verify_ssl=spack.config.get('config:verify_ssl'),
-        timeout=spack.config.get('config:connect_timeout', 10)
+        fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+        verify_ssl=spack.config.get("config:verify_ssl"),
+        timeout=spack.config.get("config:connect_timeout", 10),
     )
 
     fake_s3_url_does_not_exist = "s3://my-bucket/subdirectory/my-notfound-file"
     assert not spack.util.web.url_exists(
         fake_s3_url_does_not_exist,
-        fetch_method=spack.config.get('config:url_fetch_method', 'urllib'),
-        verify_ssl=spack.config.get('config:verify_ssl'),
-        timeout=spack.config.get('config:connect_timeout', 10)
+        fetch_method=spack.config.get("config:url_fetch_method", "urllib"),
+        verify_ssl=spack.config.get("config:verify_ssl"),
+        timeout=spack.config.get("config:connect_timeout", 10),
    )
 
 
 def test_s3_url_parsing():

View File

@@ -10,13 +10,11 @@
 from io import BufferedReader, BytesIO, IOBase
 from typing import Any, Dict, Tuple
 
-import spack.config
-
 #: Map (mirror name, method) tuples to s3 client instances.
 s3_client_cache: Dict[Tuple[str, str], Any] = dict()
 
 
-def get_s3_session(url, method="fetch"):
+def get_s3_session(url, method="fetch", verify_ssl=True):
     # import boto and friends as late as possible. We don't want to require boto as a
     # dependency unless the user actually wants to access S3 mirrors.
     from boto3 import Session
@@ -62,7 +60,7 @@ def get_mirror_url(mirror):
         return s3_client_cache[key]
 
     # Otherwise, create it.
-    s3_connection, s3_client_args = get_mirror_s3_connection_info(mirror, method)
+    s3_connection, s3_client_args = get_mirror_s3_connection_info(mirror, method, verify_ssl)
 
     session = Session(**s3_connection)
     # if no access credentials provided above, then access anonymously
@@ -84,13 +82,13 @@ def _parse_s3_endpoint_url(endpoint_url):
     return endpoint_url
 
 
-def get_mirror_s3_connection_info(mirror, method):
+def get_mirror_s3_connection_info(mirror, method, verify_ssl):
     """Create s3 config for session/client from a Mirror instance (or just set defaults
     when no mirror is given.)"""
     from spack.mirror import Mirror
 
     s3_connection = {}
-    s3_client_args = {"use_ssl": spack.config.get("config:verify_ssl")}
+    s3_client_args = {"use_ssl": verify_ssl}
 
     # access token
     if isinstance(mirror, Mirror):
@@ -150,9 +148,9 @@ def __getattr__(self, key):
         return getattr(self.raw, key)
 
 
-def _s3_open(url, method="GET"):
+def _s3_open(url, method="GET", verify_ssl=True):
     parsed = urllib.parse.urlparse(url)
-    s3 = get_s3_session(url, method="fetch")
+    s3 = get_s3_session(url, method="fetch", verify_ssl=verify_ssl)
     bucket = parsed.netloc
     key = parsed.path
@@ -182,7 +180,13 @@ def _s3_open(url, method="GET"):
 class UrllibS3Handler(urllib.request.BaseHandler):
+    def __init__(self, verify_ssl=True):
+        super().__init__()
+        self.verify_ssl = verify_ssl
+
     def s3_open(self, req):
         orig_url = req.get_full_url()
-        url, headers, stream = _s3_open(orig_url, method=req.get_method())
+        url, headers, stream = _s3_open(
+            orig_url, method=req.get_method(), verify_ssl=self.verify_ssl
+        )
         return urllib.response.addinfourl(stream, headers, url)
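
With the signatures above, verify_ssl is threaded from the caller down to the boto3 client instead of being looked up inside spack.util.s3. A hedged usage sketch with a placeholder URL:

```python
# Hedged usage sketch for the updated spack.util.s3 entry point;
# the s3:// URL is a placeholder.
import spack.config
import spack.util.s3 as s3_util

# The caller owns the config lookup; spack.util.s3 no longer imports spack.config.
client = s3_util.get_s3_session(
    "s3://my-bucket/mirror",  # placeholder mirror URL
    method="fetch",
    verify_ssl=spack.config.get("config:verify_ssl", True),
)
```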

View File

@@ -59,18 +59,19 @@ def http_error_default(self, req, fp, code, msg, hdrs):
 def _urlopen():
-    s3 = UrllibS3Handler()
+    s3_with_ssl = UrllibS3Handler(verify_ssl=True)
+    s3_no_ssl = UrllibS3Handler(verify_ssl=False)
     gcs = GCSHandler()
     error_handler = SpackHTTPDefaultErrorHandler()
 
     # One opener with HTTPS ssl enabled
     with_ssl = build_opener(
-        s3, gcs, HTTPSHandler(context=ssl.create_default_context()), error_handler
+        s3_with_ssl, gcs, HTTPSHandler(context=ssl.create_default_context()), error_handler
     )
 
     # One opener with HTTPS ssl disabled
     without_ssl = build_opener(
-        s3, gcs, HTTPSHandler(context=ssl._create_unverified_context()), error_handler
+        s3_no_ssl, gcs, HTTPSHandler(context=ssl._create_unverified_context()), error_handler
     )
 
     # And dynamically dispatch based on the config:verify_ssl.
@@ -162,7 +163,9 @@ def read_from_url(url, accept_content_type=None, verify_ssl=True, timeout=10, **
     return response.geturl(), response.headers, response
 
 
-def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=None):
+def push_to_url(
+    local_file_path, remote_path, keep_original=True, extra_args=None, verify_ssl=True
+):
     remote_url = urllib.parse.urlparse(remote_path)
     if remote_url.scheme == "file":
         remote_file_path = url_util.local_file_path(remote_url)
@@ -191,7 +194,7 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
         while remote_path.startswith("/"):
             remote_path = remote_path[1:]
 
-        s3 = get_s3_session(remote_url, method="push")
+        s3 = get_s3_session(remote_url, method="push", verify_ssl=verify_ssl)
         s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args)
 
         if not keep_original:
@@ -340,9 +343,7 @@ def fetch_url_text(url, curl=None, dest_dir=".", fetch_method=None):
             returncode = response.getcode()
 
             if returncode and returncode != 200:
-                raise WebError(
-                    "Urllib failed with error code {0}".format(returncode)
-                )
+                raise WebError("Urllib failed with error code {0}".format(returncode))
 
             output = codecs.getreader("utf-8")(response).read()
             if output:
@@ -393,8 +394,7 @@ def url_exists(url, curl=None, fetch_method=None, verify_ssl=True, timeout=10):
     # Otherwise use urllib.
     try:
         urlopen(
-            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
-            timeout=timeout,
+            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}), timeout=timeout
         )
         return True
     except URLError as e:
@@ -411,7 +411,7 @@ def _debug_print_delete_results(result):
             tty.debug("Failed to delete {0} ({1})".format(e["Key"], e["Message"]))
 
 
-def remove_url(url, recursive=False):
+def remove_url(url, recursive=False, verify_ssl=True):
     url = urllib.parse.urlparse(url)
     local_path = url_util.local_file_path(url)
@@ -424,7 +424,7 @@ def remove_url(url, recursive=False):
     if url.scheme == "s3":
         # Try to find a mirror for potential connection information
-        s3 = get_s3_session(url, method="push")
+        s3 = get_s3_session(url, method="push", verify_ssl=verify_ssl)
         bucket = url.netloc
         if recursive:
             # Because list_objects_v2 can only return up to 1000 items
@@ -520,7 +520,7 @@ def _iter_local_prefix(path):
             yield os.path.relpath(os.path.join(root, f), path)
 
 
-def list_url(url, recursive=False):
+def list_url(url, recursive=False, verify_ssl=True):
     url = urllib.parse.urlparse(url)
     local_path = url_util.local_file_path(url)
@@ -535,7 +535,7 @@ def list_url(url, recursive=False):
         ]
 
     if url.scheme == "s3":
-        s3 = get_s3_session(url, method="fetch")
+        s3 = get_s3_session(url, method="fetch", verify_ssl=verify_ssl)
         if recursive:
             return list(_iter_s3_prefix(s3, url))
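
Taken together, push_to_url, list_url, and remove_url in spack.util.web now accept verify_ssl explicitly; a hedged sketch of a caller driving them, with placeholder paths and URLs:

```python
# Hedged sketch of driving the updated spack.util.web API;
# the local path and s3:// URLs are placeholders.
import spack.config
import spack.util.web as web_util

verify_ssl = spack.config.get("config:verify_ssl", True)

web_util.push_to_url(
    "/tmp/index.json",                   # placeholder local file
    "s3://my-bucket/mirror/index.json",  # placeholder destination
    keep_original=False,
    extra_args={"ContentType": "application/json"},
    verify_ssl=verify_ssl,
)

entries = web_util.list_url("s3://my-bucket/mirror", recursive=True, verify_ssl=verify_ssl)
```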