Revert "Use urllib
handler for s3://
and gs://
, improve url_exists
through HEAD requests (#34324)"
This reverts commit db8f115013
.
This commit is contained in:
parent
57383a2294
commit
8035eeb36d
lib/spack/spack/gcs_handler.py

@@ -3,10 +3,9 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import urllib.response
-from urllib.error import URLError
-from urllib.request import BaseHandler

 import spack.util.url as url_util
+import spack.util.web as web_util


 def gcs_open(req, *args, **kwargs):
@@ -17,13 +16,8 @@ def gcs_open(req, *args, **kwargs):
     gcsblob = gcs_util.GCSBlob(url)

     if not gcsblob.exists():
-        raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
+        raise web_util.SpackWebError("GCS blob {0} does not exist".format(gcsblob.blob_path))
     stream = gcsblob.get_blob_byte_stream()
     headers = gcsblob.get_blob_headers()

     return urllib.response.addinfourl(stream, headers, url)
-
-
-class GCSHandler(BaseHandler):
-    def gs_open(self, req):
-        return gcs_open(req)
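For context on the deleted class: urllib dispatches a request to any registered handler that defines a method named <scheme>_open, which is how GCSHandler.gs_open served gs:// URLs before this revert. A minimal, self-contained sketch of that stdlib convention (the echo scheme and handler are invented for illustration):

import urllib.request
import urllib.response
from io import BytesIO


class EchoHandler(urllib.request.BaseHandler):
    # build_opener() routes echo:// requests here because the method
    # name matches the URL scheme: <scheme>_open.
    def echo_open(self, req):
        body = BytesIO(req.get_full_url().encode("utf-8"))
        return urllib.response.addinfourl(body, {}, req.get_full_url())


opener = urllib.request.build_opener(EchoHandler())
with opener.open("echo://hello") as response:
    print(response.read())  # b'echo://hello'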
lib/spack/spack/s3_handler.py

@@ -6,7 +6,7 @@
 import urllib.error
 import urllib.request
 import urllib.response
-from io import BufferedReader, BytesIO, IOBase
+from io import BufferedReader, IOBase

 import spack.util.s3 as s3_util
 import spack.util.url as url_util
@@ -42,7 +42,7 @@ def __getattr__(self, key):
         return getattr(self.raw, key)


-def _s3_open(url, method="GET"):
+def _s3_open(url):
     parsed = url_util.parse(url)
     s3 = s3_util.get_s3_session(url, method="fetch")
@@ -52,29 +52,27 @@ def _s3_open(url, method="GET"):
     if key.startswith("/"):
         key = key[1:]

-    if method not in ("GET", "HEAD"):
-        raise urllib.error.URLError(
-            "Only GET and HEAD verbs are currently supported for the s3:// scheme"
-        )
-
-    try:
-        if method == "GET":
-            obj = s3.get_object(Bucket=bucket, Key=key)
-            # NOTE(opadron): Apply workaround here (see above)
-            stream = WrapStream(obj["Body"])
-        elif method == "HEAD":
-            obj = s3.head_object(Bucket=bucket, Key=key)
-            stream = BytesIO()
-    except s3.ClientError as e:
-        raise urllib.error.URLError(e) from e
+    obj = s3.get_object(Bucket=bucket, Key=key)

+    # NOTE(opadron): Apply workaround here (see above)
+    stream = WrapStream(obj["Body"])
     headers = obj["ResponseMetadata"]["HTTPHeaders"]

     return url, headers, stream


-class UrllibS3Handler(urllib.request.BaseHandler):
+class UrllibS3Handler(urllib.request.HTTPSHandler):
     def s3_open(self, req):
         orig_url = req.get_full_url()
-        url, headers, stream = _s3_open(orig_url, method=req.get_method())
-        return urllib.response.addinfourl(stream, headers, url)
+        from botocore.exceptions import ClientError  # type: ignore[import]
+
+        try:
+            url, headers, stream = _s3_open(orig_url)
+            return urllib.response.addinfourl(stream, headers, url)
+        except ClientError as err:
+            raise urllib.error.URLError(err) from err
+
+
+S3OpenerDirector = urllib.request.build_opener(UrllibS3Handler())
+
+open = S3OpenerDirector.open
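The restored module-level open is the bound open() of S3OpenerDirector, so s3:// requests flow through UrllibS3Handler.s3_open. A rough usage sketch (the bucket and key are hypothetical, and credentials resolvable by spack.util.s3.get_s3_session are assumed):

import urllib.request

import spack.s3_handler

# Hypothetical object; real use needs valid AWS credentials and an
# existing key in the bucket.
req = urllib.request.Request("s3://my-bucket/path/to/file")
with spack.s3_handler.open(req) as response:
    data = response.read()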
lib/spack/spack/test/web.py

@@ -223,10 +223,7 @@ def paginate(self, *args, **kwargs):

 class MockClientError(Exception):
     def __init__(self):
-        self.response = {
-            "Error": {"Code": "NoSuchKey"},
-            "ResponseMetadata": {"HTTPStatusCode": 404},
-        }
+        self.response = {"Error": {"Code": "NoSuchKey"}}


 class MockS3Client(object):
@@ -245,13 +242,7 @@ def delete_object(self, *args, **kwargs):
     def get_object(self, Bucket=None, Key=None):
         self.ClientError = MockClientError
         if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return {"ResponseMetadata": {"HTTPHeaders": {}}}
-        raise self.ClientError
-
-    def head_object(self, Bucket=None, Key=None):
-        self.ClientError = MockClientError
-        if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return {"ResponseMetadata": {"HTTPHeaders": {}}}
+            return True
         raise self.ClientError
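The simplified mock keeps only the Error/Code field because that is all the restored url_exists inspects. For reference, a hedged sketch of how the real botocore exception is consumed (bucket and key mirror the mock fixtures; configured AWS credentials are assumed):

import boto3
from botocore.exceptions import ClientError

s3 = boto3.client("s3")

try:
    s3.get_object(Bucket="my-bucket", Key="subdirectory/my-file")
except ClientError as err:
    # The real exception carries the same shape the mock imitates:
    # {"Error": {"Code": "NoSuchKey", ...}, "ResponseMetadata": {...}}
    if err.response["Error"]["Code"] == "NoSuchKey":
        print("object does not exist")
    else:
        raise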
lib/spack/spack/util/web.py

@@ -17,7 +17,7 @@
 import traceback
 from html.parser import HTMLParser
 from urllib.error import URLError
-from urllib.request import HTTPSHandler, Request, build_opener
+from urllib.request import Request, urlopen

 import llnl.util.lang
 import llnl.util.tty as tty
@@ -26,8 +26,6 @@
 import spack
 import spack.config
 import spack.error
-import spack.gcs_handler
-import spack.s3_handler
 import spack.url
 import spack.util.crypto
 import spack.util.gcs as gcs_util
@@ -37,28 +35,6 @@
 from spack.util.executable import CommandNotFoundError, which
 from spack.util.path import convert_to_posix_path

-
-def _urlopen():
-    s3 = spack.s3_handler.UrllibS3Handler()
-    gcs = spack.gcs_handler.GCSHandler()
-
-    # One opener with HTTPS ssl enabled
-    with_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl.create_default_context()))
-
-    # One opener with HTTPS ssl disabled
-    without_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl._create_unverified_context()))
-
-    # And dynamically dispatch based on the config:verify_ssl.
-    def dispatch_open(*args, **kwargs):
-        opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
-        return opener.open(*args, **kwargs)
-
-    return dispatch_open
-
-
-#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
-
 #: User-Agent used in Request objects
 SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
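The deleted factory built both OpenerDirectors once and chose between them on every call. Stripped of the S3/GCS handlers, the same stdlib pattern looks roughly like this (verify_ssl stands in for the config:verify_ssl lookup):

import ssl
import urllib.request

# One opener whose HTTPS handler verifies certificates...
with_ssl = urllib.request.build_opener(
    urllib.request.HTTPSHandler(context=ssl.create_default_context())
)

# ...and one whose handler skips verification (note the private helper).
without_ssl = urllib.request.build_opener(
    urllib.request.HTTPSHandler(context=ssl._create_unverified_context())
)

verify_ssl = True  # stand-in for spack.config.get("config:verify_ssl", True)
opener = with_ssl if verify_ssl else without_ssl
response = opener.open("https://example.com", timeout=10)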
@@ -83,12 +59,43 @@ def handle_starttag(self, tag, attrs):
                 self.links.append(val)


+def uses_ssl(parsed_url):
+    if parsed_url.scheme == "https":
+        return True
+
+    if parsed_url.scheme == "s3":
+        endpoint_url = os.environ.get("S3_ENDPOINT_URL")
+        if not endpoint_url:
+            return True
+
+        if url_util.parse(endpoint_url, scheme="https").scheme == "https":
+            return True
+
+    elif parsed_url.scheme == "gs":
+        tty.debug("(uses_ssl) GCS Blob is https")
+        return True
+
+    return False
+
+
 def read_from_url(url, accept_content_type=None):
     url = url_util.parse(url)
+    context = None

     # Timeout in seconds for web requests
     timeout = spack.config.get("config:connect_timeout", 10)

+    # Don't even bother with a context unless the URL scheme is one that uses
+    # SSL certs.
+    if uses_ssl(url):
+        if spack.config.get("config:verify_ssl"):
+            # User wants SSL verification, and it *can* be provided.
+            context = ssl.create_default_context()
+        else:
+            # User has explicitly indicated that they do not want SSL
+            # verification.
+            context = ssl._create_unverified_context()
+
     url_scheme = url.scheme
     url = url_util.format(url)
     if sys.platform == "win32" and url_scheme == "file":
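The two context constructors restored above differ only in whether certificates and hostnames are checked; a small standalone illustration:

import ssl

# Default: verify server certificates and check hostnames.
verified = ssl.create_default_context()
assert verified.verify_mode == ssl.CERT_REQUIRED
assert verified.check_hostname is True

# Private helper Spack uses when config:verify_ssl is false:
# no certificate or hostname verification.
unverified = ssl._create_unverified_context()
assert unverified.verify_mode == ssl.CERT_NONE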
@@ -104,7 +111,7 @@ def read_from_url(url, accept_content_type=None):
         # one round-trip. However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=timeout)
+        resp = _urlopen(req, timeout=timeout, context=context)

         content_type = get_header(resp.headers, "Content-type")
@@ -112,7 +119,7 @@ def read_from_url(url, accept_content_type=None):
     req.get_method = lambda: "GET"

     try:
-        response = urlopen(req, timeout=timeout)
+        response = _urlopen(req, timeout=timeout, context=context)
     except URLError as err:
         raise SpackWebError("Download failed: {ERROR}".format(ERROR=str(err)))
@@ -344,6 +351,12 @@ def url_exists(url, curl=None):
     Simple Storage Service (`s3`) URLs; otherwise, the configured fetch
     method defined by `config:url_fetch_method` is used.

+    If the method is `curl`, it also uses the following configuration option:
+
+    * config:verify_ssl (str): Perform SSL verification
+
+    Otherwise, `urllib` will be used.
+
     Arguments:
         url (str): URL whose existence is being checked
         curl (spack.util.executable.Executable or None): (optional) curl
@@ -354,11 +367,31 @@ def url_exists(url, curl=None):
     tty.debug("Checking existence of {0}".format(url))
     url_result = url_util.parse(url)

-    # Use curl if configured to do so
-    use_curl = spack.config.get(
-        "config:url_fetch_method", "urllib"
-    ) == "curl" and url_result.scheme not in ("gs", "s3")
-    if use_curl:
+    # Check if a local file
+    local_path = url_util.local_file_path(url_result)
+    if local_path:
+        return os.path.exists(local_path)
+
+    # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch
+    if url_result.scheme == "s3":
+        # Check for URL-specific connection information
+        s3 = s3_util.get_s3_session(url_result, method="fetch")
+
+        try:
+            s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
+            return True
+        except s3.ClientError as err:
+            if err.response["Error"]["Code"] == "NoSuchKey":
+                return False
+            raise err
+
+    # Check if Google Storage .. urllib-based fetch
+    if url_result.scheme == "gs":
+        gcs = gcs_util.GCSBlob(url_result)
+        return gcs.exists()
+
+    # Otherwise, use the configured fetch method
+    if spack.config.get("config:url_fetch_method") == "curl":
         curl_exe = _curl(curl)
         if not curl_exe:
             return False
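Note the trade-off being reverted: the restored check calls s3.get_object, which begins downloading the object body, while the reverted commit probed existence with a HEAD request that transfers headers only. A hedged boto3 sketch of that cheaper variant (the helper name and bucket/key are invented; configured credentials are assumed):

import boto3
from botocore.exceptions import ClientError


def s3_object_exists(bucket, key):
    """Existence check via HEAD, transferring metadata only."""
    s3 = boto3.client("s3")
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as err:
        # head_object reports a missing key as HTTP 404.
        if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404:
            return False
        raise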
@@ -371,14 +404,13 @@ def url_exists(url, curl=None):
         _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
         return curl_exe.returncode == 0

-    # Otherwise use urllib.
+    # If we get here, then the only other fetch method option is urllib.
+    # So try to "read" from the URL and assume that *any* non-throwing
+    # response contains the resource represented by the URL.
     try:
-        urlopen(
-            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
-            timeout=spack.config.get("config:connect_timeout", 10),
-        )
+        read_from_url(url)
         return True
-    except URLError as e:
+    except (SpackWebError, URLError) as e:
         tty.debug("Failure reading URL: " + str(e))
         return False
@@ -661,6 +693,35 @@ def _spider(url, collect_nested):
     return pages, links


+def _urlopen(req, *args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
+    url = req
+    try:
+        url = url.get_full_url()
+    except AttributeError:
+        pass
+
+    del kwargs["context"]
+
+    opener = urlopen
+    if url_util.parse(url).scheme == "s3":
+        import spack.s3_handler
+
+        opener = spack.s3_handler.open
+    elif url_util.parse(url).scheme == "gs":
+        import spack.gcs_handler
+
+        opener = spack.gcs_handler.gcs_open
+
+    try:
+        return opener(req, *args, **kwargs)
+    except TypeError as err:
+        # If the above fails because of 'context', call without 'context'.
+        if "context" in kwargs and "context" in str(err):
+            del kwargs["context"]
+            return opener(req, *args, **kwargs)
+
+
 def find_versions_of_archive(
     archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
 ):
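The restored _urlopen wrapper picks an opener by URL scheme and unconditionally pops the context keyword, so callers must always supply it, as the restored read_from_url does above. A rough call sketch (the URL is illustrative):

from urllib.request import Request

import spack.util.web as web_util

# context must be present; _urlopen deletes it before delegating to
# the scheme-specific opener, which does not accept that keyword.
req = Request("https://example.com/index.html")
response = web_util._urlopen(req, timeout=10, context=None)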