Revert "Use urllib handler for s3:// and gs://, improve url_exists through HEAD requests (#34324)"
				
					
				
			This reverts commit db8f115013.
			
			
This commit is contained in:
--- a/lib/spack/spack/gcs_handler.py
+++ b/lib/spack/spack/gcs_handler.py
@@ -3,10 +3,9 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import urllib.response
-from urllib.error import URLError
-from urllib.request import BaseHandler
 
 import spack.util.url as url_util
+import spack.util.web as web_util
 
 
 def gcs_open(req, *args, **kwargs):
@@ -17,13 +16,8 @@ def gcs_open(req, *args, **kwargs):
     gcsblob = gcs_util.GCSBlob(url)
 
     if not gcsblob.exists():
-        raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
+        raise web_util.SpackWebError("GCS blob {0} does not exist".format(gcsblob.blob_path))
     stream = gcsblob.get_blob_byte_stream()
     headers = gcsblob.get_blob_headers()
 
     return urllib.response.addinfourl(stream, headers, url)
-
-
-class GCSHandler(BaseHandler):
-    def gs_open(self, req):
-        return gcs_open(req)
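For context on what this hunk removes: urllib routes a request whose scheme is "<scheme>" to a method named "<scheme>_open" on each registered handler, which is the mechanism the deleted GCSHandler.gs_open relied on. After the revert, gcs_open is instead invoked directly by the scheme dispatch in spack.util.web._urlopen (last hunk below). A minimal sketch of the handler protocol, using a made-up demo:// scheme and a canned in-memory payload:

    import io
    import urllib.request
    import urllib.response


    class DemoHandler(urllib.request.BaseHandler):
        # OpenerDirector dispatches "demo://..." requests here because the
        # method is named "<scheme>_open".
        def demo_open(self, req):
            stream = io.BytesIO(b"hello from the demo:// scheme\n")
            headers = {"Content-Length": "30"}
            return urllib.response.addinfourl(stream, headers, req.get_full_url())


    opener = urllib.request.build_opener(DemoHandler())
    resp = opener.open("demo://bucket/key")
    print(resp.read())  # b'hello from the demo:// scheme\n'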
--- a/lib/spack/spack/s3_handler.py
+++ b/lib/spack/spack/s3_handler.py
@@ -6,7 +6,7 @@
 import urllib.error
 import urllib.request
 import urllib.response
-from io import BufferedReader, BytesIO, IOBase
+from io import BufferedReader, IOBase
 
 import spack.util.s3 as s3_util
 import spack.util.url as url_util
@@ -42,7 +42,7 @@ def __getattr__(self, key):
         return getattr(self.raw, key)
 
 
-def _s3_open(url, method="GET"):
+def _s3_open(url):
     parsed = url_util.parse(url)
     s3 = s3_util.get_s3_session(url, method="fetch")
 
@@ -52,29 +52,27 @@ def _s3_open(url, method="GET"):
     if key.startswith("/"):
         key = key[1:]
 
-    if method not in ("GET", "HEAD"):
-        raise urllib.error.URLError(
-            "Only GET and HEAD verbs are currently supported for the s3:// scheme"
-        )
-
-    try:
-        if method == "GET":
-            obj = s3.get_object(Bucket=bucket, Key=key)
-            # NOTE(opadron): Apply workaround here (see above)
-            stream = WrapStream(obj["Body"])
-        elif method == "HEAD":
-            obj = s3.head_object(Bucket=bucket, Key=key)
-            stream = BytesIO()
-    except s3.ClientError as e:
-        raise urllib.error.URLError(e) from e
+    obj = s3.get_object(Bucket=bucket, Key=key)
+
+    # NOTE(opadron): Apply workaround here (see above)
+    stream = WrapStream(obj["Body"])
     headers = obj["ResponseMetadata"]["HTTPHeaders"]
 
     return url, headers, stream
 
 
-class UrllibS3Handler(urllib.request.BaseHandler):
+class UrllibS3Handler(urllib.request.HTTPSHandler):
     def s3_open(self, req):
         orig_url = req.get_full_url()
-        url, headers, stream = _s3_open(orig_url, method=req.get_method())
-        return urllib.response.addinfourl(stream, headers, url)
+        from botocore.exceptions import ClientError  # type: ignore[import]
+
+        try:
+            url, headers, stream = _s3_open(orig_url)
+            return urllib.response.addinfourl(stream, headers, url)
+        except ClientError as err:
+            raise urllib.error.URLError(err) from err
+
+
+S3OpenerDirector = urllib.request.build_opener(UrllibS3Handler())
+
+open = S3OpenerDirector.open
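The deleted branch above let one opener serve both verbs by mapping HEAD to the client's head_object call (metadata only, empty body) and GET to get_object. A condensed sketch of that dispatch; get_object and head_object are the real botocore method names and the response shape matches the diff, but FakeS3Client and its canned values are stand-ins:

    from io import BytesIO


    class FakeS3Client:
        def get_object(self, Bucket, Key):
            return {
                "Body": BytesIO(b"object bytes"),
                "ResponseMetadata": {"HTTPHeaders": {"content-length": "12"}},
            }

        def head_object(self, Bucket, Key):
            # Same metadata as GET, but no payload is transferred.
            return {"ResponseMetadata": {"HTTPHeaders": {"content-length": "12"}}}


    def s3_open(client, bucket, key, method="GET"):
        if method == "HEAD":
            obj, stream = client.head_object(Bucket=bucket, Key=key), BytesIO()
        else:
            obj = client.get_object(Bucket=bucket, Key=key)
            stream = obj["Body"]
        return obj["ResponseMetadata"]["HTTPHeaders"], stream


    headers, stream = s3_open(FakeS3Client(), "my-bucket", "my-key", method="HEAD")
    print(headers["content-length"], stream.read())  # prints: 12 b''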
--- a/lib/spack/spack/test/web.py
+++ b/lib/spack/spack/test/web.py
@@ -223,10 +223,7 @@ def paginate(self, *args, **kwargs):
 
 class MockClientError(Exception):
     def __init__(self):
-        self.response = {
-            "Error": {"Code": "NoSuchKey"},
-            "ResponseMetadata": {"HTTPStatusCode": 404},
-        }
+        self.response = {"Error": {"Code": "NoSuchKey"}}
 
 
 class MockS3Client(object):
@@ -245,13 +242,7 @@ def delete_object(self, *args, **kwargs):
     def get_object(self, Bucket=None, Key=None):
         self.ClientError = MockClientError
         if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return {"ResponseMetadata": {"HTTPHeaders": {}}}
-        raise self.ClientError
-
-    def head_object(self, Bucket=None, Key=None):
-        self.ClientError = MockClientError
-        if Bucket == "my-bucket" and Key == "subdirectory/my-file":
-            return {"ResponseMetadata": {"HTTPHeaders": {}}}
+            return True
         raise self.ClientError
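MockClientError mimics botocore.exceptions.ClientError, which exposes the parsed error document as err.response; that is why the mock carries an "Error"/"Code" payload. A sketch of the NoSuchKey check that the restored url_exists (further down) performs against that shape; the payload values here are illustrative:

    error_response = {
        "Error": {"Code": "NoSuchKey"},
        "ResponseMetadata": {"HTTPStatusCode": 404},
    }


    def exists_from_error(response):
        # A missing key is a normal "does not exist" answer; anything else
        # (permissions, throttling, ...) should propagate as a real error.
        if response["Error"]["Code"] == "NoSuchKey":
            return False
        raise RuntimeError("unexpected S3 error: " + response["Error"]["Code"])


    print(exists_from_error(error_response))  # False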
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -17,7 +17,7 @@
 import traceback
 from html.parser import HTMLParser
 from urllib.error import URLError
-from urllib.request import HTTPSHandler, Request, build_opener
+from urllib.request import Request, urlopen
 
 import llnl.util.lang
 import llnl.util.tty as tty
@@ -26,8 +26,6 @@
 import spack
 import spack.config
 import spack.error
-import spack.gcs_handler
-import spack.s3_handler
 import spack.url
 import spack.util.crypto
 import spack.util.gcs as gcs_util
@@ -37,28 +35,6 @@
 from spack.util.executable import CommandNotFoundError, which
 from spack.util.path import convert_to_posix_path
 
-
-def _urlopen():
-    s3 = spack.s3_handler.UrllibS3Handler()
-    gcs = spack.gcs_handler.GCSHandler()
-
-    # One opener with HTTPS ssl enabled
-    with_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl.create_default_context()))
-
-    # One opener with HTTPS ssl disabled
-    without_ssl = build_opener(s3, gcs, HTTPSHandler(context=ssl._create_unverified_context()))
-
-    # And dynamically dispatch based on the config:verify_ssl.
-    def dispatch_open(*args, **kwargs):
-        opener = with_ssl if spack.config.get("config:verify_ssl", True) else without_ssl
-        return opener.open(*args, **kwargs)
-
-    return dispatch_open
-
-
-#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
-
 #: User-Agent used in Request objects
 SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
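The block deleted here built two OpenerDirectors up front, one with a verifying SSL context and one without, and chose between them on every call, so a config:verify_ssl change takes effect without rebuilding handlers. A condensed sketch of that pattern; verify_ssl() is a stand-in for Spack's config lookup:

    import ssl
    import urllib.request


    def verify_ssl():
        return True  # stand-in for spack.config.get("config:verify_ssl", True)


    with_ssl = urllib.request.build_opener(
        urllib.request.HTTPSHandler(context=ssl.create_default_context())
    )
    without_ssl = urllib.request.build_opener(
        urllib.request.HTTPSHandler(context=ssl._create_unverified_context())
    )


    def dispatch_open(*args, **kwargs):
        # Pick the opener at call time so config changes are honored.
        opener = with_ssl if verify_ssl() else without_ssl
        return opener.open(*args, **kwargs)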
@@ -83,12 +59,43 @@ def handle_starttag(self, tag, attrs):
                     self.links.append(val)
 
 
+def uses_ssl(parsed_url):
+    if parsed_url.scheme == "https":
+        return True
+
+    if parsed_url.scheme == "s3":
+        endpoint_url = os.environ.get("S3_ENDPOINT_URL")
+        if not endpoint_url:
+            return True
+
+        if url_util.parse(endpoint_url, scheme="https").scheme == "https":
+            return True
+
+    elif parsed_url.scheme == "gs":
+        tty.debug("(uses_ssl) GCS Blob is https")
+        return True
+
+    return False
+
+
 def read_from_url(url, accept_content_type=None):
     url = url_util.parse(url)
+    context = None
 
     # Timeout in seconds for web requests
    timeout = spack.config.get("config:connect_timeout", 10)
 
+    # Don't even bother with a context unless the URL scheme is one that uses
+    # SSL certs.
+    if uses_ssl(url):
+        if spack.config.get("config:verify_ssl"):
+            # User wants SSL verification, and it *can* be provided.
+            context = ssl.create_default_context()
+        else:
+            # User has explicitly indicated that they do not want SSL
+            # verification.
+            context = ssl._create_unverified_context()
+
     url_scheme = url.scheme
     url = url_util.format(url)
     if sys.platform == "win32" and url_scheme == "file":
@@ -104,7 +111,7 @@ def read_from_url(url, accept_content_type=None):
         # one round-trip.  However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=timeout)
+        resp = _urlopen(req, timeout=timeout, context=context)
 
         content_type = get_header(resp.headers, "Content-type")
@@ -112,7 +119,7 @@ def read_from_url(url, accept_content_type=None):
     req.get_method = lambda: "GET"
 
     try:
-        response = urlopen(req, timeout=timeout)
+        response = _urlopen(req, timeout=timeout, context=context)
     except URLError as err:
         raise SpackWebError("Download failed: {ERROR}".format(ERROR=str(err)))
@@ -344,6 +351,12 @@ def url_exists(url, curl=None):
     Simple Storage Service (`s3`) URLs; otherwise, the configured fetch
     method defined by `config:url_fetch_method` is used.
 
+    If the method is `curl`, it also uses the following configuration option:
+
+        * config:verify_ssl (str): Perform SSL verification
+
+    Otherwise, `urllib` will be used.
+
     Arguments:
         url (str): URL whose existence is being checked
         curl (spack.util.executable.Executable or None): (optional) curl
@@ -354,11 +367,31 @@ def url_exists(url, curl=None):
     tty.debug("Checking existence of {0}".format(url))
     url_result = url_util.parse(url)
 
-    # Use curl if configured to do so
-    use_curl = spack.config.get(
-        "config:url_fetch_method", "urllib"
-    ) == "curl" and url_result.scheme not in ("gs", "s3")
-    if use_curl:
+    # Check if a local file
+    local_path = url_util.local_file_path(url_result)
+    if local_path:
+        return os.path.exists(local_path)
+
+    # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch
+    if url_result.scheme == "s3":
+        # Check for URL-specific connection information
+        s3 = s3_util.get_s3_session(url_result, method="fetch")
+
+        try:
+            s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
+            return True
+        except s3.ClientError as err:
+            if err.response["Error"]["Code"] == "NoSuchKey":
+                return False
+            raise err
+
+    # Check if Google Storage .. urllib-based fetch
+    if url_result.scheme == "gs":
+        gcs = gcs_util.GCSBlob(url_result)
+        return gcs.exists()
+
+    # Otherwise, use the configured fetch method
+    if spack.config.get("config:url_fetch_method") == "curl":
         curl_exe = _curl(curl)
         if not curl_exe:
             return False
@@ -371,14 +404,13 @@ def url_exists(url, curl=None):
         _ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
         return curl_exe.returncode == 0
 
-    # Otherwise use urllib.
+    # If we get here, then the only other fetch method option is urllib.
+    # So try to "read" from the URL and assume that *any* non-throwing
+    #  response contains the resource represented by the URL.
     try:
-        urlopen(
-            Request(url, method="HEAD", headers={"User-Agent": SPACK_USER_AGENT}),
-            timeout=spack.config.get("config:connect_timeout", 10),
-        )
+        read_from_url(url)
         return True
-    except URLError as e:
+    except (SpackWebError, URLError) as e:
         tty.debug("Failure reading URL: " + str(e))
         return False
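This hunk swaps the existence probe back from a single HEAD request to a full read_from_url call: HEAD transfers only headers, while the restored fallback downloads the body and treats any non-throwing response as proof of existence. A standalone sketch of the HEAD variant the reverted code used (example.com is only a placeholder URL):

    import urllib.error
    import urllib.request


    def head_exists(url, timeout=10):
        request = urllib.request.Request(url, method="HEAD")
        try:
            urllib.request.urlopen(request, timeout=timeout)
            return True
        except urllib.error.URLError:
            # Also covers HTTPError (e.g. 404), which subclasses URLError.
            return False


    print(head_exists("https://example.com/"))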
@@ -661,6 +693,35 @@ def _spider(url, collect_nested):
     return pages, links
 
 
+def _urlopen(req, *args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
+    url = req
+    try:
+        url = url.get_full_url()
+    except AttributeError:
+        pass
+
+    del kwargs["context"]
+
+    opener = urlopen
+    if url_util.parse(url).scheme == "s3":
+        import spack.s3_handler
+
+        opener = spack.s3_handler.open
+    elif url_util.parse(url).scheme == "gs":
+        import spack.gcs_handler
+
+        opener = spack.gcs_handler.gcs_open
+
+    try:
+        return opener(req, *args, **kwargs)
+    except TypeError as err:
+        # If the above fails because of 'context', call without 'context'.
+        if "context" in kwargs and "context" in str(err):
+            del kwargs["context"]
+        return opener(req, *args, **kwargs)
+
+
 def find_versions_of_archive(
     archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
 ):
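The restored _urlopen picks an opener by inspecting the URL scheme by hand instead of registering scheme handlers with an OpenerDirector as the reverted PR did. A minimal sketch of that dispatch style; the opener table and fake_s3_open are illustrative only:

    from urllib.parse import urlparse
    from urllib.request import urlopen


    def fake_s3_open(url, **kwargs):
        return "would fetch {0} with the S3 client".format(url)


    def dispatching_open(url, **kwargs):
        # Anything without a dedicated opener falls back to plain urllib.
        openers = {"s3": fake_s3_open}
        opener = openers.get(urlparse(url).scheme, urlopen)
        return opener(url, **kwargs)


    print(dispatching_open("s3://my-bucket/my-key"))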