Group primitive url/path handling functions together (#40028)
committed by GitHub

parent bc02453f6d
commit fb9e5fcc4f

lib/spack/llnl/url.py  (new file, 459 lines)
@@ -0,0 +1,459 @@
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""URL primitives that just require Python standard library."""
import itertools
import os.path
import re
from typing import Optional, Set, Tuple
from urllib.parse import urlsplit, urlunsplit

# Archive extensions allowed in Spack
PREFIX_EXTENSIONS = ("tar", "TAR")
EXTENSIONS = ("gz", "bz2", "xz", "Z")
NO_TAR_EXTENSIONS = ("zip", "tgz", "tbz2", "tbz", "txz")

# Add PREFIX_EXTENSIONS and EXTENSIONS last so that .tar.gz is matched *before* .tar or .gz
ALLOWED_ARCHIVE_TYPES = (
    tuple(".".join(ext) for ext in itertools.product(PREFIX_EXTENSIONS, EXTENSIONS))
    + PREFIX_EXTENSIONS
    + EXTENSIONS
    + NO_TAR_EXTENSIONS
)
CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}


def find_list_urls(url: str) -> Set[str]:
    r"""Find good list URLs for the supplied URL.

    By default, returns the dirname of the archive path.

    Provides special treatment for the following websites, which have a
    unique list URL different from the dirname of the download URL:

    =========  =======================================================
    GitHub     https://github.com/<repo>/<name>/releases
    GitLab     https://gitlab.\*/<repo>/<name>/tags
    BitBucket  https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
    CRAN       https://\*.r-project.org/src/contrib/Archive/<name>
    PyPI       https://pypi.org/simple/<name>/
    LuaRocks   https://luarocks.org/modules/<repo>/<name>
    =========  =======================================================

    Note: this function is called by `spack versions`, `spack checksum`,
    and `spack create`, but not by `spack fetch` or `spack install`.

    Parameters:
        url (str): The download URL for the package

    Returns:
        set: One or more list URLs for the package
    """
    url_types = [
        # GitHub
        # e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
        (r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
        # GitLab API endpoint
        # e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
        (
            r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
            lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
        ),
        # GitLab non-API endpoint
        # e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
        (r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
        # BitBucket
        # e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
        (r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
        # CRAN
        # e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
        # e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
        (
            r"(.*\.r-project\.org/src/contrib)/([^_]+)",
            lambda m: m.group(1) + "/Archive/" + m.group(2),
        ),
        # PyPI
        # e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
        # e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
        # e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
        # e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
        # e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
        # e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
        (
            r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
            lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
        ),
        # LuaRocks
        # e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
        # e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
        (
            r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
            + r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
            lambda m: "https://luarocks.org/modules/"
            + m.group("org")
            + "/"
            + m.group("name")
            + "/",
        ),
    ]

    list_urls = {os.path.dirname(url)}

    for pattern, fun in url_types:
        match = re.search(pattern, url)
        if match:
            list_urls.add(fun(match))

    return list_urls


def strip_query_and_fragment(url: str) -> Tuple[str, str]:
    """Strips query and fragment from a url, then returns the base url and the suffix.

    Args:
        url: URL to be stripped

    Raises:
        ValueError: when there is any error parsing the URL
    """
    components = urlsplit(url)
    stripped = components[:3] + (None, None)

    query, frag = components[3:5]
    suffix = ""
    if query:
        suffix += "?" + query
    if frag:
        suffix += "#" + frag

    return urlunsplit(stripped), suffix


SOURCEFORGE_RE = re.compile(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$")


def split_url_on_sourceforge_suffix(url: str) -> Tuple[str, ...]:
    """If the input is a sourceforge URL, returns base URL and "/download" suffix. Otherwise,
    returns the input URL and an empty string.
    """
    match = SOURCEFORGE_RE.search(url)
    if match is not None:
        return match.groups()
    return url, ""


def has_extension(path_or_url: str, ext: str) -> bool:
    """Returns true if the extension in input is present in path, false otherwise."""
    prefix, _ = split_url_on_sourceforge_suffix(path_or_url)
    if not ext.startswith(r"\."):
        ext = rf"\.{ext}$"

    if re.search(ext, prefix):
        return True
    return False


def extension_from_path(path_or_url: Optional[str]) -> Optional[str]:
    """Tries to match an allowed archive extension to the input. Returns the first match,
    or None if no match was found.

    Raises:
        ValueError: if the input is None
    """
    if path_or_url is None:
        raise ValueError("Can't call extension() on None")

    for t in ALLOWED_ARCHIVE_TYPES:
        if has_extension(path_or_url, t):
            return t
    return None


def remove_extension(path_or_url: str, *, extension: str) -> str:
    """Returns the input with the extension removed"""
    suffix = rf"\.{extension}$"
    return re.sub(suffix, "", path_or_url)


def check_and_remove_ext(path: str, *, extension: str) -> str:
    """Returns the input path with the extension removed, if the extension is present in path.
    Otherwise, returns the input unchanged.
    """
    if not has_extension(path, extension):
        return path
    path, _ = split_url_on_sourceforge_suffix(path)
    return remove_extension(path, extension=extension)


def strip_extension(path_or_url: str, *, extension: Optional[str] = None) -> str:
    """If a path contains the extension in input, returns the path stripped of the extension.
    Otherwise, returns the input path.

    If extension is None, attempts to strip any allowed extension from path.
    """
    if extension is None:
        for t in ALLOWED_ARCHIVE_TYPES:
            if has_extension(path_or_url, ext=t):
                extension = t
                break
        else:
            return path_or_url

    return check_and_remove_ext(path_or_url, extension=extension)


def split_url_extension(url: str) -> Tuple[str, ...]:
    """Some URLs have a query string, e.g.:

    1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
    2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
    3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0

    In (1), the query string needs to be stripped to get at the
    extension, but in (2) & (3), the filename is IN a single final query
    argument.

    This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
    The suffix contains anything that was stripped off the URL to
    get at the file extension. In (1), it will be ``'?raw=true'``, but
    in (2), it will be empty. In (3) the suffix is a parameter that follows
    after the file extension, e.g.:

    1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')``
    2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', '.tar.gz', None)``
    3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', '.tar.bz2', '?ref=v7.0.0')``
    """
    # Strip off sourceforge download suffix.
    # e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
    prefix, suffix = split_url_on_sourceforge_suffix(url)

    ext = extension_from_path(prefix)
    if ext is not None:
        prefix = strip_extension(prefix)
        return prefix, ext, suffix

    try:
        prefix, suf = strip_query_and_fragment(prefix)
    except ValueError:
        # FIXME: tty.debug("Got error parsing path %s" % path)
        # Ignore URL parse errors here
        return url, ""

    ext = extension_from_path(prefix)
    prefix = strip_extension(prefix)
    suffix = suf + suffix
    if ext is None:
        ext = ""

    return prefix, ext, suffix


def strip_version_suffixes(path_or_url: str) -> str:
    """Some tarballs contain extraneous information after the version:

    * ``bowtie2-2.2.5-source``
    * ``libevent-2.0.21-stable``
    * ``cuda_8.0.44_linux.run``

    These strings are not part of the version number and should be ignored.
    This function strips those suffixes off and returns the remaining string.
    The goal is that the version is always the last thing in ``path``:

    * ``bowtie2-2.2.5``
    * ``libevent-2.0.21``
    * ``cuda_8.0.44``

    Args:
        path_or_url: The filename or URL for the package

    Returns:
        The ``path`` with any extraneous suffixes removed
    """
    # NOTE: This could be done with complicated regexes in parse_version_offset
    # NOTE: The problem is that we would have to add these regexes to the end
    # NOTE: of every single version regex. Easier to just strip them off
    # NOTE: permanently

    suffix_regexes = [
        # Download type
        r"[Ii]nstall",
        r"all",
        r"code",
        r"[Ss]ources?",
        r"file",
        r"full",
        r"single",
        r"with[a-zA-Z_-]+",
        r"rock",
        r"src(_0)?",
        r"public",
        r"bin",
        r"binary",
        r"run",
        r"[Uu]niversal",
        r"jar",
        r"complete",
        r"dynamic",
        r"oss",
        r"gem",
        r"tar",
        r"sh",
        # Download version
        r"release",
        r"bin",
        r"stable",
        r"[Ff]inal",
        r"rel",
        r"orig",
        r"dist",
        r"\+",
        # License
        r"gpl",
        # Arch
        # Needs to come before and after OS, appears in both orders
        r"ia32",
        r"intel",
        r"amd64",
        r"linux64",
        r"x64",
        r"64bit",
        r"x86[_-]64",
        r"i586_64",
        r"x86",
        r"i[36]86",
        r"ppc64(le)?",
        r"armv?(7l|6l|64)",
        # Other
        r"cpp",
        r"gtk",
        r"incubating",
        # OS
        r"[Ll]inux(_64)?",
        r"LINUX",
        r"[Uu]ni?x",
        r"[Ss]un[Oo][Ss]",
        r"[Mm]ac[Oo][Ss][Xx]?",
        r"[Oo][Ss][Xx]",
        r"[Dd]arwin(64)?",
        r"[Aa]pple",
        r"[Ww]indows",
        r"[Ww]in(64|32)?",
        r"[Cc]ygwin(64|32)?",
        r"[Mm]ingw",
        r"centos",
        # Arch
        # Needs to come before and after OS, appears in both orders
        r"ia32",
        r"intel",
        r"amd64",
        r"linux64",
        r"x64",
        r"64bit",
        r"x86[_-]64",
        r"i586_64",
        r"x86",
        r"i[36]86",
        r"ppc64(le)?",
        r"armv?(7l|6l|64)?",
        # PyPI
        r"[._-]py[23].*\.whl",
        r"[._-]cp[23].*\.whl",
        r"[._-]win.*\.exe",
    ]

    for regex in suffix_regexes:
        # Remove the suffix from the end of the path
        # This may be done multiple times
        path_or_url = re.sub(r"[._-]?" + regex + "$", "", path_or_url)

    return path_or_url


def expand_contracted_extension(extension: str) -> str:
    """Returns the expanded version of a known contracted extension.

    This function maps extensions like ".tgz" to ".tar.gz". On unknown extensions,
    return the input unmodified.
    """
    extension = extension.strip(".")
    return CONTRACTION_MAP.get(extension, extension)


def expand_contracted_extension_in_path(
    path_or_url: str, *, extension: Optional[str] = None
) -> str:
    """Returns the input path or URL with any contraction extension expanded.

    Args:
        path_or_url: path or URL to be expanded
        extension: if specified, only attempt to expand that extension
    """
    extension = extension or extension_from_path(path_or_url)
    if extension is None:
        return path_or_url

    expanded = expand_contracted_extension(extension)
    if expanded != extension:
        return re.sub(rf"{extension}", rf"{expanded}", path_or_url)
    return path_or_url


def compression_ext_from_compressed_archive(extension: str) -> Optional[str]:
    """Returns compression extension for a compressed archive"""
    extension = expand_contracted_extension(extension)
    for ext in [*EXTENSIONS]:
        if ext in extension:
            return ext
    return None


def strip_compression_extension(path_or_url: str, ext: Optional[str] = None) -> str:
    """Strips the compression extension from the input, and returns it. For instance,
    "foo.tgz" becomes "foo.tar".

    If no extension is given, try a default list of extensions.

    Args:
        path_or_url: input to be stripped
        ext: if given, extension to be stripped
    """
    if not extension_from_path(path_or_url):
        return path_or_url

    expanded_path = expand_contracted_extension_in_path(path_or_url)
    candidates = [ext] if ext is not None else EXTENSIONS
    for current_extension in candidates:
        modified_path = check_and_remove_ext(expanded_path, extension=current_extension)
        if modified_path != expanded_path:
            return modified_path
    return expanded_path


def allowed_archive(path_or_url: str) -> bool:
    """Returns true if the input is a valid archive, False otherwise."""
    return (
        False if not path_or_url else any(path_or_url.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
    )


def determine_url_file_extension(path: str) -> str:
    """This returns the type of archive a URL refers to. This is
    sometimes confusing because of URLs like:

    (1) https://github.com/petdance/ack/tarball/1.93_02

    Where the URL doesn't actually contain the filename. We need
    to know what type it is so that we can appropriately name files
    in mirrors.
    """
    match = re.search(r"github.com/.+/(zip|tar)ball/", path)
    if match:
        if match.group(1) == "zip":
            return "zip"
        elif match.group(1) == "tar":
            return "tar.gz"

    prefix, ext, suffix = split_url_extension(path)
    return ext
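For orientation, here is a brief usage sketch of the new ``llnl.url`` module. It is illustrative only and not part of the commit; the URL and the expected values are made up, following the docstrings above.

import llnl.url

url = "https://example.org/dist/foo-1.2.3-linux-x86_64.tar.gz?raw=true"

# Split off the archive extension and the query-string suffix.
prefix, ext, suffix = llnl.url.split_url_extension(url)
# prefix == "https://example.org/dist/foo-1.2.3-linux-x86_64"
# ext == "tar.gz", suffix == "?raw=true"

# Drop trailing OS/arch decorations so the version is the last component.
stem = llnl.url.strip_version_suffixes(prefix.rsplit("/", 1)[-1])
# stem == "foo-1.2.3"

# Contracted extensions are expanded before the compression suffix is stripped.
assert llnl.url.allowed_archive("foo-1.2.3.tgz")
assert llnl.url.strip_compression_extension("foo-1.2.3.tgz") == "foo-1.2.3.tar"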
@@ -822,7 +822,7 @@ def get_versions(args, name):
     if args.url is not None and args.template != "bundle" and valid_url:
         # Find available versions
         try:
-            url_dict = spack.util.web.find_versions_of_archive(args.url)
+            url_dict = spack.url.find_versions_of_archive(args.url)
         except UndetectableVersionError:
             # Use fake versions
             tty.warn("Couldn't detect version in: {0}".format(args.url))
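The updated call returns a mapping from detected versions to the download URLs they were found at, which the caller uses to list available versions. A minimal sketch of the shape, with hypothetical values:

url_dict = spack.url.find_versions_of_archive("https://example.org/dist/foo-1.2.3.tar.gz")
# e.g. {Version("1.2.3"): "https://example.org/dist/foo-1.2.3.tar.gz",
#       Version("1.2.4"): "https://example.org/dist/foo-1.2.4.tar.gz"}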
@@ -12,6 +12,7 @@
|
||||
import spack.fetch_strategy as fs
|
||||
import spack.repo
|
||||
import spack.spec
|
||||
import spack.url
|
||||
import spack.util.crypto as crypto
|
||||
from spack.url import (
|
||||
UndetectableNameError,
|
||||
@@ -26,7 +27,6 @@
|
||||
substitution_offsets,
|
||||
)
|
||||
from spack.util.naming import simplify_name
|
||||
from spack.util.web import find_versions_of_archive
|
||||
|
||||
description = "debugging tool for url parsing"
|
||||
section = "developer"
|
||||
@@ -139,7 +139,7 @@ def url_parse(args):
|
||||
if args.spider:
|
||||
print()
|
||||
tty.msg("Spidering for versions:")
|
||||
versions = find_versions_of_archive(url)
|
||||
versions = spack.url.find_versions_of_archive(url)
|
||||
|
||||
if not versions:
|
||||
print(" Found no versions for {0}".format(name))
|
||||
|
@@ -31,6 +31,7 @@
|
||||
import urllib.parse
|
||||
from typing import List, Optional
|
||||
|
||||
import llnl.url
|
||||
import llnl.util
|
||||
import llnl.util.filesystem as fs
|
||||
import llnl.util.tty as tty
|
||||
@@ -46,7 +47,7 @@
|
||||
import spack.util.web as web_util
|
||||
import spack.version
|
||||
import spack.version.git_ref_lookup
|
||||
from spack.util.compression import decompressor_for, extension_from_path
|
||||
from spack.util.compression import decompressor_for
|
||||
from spack.util.executable import CommandNotFoundError, which
|
||||
from spack.util.string import comma_and, quote
|
||||
|
||||
@@ -441,7 +442,7 @@ def expand(self):
|
||||
|
||||
# TODO: replace this by mime check.
|
||||
if not self.extension:
|
||||
self.extension = spack.url.determine_url_file_extension(self.url)
|
||||
self.extension = llnl.url.determine_url_file_extension(self.url)
|
||||
|
||||
if self.stage.expanded:
|
||||
tty.debug("Source already staged to %s" % self.stage.source_path)
|
||||
@@ -570,7 +571,7 @@ def expand(self):
|
||||
|
||||
@_needs_stage
|
||||
def archive(self, destination, **kwargs):
|
||||
assert extension_from_path(destination) == "tar.gz"
|
||||
assert llnl.url.extension_from_path(destination) == "tar.gz"
|
||||
assert self.stage.source_path.startswith(self.stage.path)
|
||||
|
||||
tar = which("tar", required=True)
|
||||
|
@@ -1,28 +0,0 @@
|
||||
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
import urllib.parse
|
||||
import urllib.response
|
||||
from urllib.error import URLError
|
||||
from urllib.request import BaseHandler
|
||||
|
||||
|
||||
def gcs_open(req, *args, **kwargs):
|
||||
"""Open a reader stream to a blob object on GCS"""
|
||||
import spack.util.gcs as gcs_util
|
||||
|
||||
url = urllib.parse.urlparse(req.get_full_url())
|
||||
gcsblob = gcs_util.GCSBlob(url)
|
||||
|
||||
if not gcsblob.exists():
|
||||
raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
|
||||
stream = gcsblob.get_blob_byte_stream()
|
||||
headers = gcsblob.get_blob_headers()
|
||||
|
||||
return urllib.response.addinfourl(stream, headers, url)
|
||||
|
||||
|
||||
class GCSHandler(BaseHandler):
|
||||
def gs_open(self, req):
|
||||
return gcs_open(req)
|
@@ -30,7 +30,6 @@
|
||||
import llnl.util.tty.color as color
|
||||
from llnl.util.tty.log import log_output
|
||||
|
||||
import spack
|
||||
import spack.cmd
|
||||
import spack.config
|
||||
import spack.environment as ev
|
||||
|
@@ -20,6 +20,7 @@
|
||||
import urllib.parse
|
||||
from typing import Optional, Union
|
||||
|
||||
import llnl.url
|
||||
import llnl.util.tty as tty
|
||||
from llnl.util.filesystem import mkdirp
|
||||
|
||||
@@ -29,7 +30,6 @@
|
||||
import spack.fetch_strategy as fs
|
||||
import spack.mirror
|
||||
import spack.spec
|
||||
import spack.url as url
|
||||
import spack.util.path
|
||||
import spack.util.spack_json as sjson
|
||||
import spack.util.spack_yaml as syaml
|
||||
@@ -375,7 +375,7 @@ def _determine_extension(fetcher):
|
||||
if isinstance(fetcher, fs.URLFetchStrategy):
|
||||
if fetcher.expand_archive:
|
||||
# If we fetch with a URLFetchStrategy, use URL's archive type
|
||||
ext = url.determine_url_file_extension(fetcher.url)
|
||||
ext = llnl.url.determine_url_file_extension(fetcher.url)
|
||||
|
||||
if ext:
|
||||
# Remove any leading dots
|
||||
|
@@ -2377,7 +2377,7 @@ def fetch_remote_versions(self, concurrency=128):
|
||||
return {}
|
||||
|
||||
try:
|
||||
return spack.util.web.find_versions_of_archive(
|
||||
return spack.url.find_versions_of_archive(
|
||||
self.all_urls, self.list_url, self.list_depth, concurrency, reference_package=self
|
||||
)
|
||||
except spack.util.web.NoNetworkConnectionError as e:
|
||||
|
@@ -11,6 +11,7 @@
|
||||
|
||||
import llnl.util.filesystem
|
||||
import llnl.util.lang
|
||||
from llnl.url import allowed_archive
|
||||
|
||||
import spack
|
||||
import spack.error
|
||||
@@ -19,7 +20,6 @@
|
||||
import spack.repo
|
||||
import spack.stage
|
||||
import spack.util.spack_json as sjson
|
||||
from spack.util.compression import allowed_archive
|
||||
from spack.util.crypto import Checker, checksum
|
||||
from spack.util.executable import which, which_string
|
||||
|
||||
|
@@ -1,80 +0,0 @@
|
||||
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
from io import BufferedReader, BytesIO, IOBase
|
||||
|
||||
import spack.util.s3 as s3_util
|
||||
|
||||
|
||||
# NOTE(opadron): Workaround issue in boto where its StreamingBody
|
||||
# implementation is missing several APIs expected from IOBase. These missing
|
||||
# APIs prevent the streams returned by boto from being passed as-are along to
|
||||
# urllib.
|
||||
#
|
||||
# https://github.com/boto/botocore/issues/879
|
||||
# https://github.com/python/cpython/pull/3249
|
||||
class WrapStream(BufferedReader):
|
||||
def __init__(self, raw):
|
||||
# In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
|
||||
# only add missing attributes in older versions.
|
||||
# https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
|
||||
if not isinstance(raw, IOBase):
|
||||
raw.readable = lambda: True
|
||||
raw.writable = lambda: False
|
||||
raw.seekable = lambda: False
|
||||
raw.closed = False
|
||||
raw.flush = lambda: None
|
||||
super().__init__(raw)
|
||||
|
||||
def detach(self):
|
||||
self.raw = None
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
return self.raw.read(*args, **kwargs)
|
||||
|
||||
def __getattr__(self, key):
|
||||
return getattr(self.raw, key)
|
||||
|
||||
|
||||
def _s3_open(url, method="GET"):
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
s3 = s3_util.get_s3_session(url, method="fetch")
|
||||
|
||||
bucket = parsed.netloc
|
||||
key = parsed.path
|
||||
|
||||
if key.startswith("/"):
|
||||
key = key[1:]
|
||||
|
||||
if method not in ("GET", "HEAD"):
|
||||
raise urllib.error.URLError(
|
||||
"Only GET and HEAD verbs are currently supported for the s3:// scheme"
|
||||
)
|
||||
|
||||
try:
|
||||
if method == "GET":
|
||||
obj = s3.get_object(Bucket=bucket, Key=key)
|
||||
# NOTE(opadron): Apply workaround here (see above)
|
||||
stream = WrapStream(obj["Body"])
|
||||
elif method == "HEAD":
|
||||
obj = s3.head_object(Bucket=bucket, Key=key)
|
||||
stream = BytesIO()
|
||||
except s3.ClientError as e:
|
||||
raise urllib.error.URLError(e) from e
|
||||
|
||||
headers = obj["ResponseMetadata"]["HTTPHeaders"]
|
||||
|
||||
return url, headers, stream
|
||||
|
||||
|
||||
class UrllibS3Handler(urllib.request.BaseHandler):
|
||||
def s3_open(self, req):
|
||||
orig_url = req.get_full_url()
|
||||
url, headers, stream = _s3_open(orig_url, method=req.get_method())
|
||||
return urllib.response.addinfourl(stream, headers, url)
|
167
lib/spack/spack/test/llnl/url.py
Normal file
167
lib/spack/spack/test/llnl/url.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
"""Tests for llnl.url functions"""
|
||||
import itertools
|
||||
|
||||
import pytest
|
||||
|
||||
import llnl.url
|
||||
|
||||
|
||||
@pytest.fixture(params=llnl.url.ALLOWED_ARCHIVE_TYPES)
|
||||
def archive_and_expected(request):
|
||||
archive_name = ".".join(["Foo", request.param])
|
||||
return archive_name, request.param
|
||||
|
||||
|
||||
def test_get_extension(archive_and_expected):
|
||||
"""Tests that we can predict correctly known extensions for simple cases."""
|
||||
archive, expected = archive_and_expected
|
||||
result = llnl.url.extension_from_path(archive)
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_get_bad_extension():
|
||||
"""Tests that a bad extension returns None"""
|
||||
result = llnl.url.extension_from_path("Foo.cxx")
|
||||
assert result is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url,expected",
|
||||
[
|
||||
# No suffix
|
||||
("rgb-1.0.6", "rgb-1.0.6"),
|
||||
# Misleading prefix
|
||||
("jpegsrc.v9b", "jpegsrc.v9b"),
|
||||
("turbolinux702", "turbolinux702"),
|
||||
("converge_install_2.3.16", "converge_install_2.3.16"),
|
||||
# Download type - code, source
|
||||
("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
|
||||
# Download type - src
|
||||
("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
|
||||
("go1.7.4.src", "go1.7.4"),
|
||||
# Download type - source
|
||||
("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
|
||||
("grib_api-1.17.0-Source", "grib_api-1.17.0"),
|
||||
# Download type - full
|
||||
("julia-0.4.3-full", "julia-0.4.3"),
|
||||
# Download type - bin
|
||||
("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
|
||||
# Download type - binary
|
||||
("Jmol-14.8.0-binary", "Jmol-14.8.0"),
|
||||
# Download type - gem
|
||||
("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
|
||||
# Download type - tar
|
||||
("gromacs-4.6.1-tar", "gromacs-4.6.1"),
|
||||
# Download type - sh
|
||||
("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
|
||||
# Download version - release
|
||||
("v1.0.4-release", "v1.0.4"),
|
||||
# Download version - stable
|
||||
("libevent-2.0.21-stable", "libevent-2.0.21"),
|
||||
# Download version - final
|
||||
("2.6.7-final", "2.6.7"),
|
||||
# Download version - rel
|
||||
("v1.9.5.1rel", "v1.9.5.1"),
|
||||
# Download version - orig
|
||||
("dash_0.5.5.1.orig", "dash_0.5.5.1"),
|
||||
# Download version - plus
|
||||
("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
|
||||
# License
|
||||
("cppad-20170114.gpl", "cppad-20170114"),
|
||||
# Arch
|
||||
("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
|
||||
("dislin-11.0.linux.i586_64", "dislin-11.0"),
|
||||
("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
|
||||
# OS - linux
|
||||
("astyle_2.04_linux", "astyle_2.04"),
|
||||
# OS - unix
|
||||
("install-tl-unx", "install-tl"),
|
||||
# OS - macos
|
||||
("astyle_1.23_macosx", "astyle_1.23"),
|
||||
("haxe-2.08-osx", "haxe-2.08"),
|
||||
# PyPI - wheel
|
||||
("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
|
||||
(
|
||||
"numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
|
||||
"macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
|
||||
"numpy-1.12.0",
|
||||
),
|
||||
# PyPI - exe
|
||||
("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
|
||||
# Combinations of multiple patterns - bin, release
|
||||
("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
|
||||
# Combinations of multiple patterns - all
|
||||
("p7zip_9.04_src_all", "p7zip_9.04"),
|
||||
# Combinations of multiple patterns - run
|
||||
("cuda_8.0.44_linux.run", "cuda_8.0.44"),
|
||||
# Combinations of multiple patterns - file
|
||||
("ack-2.14-single-file", "ack-2.14"),
|
||||
# Combinations of multiple patterns - jar
|
||||
("antlr-3.4-complete.jar", "antlr-3.4"),
|
||||
# Combinations of multiple patterns - oss
|
||||
("tbb44_20160128oss_src_0", "tbb44_20160128"),
|
||||
# Combinations of multiple patterns - darwin
|
||||
("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
|
||||
("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
|
||||
# Combinations of multiple patterns - centos
|
||||
("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
|
||||
# Combinations of multiple patterns - arch
|
||||
(
|
||||
"VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
|
||||
"VizGlow_v2.2alpha17-R21November2016",
|
||||
),
|
||||
("jdk-8u92-linux-x64", "jdk-8u92"),
|
||||
("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
|
||||
("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
|
||||
("trf407b.linux64", "trf407b"),
|
||||
# Combinations of multiple patterns - with
|
||||
("mafft-7.221-with-extensions-src", "mafft-7.221"),
|
||||
("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
|
||||
("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
|
||||
# Combinations of multiple patterns - rock
|
||||
("bitlib-23-2.src.rock", "bitlib-23-2"),
|
||||
# Combinations of multiple patterns - public
|
||||
("dakota-6.3-public.src", "dakota-6.3"),
|
||||
# Combinations of multiple patterns - universal
|
||||
("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
|
||||
# Combinations of multiple patterns - dynamic
|
||||
("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
|
||||
# Combinations of multiple patterns - other
|
||||
("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
|
||||
("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
|
||||
("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
|
||||
],
|
||||
)
|
||||
def test_url_strip_version_suffixes(url, expected):
|
||||
stripped = llnl.url.strip_version_suffixes(url)
|
||||
assert stripped == expected
|
||||
|
||||
|
||||
def test_strip_compression_extension(archive_and_expected):
|
||||
archive, extension = archive_and_expected
|
||||
stripped = llnl.url.strip_compression_extension(archive)
|
||||
if extension == "zip":
|
||||
assert stripped == "Foo.zip"
|
||||
stripped = llnl.url.strip_compression_extension(archive, "zip")
|
||||
assert stripped == "Foo"
|
||||
elif (
|
||||
extension.lower() == "tar"
|
||||
or extension in llnl.url.CONTRACTION_MAP
|
||||
or extension
|
||||
in [
|
||||
".".join(ext)
|
||||
for ext in itertools.product(llnl.url.PREFIX_EXTENSIONS, llnl.url.EXTENSIONS)
|
||||
]
|
||||
):
|
||||
assert stripped == "Foo.tar" or stripped == "Foo.TAR"
|
||||
else:
|
||||
assert stripped == "Foo"
|
||||
|
||||
|
||||
def test_allowed_archive(archive_and_expected):
|
||||
archive, _ = archive_and_expected
|
||||
assert llnl.url.allowed_archive(archive)
|
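The ``archive_and_expected`` fixture above runs each test once per entry in ``llnl.url.ALLOWED_ARCHIVE_TYPES``, pairing a synthetic "Foo.<ext>" file name with its extension. An illustrative expansion of that parameter space (not part of the diff):

import llnl.url

cases = [(".".join(["Foo", ext]), ext) for ext in llnl.url.ALLOWED_ARCHIVE_TYPES]
# e.g. ("Foo.tar.gz", "tar.gz"), ("Foo.zip", "zip"), ("Foo.tgz", "tgz"), ...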
@@ -17,124 +17,11 @@
|
||||
parse_name_offset,
|
||||
parse_version_offset,
|
||||
strip_name_suffixes,
|
||||
strip_version_suffixes,
|
||||
substitute_version,
|
||||
)
|
||||
from spack.version import Version
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url,expected",
|
||||
[
|
||||
# No suffix
|
||||
("rgb-1.0.6", "rgb-1.0.6"),
|
||||
# Misleading prefix
|
||||
("jpegsrc.v9b", "jpegsrc.v9b"),
|
||||
("turbolinux702", "turbolinux702"),
|
||||
("converge_install_2.3.16", "converge_install_2.3.16"),
|
||||
# Download type - code, source
|
||||
("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
|
||||
# Download type - src
|
||||
("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
|
||||
("go1.7.4.src", "go1.7.4"),
|
||||
# Download type - source
|
||||
("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
|
||||
("grib_api-1.17.0-Source", "grib_api-1.17.0"),
|
||||
# Download type - full
|
||||
("julia-0.4.3-full", "julia-0.4.3"),
|
||||
# Download type - bin
|
||||
("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
|
||||
# Download type - binary
|
||||
("Jmol-14.8.0-binary", "Jmol-14.8.0"),
|
||||
# Download type - gem
|
||||
("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
|
||||
# Download type - tar
|
||||
("gromacs-4.6.1-tar", "gromacs-4.6.1"),
|
||||
# Download type - sh
|
||||
("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
|
||||
# Download version - release
|
||||
("v1.0.4-release", "v1.0.4"),
|
||||
# Download version - stable
|
||||
("libevent-2.0.21-stable", "libevent-2.0.21"),
|
||||
# Download version - final
|
||||
("2.6.7-final", "2.6.7"),
|
||||
# Download version - rel
|
||||
("v1.9.5.1rel", "v1.9.5.1"),
|
||||
# Download version - orig
|
||||
("dash_0.5.5.1.orig", "dash_0.5.5.1"),
|
||||
# Download version - plus
|
||||
("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
|
||||
# License
|
||||
("cppad-20170114.gpl", "cppad-20170114"),
|
||||
# Arch
|
||||
("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
|
||||
("dislin-11.0.linux.i586_64", "dislin-11.0"),
|
||||
("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
|
||||
# OS - linux
|
||||
("astyle_2.04_linux", "astyle_2.04"),
|
||||
# OS - unix
|
||||
("install-tl-unx", "install-tl"),
|
||||
# OS - macos
|
||||
("astyle_1.23_macosx", "astyle_1.23"),
|
||||
("haxe-2.08-osx", "haxe-2.08"),
|
||||
# PyPI - wheel
|
||||
("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
|
||||
(
|
||||
"numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
|
||||
"macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
|
||||
"numpy-1.12.0",
|
||||
),
|
||||
# PyPI - exe
|
||||
("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
|
||||
# Combinations of multiple patterns - bin, release
|
||||
("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
|
||||
# Combinations of multiple patterns - all
|
||||
("p7zip_9.04_src_all", "p7zip_9.04"),
|
||||
# Combinations of multiple patterns - run
|
||||
("cuda_8.0.44_linux.run", "cuda_8.0.44"),
|
||||
# Combinations of multiple patterns - file
|
||||
("ack-2.14-single-file", "ack-2.14"),
|
||||
# Combinations of multiple patterns - jar
|
||||
("antlr-3.4-complete.jar", "antlr-3.4"),
|
||||
# Combinations of multiple patterns - oss
|
||||
("tbb44_20160128oss_src_0", "tbb44_20160128"),
|
||||
# Combinations of multiple patterns - darwin
|
||||
("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
|
||||
("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
|
||||
# Combinations of multiple patterns - centos
|
||||
("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
|
||||
# Combinations of multiple patterns - arch
|
||||
(
|
||||
"VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
|
||||
"VizGlow_v2.2alpha17-R21November2016",
|
||||
),
|
||||
("jdk-8u92-linux-x64", "jdk-8u92"),
|
||||
("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
|
||||
("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
|
||||
("trf407b.linux64", "trf407b"),
|
||||
# Combinations of multiple patterns - with
|
||||
("mafft-7.221-with-extensions-src", "mafft-7.221"),
|
||||
("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
|
||||
("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
|
||||
# Combinations of multiple patterns - rock
|
||||
("bitlib-23-2.src.rock", "bitlib-23-2"),
|
||||
# Combinations of multiple patterns - public
|
||||
("dakota-6.3-public.src", "dakota-6.3"),
|
||||
# Combinations of multiple patterns - universal
|
||||
("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
|
||||
# Combinations of multiple patterns - dynamic
|
||||
("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
|
||||
# Combinations of multiple patterns - other
|
||||
("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
|
||||
("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
|
||||
("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
|
||||
],
|
||||
)
|
||||
def test_url_strip_version_suffixes(url, expected):
|
||||
stripped = strip_version_suffixes(url)
|
||||
assert stripped == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url,version,expected",
|
||||
[
|
||||
|
@@ -10,6 +10,7 @@
|
||||
|
||||
import pytest
|
||||
|
||||
import llnl.url
|
||||
from llnl.util.filesystem import working_dir
|
||||
|
||||
from spack.paths import spack_root
|
||||
@@ -21,7 +22,7 @@
|
||||
ext_archive = {}
|
||||
[
|
||||
ext_archive.update({ext: ".".join(["Foo", ext])})
|
||||
for ext in scomp.ALLOWED_ARCHIVE_TYPES
|
||||
for ext in llnl.url.ALLOWED_ARCHIVE_TYPES
|
||||
if "TAR" not in ext
|
||||
]
|
||||
# Spack does not use Python native handling for tarballs or zip
|
||||
@@ -95,38 +96,3 @@ def test_unallowed_extension():
|
||||
bad_ext_archive = "Foo.cxx"
|
||||
with pytest.raises(CommandNotFoundError):
|
||||
scomp.decompressor_for(bad_ext_archive)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("archive", ext_archive.values())
|
||||
def test_get_extension(archive):
|
||||
ext = scomp.extension_from_path(archive)
|
||||
assert ext_archive[ext] == archive
|
||||
|
||||
|
||||
def test_get_bad_extension():
|
||||
archive = "Foo.cxx"
|
||||
ext = scomp.extension_from_path(archive)
|
||||
assert ext is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", ext_archive.values())
|
||||
def test_allowed_archive(path):
|
||||
assert scomp.allowed_archive(path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ext_path", ext_archive.items())
|
||||
def test_strip_compression_extension(ext_path):
|
||||
ext, path = ext_path
|
||||
stripped = scomp.strip_compression_extension(path)
|
||||
if ext == "zip":
|
||||
assert stripped == "Foo.zip"
|
||||
stripped = scomp.strip_compression_extension(path, "zip")
|
||||
assert stripped == "Foo"
|
||||
elif (
|
||||
ext == "tar"
|
||||
or ext in scomp.CONTRACTION_MAP.keys()
|
||||
or ext in [".".join(ext) for ext in product(scomp.PRE_EXTS, scomp.EXTS)]
|
||||
):
|
||||
assert stripped == "Foo.tar" or stripped == "Foo.TAR"
|
||||
else:
|
||||
assert stripped == "Foo"
|
||||
|
@@ -15,6 +15,7 @@
|
||||
import spack.config
|
||||
import spack.mirror
|
||||
import spack.paths
|
||||
import spack.url
|
||||
import spack.util.path
|
||||
import spack.util.s3
|
||||
import spack.util.url as url_util
|
||||
@@ -102,31 +103,31 @@ def test_spider_no_response(monkeypatch):
|
||||
|
||||
|
||||
def test_find_versions_of_archive_0():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=0)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=0)
|
||||
assert Version("0.0.0") in versions
|
||||
|
||||
|
||||
def test_find_versions_of_archive_1():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=1)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=1)
|
||||
assert Version("0.0.0") in versions
|
||||
assert Version("1.0.0") in versions
|
||||
|
||||
|
||||
def test_find_versions_of_archive_2():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||
assert Version("0.0.0") in versions
|
||||
assert Version("1.0.0") in versions
|
||||
assert Version("2.0.0") in versions
|
||||
|
||||
|
||||
def test_find_exotic_versions_of_archive_2():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||
# up for grabs to make this better.
|
||||
assert Version("2.0.0b2") in versions
|
||||
|
||||
|
||||
def test_find_versions_of_archive_3():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||
assert Version("0.0.0") in versions
|
||||
assert Version("1.0.0") in versions
|
||||
assert Version("2.0.0") in versions
|
||||
@@ -135,16 +136,14 @@ def test_find_versions_of_archive_3():
|
||||
|
||||
|
||||
def test_find_exotic_versions_of_archive_3():
|
||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||
assert Version("2.0.0b2") in versions
|
||||
assert Version("3.0a1") in versions
|
||||
assert Version("4.5-rc5") in versions
|
||||
|
||||
|
||||
def test_find_versions_of_archive_with_fragment():
|
||||
versions = spack.util.web.find_versions_of_archive(
|
||||
root_tarball, root_with_fragment, list_depth=0
|
||||
)
|
||||
versions = spack.url.find_versions_of_archive(root_tarball, root_with_fragment, list_depth=0)
|
||||
assert Version("5.0.0") in versions
|
||||
|
||||
|
||||
@@ -311,7 +310,7 @@ def test_remove_s3_url(monkeypatch, capfd):
|
||||
def get_s3_session(url, method="fetch"):
|
||||
return MockS3Client()
|
||||
|
||||
monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
|
||||
monkeypatch.setattr(spack.util.web, "get_s3_session", get_s3_session)
|
||||
|
||||
current_debug_level = tty.debug_level()
|
||||
tty.set_debug(1)
|
||||
|
@@ -27,246 +27,22 @@
|
||||
"""
|
||||
import io
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
import llnl.util.tty as tty
|
||||
import llnl.url
|
||||
from llnl.util.tty.color import cescape, colorize
|
||||
|
||||
import spack.error
|
||||
import spack.util.compression as comp
|
||||
import spack.util.path as spath
|
||||
import spack.util.web
|
||||
import spack.version
|
||||
|
||||
from spack.util.path import convert_to_posix_path
|
||||
|
||||
#
|
||||
# Note: We call the input to most of these functions a "path" but the functions
|
||||
# work on paths and URLs. There's not a good word for both of these, but
|
||||
# "path" seemed like the most generic term.
|
||||
#
|
||||
def find_list_urls(url):
|
||||
r"""Find good list URLs for the supplied URL.
|
||||
|
||||
By default, returns the dirname of the archive path.
|
||||
|
||||
Provides special treatment for the following websites, which have a
|
||||
unique list URL different from the dirname of the download URL:
|
||||
|
||||
========= =======================================================
|
||||
GitHub https://github.com/<repo>/<name>/releases
|
||||
GitLab https://gitlab.\*/<repo>/<name>/tags
|
||||
BitBucket https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
|
||||
CRAN https://\*.r-project.org/src/contrib/Archive/<name>
|
||||
PyPI https://pypi.org/simple/<name>/
|
||||
LuaRocks https://luarocks.org/modules/<repo>/<name>
|
||||
========= =======================================================
|
||||
|
||||
Note: this function is called by `spack versions`, `spack checksum`,
|
||||
and `spack create`, but not by `spack fetch` or `spack install`.
|
||||
|
||||
Parameters:
|
||||
url (str): The download URL for the package
|
||||
|
||||
Returns:
|
||||
set: One or more list URLs for the package
|
||||
"""
|
||||
|
||||
url_types = [
|
||||
# GitHub
|
||||
# e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
|
||||
(r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
|
||||
# GitLab API endpoint
|
||||
# e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
|
||||
(
|
||||
r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
|
||||
lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
|
||||
),
|
||||
# GitLab non-API endpoint
|
||||
# e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
|
||||
(r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
|
||||
# BitBucket
|
||||
# e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
|
||||
(r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
|
||||
# CRAN
|
||||
# e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
|
||||
# e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
|
||||
(
|
||||
r"(.*\.r-project\.org/src/contrib)/([^_]+)",
|
||||
lambda m: m.group(1) + "/Archive/" + m.group(2),
|
||||
),
|
||||
# PyPI
|
||||
# e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
||||
# e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
||||
# e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||
# e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||
# e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||
# e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
|
||||
(
|
||||
r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
|
||||
lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
|
||||
),
|
||||
# LuaRocks
|
||||
# e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
|
||||
# e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
|
||||
(
|
||||
r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
|
||||
+ r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
|
||||
lambda m: "https://luarocks.org/modules/"
|
||||
+ m.group("org")
|
||||
+ "/"
|
||||
+ m.group("name")
|
||||
+ "/",
|
||||
),
|
||||
]
|
||||
|
||||
list_urls = set([os.path.dirname(url)])
|
||||
|
||||
for pattern, fun in url_types:
|
||||
match = re.search(pattern, url)
|
||||
if match:
|
||||
list_urls.add(fun(match))
|
||||
|
||||
return list_urls
|
||||
|
||||
|
||||
def strip_query_and_fragment(path):
|
||||
try:
|
||||
components = urlsplit(path)
|
||||
stripped = components[:3] + (None, None)
|
||||
|
||||
query, frag = components[3:5]
|
||||
suffix = ""
|
||||
if query:
|
||||
suffix += "?" + query
|
||||
if frag:
|
||||
suffix += "#" + frag
|
||||
|
||||
return (urlunsplit(stripped), suffix)
|
||||
|
||||
except ValueError:
|
||||
tty.debug("Got error parsing path %s" % path)
|
||||
return (path, "") # Ignore URL parse errors here
|
||||
|
||||
|
||||
def strip_version_suffixes(path):
|
||||
"""Some tarballs contain extraneous information after the version:
|
||||
|
||||
* ``bowtie2-2.2.5-source``
|
||||
* ``libevent-2.0.21-stable``
|
||||
* ``cuda_8.0.44_linux.run``
|
||||
|
||||
These strings are not part of the version number and should be ignored.
|
||||
This function strips those suffixes off and returns the remaining string.
|
||||
The goal is that the version is always the last thing in ``path``:
|
||||
|
||||
* ``bowtie2-2.2.5``
|
||||
* ``libevent-2.0.21``
|
||||
* ``cuda_8.0.44``
|
||||
|
||||
Args:
|
||||
path (str): The filename or URL for the package
|
||||
|
||||
Returns:
|
||||
str: The ``path`` with any extraneous suffixes removed
|
||||
"""
|
||||
# NOTE: This could be done with complicated regexes in parse_version_offset
|
||||
# NOTE: The problem is that we would have to add these regexes to the end
|
||||
# NOTE: of every single version regex. Easier to just strip them off
|
||||
# NOTE: permanently
|
||||
|
||||
suffix_regexes = [
|
||||
# Download type
|
||||
r"[Ii]nstall",
|
||||
r"all",
|
||||
r"code",
|
||||
r"[Ss]ources?",
|
||||
r"file",
|
||||
r"full",
|
||||
r"single",
|
||||
r"with[a-zA-Z_-]+",
|
||||
r"rock",
|
||||
r"src(_0)?",
|
||||
r"public",
|
||||
r"bin",
|
||||
r"binary",
|
||||
r"run",
|
||||
r"[Uu]niversal",
|
||||
r"jar",
|
||||
r"complete",
|
||||
r"dynamic",
|
||||
r"oss",
|
||||
r"gem",
|
||||
r"tar",
|
||||
r"sh",
|
||||
# Download version
|
||||
r"release",
|
||||
r"bin",
|
||||
r"stable",
|
||||
r"[Ff]inal",
|
||||
r"rel",
|
||||
r"orig",
|
||||
r"dist",
|
||||
r"\+",
|
||||
# License
|
||||
r"gpl",
|
||||
# Arch
|
||||
# Needs to come before and after OS, appears in both orders
|
||||
r"ia32",
|
||||
r"intel",
|
||||
r"amd64",
|
||||
r"linux64",
|
||||
r"x64",
|
||||
r"64bit",
|
||||
r"x86[_-]64",
|
||||
r"i586_64",
|
||||
r"x86",
|
||||
r"i[36]86",
|
||||
r"ppc64(le)?",
|
||||
r"armv?(7l|6l|64)",
|
||||
# Other
|
||||
r"cpp",
|
||||
r"gtk",
|
||||
r"incubating",
|
||||
# OS
|
||||
r"[Ll]inux(_64)?",
|
||||
r"LINUX",
|
||||
r"[Uu]ni?x",
|
||||
r"[Ss]un[Oo][Ss]",
|
||||
r"[Mm]ac[Oo][Ss][Xx]?",
|
||||
r"[Oo][Ss][Xx]",
|
||||
r"[Dd]arwin(64)?",
|
||||
r"[Aa]pple",
|
||||
r"[Ww]indows",
|
||||
r"[Ww]in(64|32)?",
|
||||
r"[Cc]ygwin(64|32)?",
|
||||
r"[Mm]ingw",
|
||||
r"centos",
|
||||
# Arch
|
||||
# Needs to come before and after OS, appears in both orders
|
||||
r"ia32",
|
||||
r"intel",
|
||||
r"amd64",
|
||||
r"linux64",
|
||||
r"x64",
|
||||
r"64bit",
|
||||
r"x86[_-]64",
|
||||
r"i586_64",
|
||||
r"x86",
|
||||
r"i[36]86",
|
||||
r"ppc64(le)?",
|
||||
r"armv?(7l|6l|64)?",
|
||||
# PyPI
|
||||
r"[._-]py[23].*\.whl",
|
||||
r"[._-]cp[23].*\.whl",
|
||||
r"[._-]win.*\.exe",
|
||||
]
|
||||
|
||||
for regex in suffix_regexes:
|
||||
# Remove the suffix from the end of the path
|
||||
# This may be done multiple times
|
||||
path = re.sub(r"[._-]?" + regex + "$", "", path)
|
||||
|
||||
return path
|
||||
|
||||
|
||||
def strip_name_suffixes(path, version):
|
||||
@@ -341,69 +117,6 @@ def strip_name_suffixes(path, version):
|
||||
return path
|
||||
|
||||
|
||||
def split_url_extension(path):
|
||||
"""Some URLs have a query string, e.g.:
|
||||
|
||||
1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
|
||||
2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
|
||||
3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0
|
||||
|
||||
In (1), the query string needs to be stripped to get at the
|
||||
extension, but in (2) & (3), the filename is IN a single final query
|
||||
argument.
|
||||
|
||||
This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
|
||||
The suffix contains anything that was stripped off the URL to
|
||||
get at the file extension. In (1), it will be ``'?raw=true'``, but
|
||||
in (2), it will be empty. In (3) the suffix is a parameter that follows
|
||||
after the file extension, e.g.:
|
||||
|
||||
1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')``
|
||||
2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', '.tar.gz', None)``
|
||||
3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', '.tar.bz2', '?ref=v7.0.0')``
|
||||
"""
|
||||
prefix, ext, suffix = path, "", ""
|
||||
|
||||
# Strip off sourceforge download suffix.
|
||||
# e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
|
||||
prefix, suffix = spath.find_sourceforge_suffix(path)
|
||||
|
||||
ext = comp.extension_from_path(prefix)
|
||||
if ext is not None:
|
||||
prefix = comp.strip_extension(prefix)
|
||||
|
||||
else:
|
||||
prefix, suf = strip_query_and_fragment(prefix)
|
||||
ext = comp.extension_from_path(prefix)
|
||||
prefix = comp.strip_extension(prefix)
|
||||
suffix = suf + suffix
|
||||
if ext is None:
|
||||
ext = ""
|
||||
|
||||
return prefix, ext, suffix
|
||||
|
||||
|
||||
def determine_url_file_extension(path):
|
||||
"""This returns the type of archive a URL refers to. This is
|
||||
sometimes confusing because of URLs like:
|
||||
|
||||
(1) https://github.com/petdance/ack/tarball/1.93_02
|
||||
|
||||
Where the URL doesn't actually contain the filename. We need
|
||||
to know what type it is so that we can appropriately name files
|
||||
in mirrors.
|
||||
"""
|
||||
match = re.search(r"github.com/.+/(zip|tar)ball/", path)
|
||||
if match:
|
||||
if match.group(1) == "zip":
|
||||
return "zip"
|
||||
elif match.group(1) == "tar":
|
||||
return "tar.gz"
|
||||
|
||||
prefix, ext, suffix = split_url_extension(path)
|
||||
return ext
|
||||
|
||||
|
||||
def parse_version_offset(path):
|
||||
"""Try to extract a version string from a filename or URL.
|
||||
|
||||
@@ -426,13 +139,13 @@ def parse_version_offset(path):
|
||||
# path: The prefix of the URL, everything before the ext and suffix
|
||||
# ext: The file extension
|
||||
# suffix: Any kind of query string that begins with a '?'
|
||||
path, ext, suffix = split_url_extension(path)
|
||||
path, ext, suffix = llnl.url.split_url_extension(path)
|
||||
|
||||
# stem: Everything from path after the final '/'
|
||||
original_stem = os.path.basename(path)
|
||||
|
||||
# Try to strip off anything after the version number
|
||||
stem = strip_version_suffixes(original_stem)
|
||||
stem = llnl.url.strip_version_suffixes(original_stem)
|
||||
|
||||
# Assumptions:
|
||||
#
|
||||
@@ -620,7 +333,7 @@ def parse_name_offset(path, v=None):
|
||||
# path: The prefix of the URL, everything before the ext and suffix
|
||||
# ext: The file extension
|
||||
# suffix: Any kind of query string that begins with a '?'
|
||||
path, ext, suffix = split_url_extension(path)
|
||||
path, ext, suffix = llnl.url.split_url_extension(path)
|
||||
|
||||
# stem: Everything from path after the final '/'
|
||||
original_stem = os.path.basename(path)
|
||||
@@ -735,28 +448,6 @@ def parse_name_and_version(path):
|
||||
return (name, ver)
|
||||
|
||||
|
||||
def insensitize(string):
|
||||
"""Change upper and lowercase letters to be case insensitive in
|
||||
the provided string. e.g., 'a' becomes '[Aa]', 'B' becomes
|
||||
'[bB]', etc. Use for building regexes."""
|
||||
|
||||
def to_ins(match):
|
||||
char = match.group(1)
|
||||
return "[%s%s]" % (char.lower(), char.upper())
|
||||
|
||||
return re.sub(r"([a-zA-Z])", to_ins, string)
|
||||
|
||||
|
||||
def cumsum(elts, init=0, fn=lambda x: x):
|
||||
"""Return cumulative sum of result of fn on each element in elts."""
|
||||
sums = []
|
||||
s = init
|
||||
for i, e in enumerate(elts):
|
||||
sums.append(s)
|
||||
s += fn(e)
|
||||
return sums
|
||||
|
||||
|
||||
def find_all(substring, string):
|
||||
"""Returns a list containing the indices of
|
||||
every occurrence of substring in string."""
|
||||
@@ -912,6 +603,122 @@ def color_url(path, **kwargs):
|
||||
return colorize(out.getvalue())
|
||||
|
||||
|
||||
def find_versions_of_archive(
|
||||
archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
|
||||
):
|
||||
"""Scrape web pages for new versions of a tarball. This function prefers URLs in the
|
||||
following order: links found on the scraped page that match a url generated by the
|
||||
reference package, found and in the archive_urls list, found and derived from those
|
||||
in the archive_urls list, and if none are found for a version then the item in the
|
||||
archive_urls list is included for the version.
|
||||
|
||||
Args:
|
||||
archive_urls (str or list or tuple): URL or sequence of URLs for
|
||||
different versions of a package. Typically these are just the
|
||||
tarballs from the package file itself. By default, this searches
|
||||
the parent directories of archives.
|
||||
list_url (str or None): URL for a listing of archives.
|
||||
Spack will scrape these pages for download links that look
|
||||
like the archive URL.
|
||||
list_depth (int): max depth to follow links on list_url pages.
|
||||
Defaults to 0.
|
||||
concurrency (int): maximum number of concurrent requests
|
||||
reference_package (spack.package_base.PackageBase or None): a spack package
|
||||
used as a reference for url detection. Uses the url_for_version
|
||||
method on the package to produce reference urls which, if found,
|
||||
are preferred.
|
||||
"""
    if not isinstance(archive_urls, (list, tuple)):
        archive_urls = [archive_urls]

    # Generate a list of list_urls based on archive urls and any
    # explicitly listed list_url in the package
    list_urls = set()
    if list_url is not None:
        list_urls.add(list_url)
    for aurl in archive_urls:
        list_urls |= llnl.url.find_list_urls(aurl)

    # Add '/' to the end of the URL. Some web servers require this.
    additional_list_urls = set()
    for lurl in list_urls:
        if not lurl.endswith("/"):
            additional_list_urls.add(lurl + "/")
    list_urls |= additional_list_urls

    # Grab some web pages to scrape.
    pages, links = spack.util.web.spider(list_urls, depth=list_depth, concurrency=concurrency)

    # Scrape them for archive URLs
    regexes = []
    for aurl in archive_urls:
        # This creates a regex from the URL with a capture group for
        # the version part of the URL. The capture group is converted
        # to a generic wildcard, so we can use this to extract things
        # on a page that look like archive URLs.
        url_regex = wildcard_version(aurl)

        # We'll be a bit more liberal and just look for the archive
        # part, not the full path.
        # this is a URL so it is a posixpath even on Windows
        url_regex = pathlib.PurePosixPath(url_regex).name

        # We need to add a / to the beginning of the regex to prevent
        # Spack from picking up similarly named packages like:
        # https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
        # https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
        # https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
        # https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
        url_regex = "/" + url_regex

        # We need to add a $ anchor to the end of the regex to prevent
        # Spack from picking up signature files like:
        # .asc
        # .md5
        # .sha256
        # .sig
        # However, SourceForge downloads still need to end in '/download'.
        url_regex += r"(\/download)?"
        # PyPI adds #sha256=... to the end of the URL
        url_regex += "(#sha256=.*)?"
        url_regex += "$"

        regexes.append(url_regex)

    regexes = [re.compile(r) for r in regexes]
    # Build a dict version -> URL from any links that match the wildcards.
    # Walk through archive_url links first.
    # Any conflicting versions will be overwritten by the list_url links.
    versions = {}
    matched = set()
    for url in sorted(links):
        url = convert_to_posix_path(url)
        if any(r.search(url) for r in regexes):
            try:
                ver = parse_version(url)
                if ver in matched:
                    continue
                versions[ver] = url
                # prevent this version from getting overwritten
                if reference_package is not None:
                    if url == reference_package.url_for_version(ver):
                        matched.add(ver)
                else:
                    extrapolated_urls = [substitute_version(u, ver) for u in archive_urls]
                    if url in extrapolated_urls:
                        matched.add(ver)
            except UndetectableVersionError:
                continue

    for url in archive_urls:
        url = convert_to_posix_path(url)
        ver = parse_version(url)
        if ver not in versions:
            versions[ver] = url

    return versions
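A minimal usage sketch for the relocated function (the URL below is illustrative, not taken from this commit); after this change the function lives in spack.url, which is how the updated fetch_remote_versions further down calls it:

    import spack.url

    versions = spack.url.find_versions_of_archive(
        "https://example.org/downloads/foo-1.2.3.tar.gz", list_depth=1
    )
    for ver, url in sorted(versions.items()):
        print(ver, url)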


class UrlParseError(spack.error.SpackError):
    """Raised when the URL module can't parse something correctly."""


@@ -9,27 +9,13 @@
import re
import shutil
import sys
from itertools import product

import llnl.url
from llnl.util import tty

import spack.util.path as spath
from spack.error import SpackError
from spack.util.executable import CommandNotFoundError, which

# Supported archive extensions.
PRE_EXTS = ["tar", "TAR"]
EXTS = ["gz", "bz2", "xz", "Z"]
NOTAR_EXTS = ["zip", "tgz", "tbz2", "tbz", "txz"]
CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}

# Add PRE_EXTS and EXTS last so that .tar.gz is matched *before* .tar or .gz
ALLOWED_ARCHIVE_TYPES = (
    [".".join(ext) for ext in product(PRE_EXTS, EXTS)] + PRE_EXTS + EXTS + NOTAR_EXTS
)

ALLOWED_SINGLE_EXT_ARCHIVE_TYPES = PRE_EXTS + EXTS + NOTAR_EXTS

try:
    import bz2  # noqa

@@ -66,10 +52,6 @@ def is_bz2_supported():
    return _bz2_support


def allowed_archive(path):
    return False if not path else any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
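A quick behavioural sketch with made-up paths (illustrative, not part of the diff):

    allowed_archive("foo-1.0.tar.gz")   # True: ".tar.gz" is in ALLOWED_ARCHIVE_TYPES
    allowed_archive("foo-1.0.rpm")      # False: not a supported archive type
    allowed_archive("")                 # False: empty paths are rejected up front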


def _system_untar(archive_file, remove_archive_file=False):
    """Returns path to unarchived tar file.
    Untars archive via system tar.

@@ -78,7 +60,7 @@ def _system_untar(archive_file, remove_archive_file=False):
        archive_file (str): absolute path to the archive to be extracted.
            Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).
    """
    archive_file_no_ext = strip_extension(archive_file)
    archive_file_no_ext = llnl.url.strip_extension(archive_file)
    outfile = os.path.basename(archive_file_no_ext)
    if archive_file_no_ext == archive_file:
        # the archive file has no extension. Tar on windows cannot untar onto itself

@@ -114,7 +96,7 @@ def _bunzip2(archive_file):
def _py_bunzip(archive_file):
    """Returns path to decompressed file.
    Decompresses bz2 compressed archives/files via python's bz2 module"""
    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    f_bz = bz2.BZ2File(archive_file, mode="rb")

@@ -128,7 +110,7 @@ def _system_bunzip(archive_file):
    """Returns path to decompressed file.
    Decompresses bz2 compressed archives/files via system bzip2 utility"""
    compressed_file_name = os.path.basename(archive_file)
    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    copy_path = os.path.join(working_dir, compressed_file_name)

@@ -158,7 +140,7 @@ def _gunzip(archive_file):
def _py_gunzip(archive_file):
    """Returns path to gunzip'd file
    Decompresses `.gz` compressed archvies via python gzip module"""
    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "gz"))
    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "gz"))
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    f_in = gzip.open(archive_file, "rb")

@@ -171,7 +153,7 @@ def _py_gunzip(archive_file):
def _system_gunzip(archive_file):
    """Returns path to gunzip'd file
    Decompresses `.gz` compressed files via system gzip"""
    archive_file_no_ext = strip_compression_extension(archive_file)
    archive_file_no_ext = llnl.url.strip_compression_extension(archive_file)
    if archive_file_no_ext == archive_file:
        # the zip file has no extension. On Unix gunzip cannot unzip onto itself
        archive_file = archive_file + ".gz"

@@ -196,7 +178,7 @@ def _unzip(archive_file):
    Args:
        archive_file (str): absolute path of the file to be decompressed
    """
    extracted_file = os.path.basename(strip_extension(archive_file, "zip"))
    extracted_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="zip"))
    if sys.platform == "win32":
        return _system_untar(archive_file)
    else:

@@ -259,7 +241,7 @@ def unarchive(archive_file):
def _py_lzma(archive_file):
    """Returns path to decompressed .xz files
    Decompress lzma compressed .xz files via python lzma module"""
    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "xz"))
    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "xz"))
    archive_out = os.path.join(os.getcwd(), decompressed_file)
    with open(archive_out, "wb") as ar:
        with lzma.open(archive_file) as lar:

@@ -272,7 +254,7 @@ def _xz(archive_file):
    Decompress lzma compressed .xz files via xz command line
    tool.
    """
    decompressed_file = os.path.basename(strip_extension(archive_file, "xz"))
    decompressed_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="xz"))
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    compressed_file = os.path.basename(archive_file)

@@ -297,13 +279,13 @@ def _system_7zip(archive_file):
    Args:
        archive_file (str): absolute path of file to be unarchived
    """
    outfile = os.path.basename(strip_compression_extension(archive_file))
    outfile = os.path.basename(llnl.url.strip_compression_extension(archive_file))
    _7z = which("7z")
    if not _7z:
        raise CommandNotFoundError(
            "7z unavailable,\
            unable to extract %s files. 7z can be installed via Spack"
            % extension_from_path(archive_file)
            % llnl.url.extension_from_path(archive_file)
        )
    _7z.add_default_arg("e")
    _7z(archive_file)

@@ -318,7 +300,7 @@ def decompressor_for(path, extension=None):
    if not extension:
        extension = extension_from_file(path, decompress=True)

    if not allowed_archive(extension):
    if not llnl.url.allowed_archive(extension):
        raise CommandNotFoundError(
            "Cannot extract archive, \
            unrecognized file extension: '%s'"

@@ -394,7 +376,7 @@ def decompressor_for_win(extension):
        path (str): path of the archive file requiring decompression
        extension (str): extension
    """
    extension = expand_contracted_extension(extension)
    extension = llnl.url.expand_contracted_extension(extension)
    # Windows native tar can handle .zip extensions, use standard
    # unzip method
    if re.match(r"zip$", extension):

@@ -415,7 +397,7 @@ def decompressor_for_win(extension):
    # python based decompression strategy
    # Expand extension from contracted extension i.e. tar.gz from .tgz
    # no-op on non contracted extensions
    compression_extension = compression_ext_from_compressed_archive(extension)
    compression_extension = llnl.url.compression_ext_from_compressed_archive(extension)
    decompressor = _determine_py_decomp_archive_strategy(compression_extension)
    if not decompressor:
        raise SpackError(

@@ -657,7 +639,7 @@ def extension_from_stream(stream, decompress=False):
            "Cannot derive file extension from magic number;"
            " falling back to regex path parsing."
        )
        return extension_from_path(stream.name)
        return llnl.url.extension_from_path(stream.name)
    resultant_ext = suffix_ext if not prefix_ext else ".".join([prefix_ext, suffix_ext])
    tty.debug("File extension %s successfully derived by magic number." % resultant_ext)
    return resultant_ext

@@ -693,114 +675,11 @@ def extension_from_file(file, decompress=False):
            if ext and ext.startswith("tar."):
                suf = ext.split(".")[1]
                abbr = "t" + suf
                if check_extension(file, abbr):
                if llnl.url.has_extension(file, abbr):
                    return abbr
            if not ext:
                # If unable to parse extension from stream,
                # attempt to fall back to string parsing
                ext = extension_from_path(file)
                ext = llnl.url.extension_from_path(file)
            return ext
    return None


def extension_from_path(path):
    """Returns the allowed archive extension for a path.
    If path does not include a valid archive extension
    (see`spack.util.compression.ALLOWED_ARCHIVE_TYPES`) return None
    """
    if path is None:
        raise ValueError("Can't call extension() on None")

    for t in ALLOWED_ARCHIVE_TYPES:
        if check_extension(path, t):
            return t
    return None


def strip_compression_extension(path, ext=None):
    """Returns path with last supported (can be combined with tar) or
    provided archive extension stripped"""
    path_ext = extension_from_path(path)
    if path_ext:
        path = expand_contracted_extension_in_path(path)
        exts_to_check = EXTS
        if ext:
            exts_to_check = [ext]
        for ext_check in exts_to_check:
            mod_path = check_and_remove_ext(path, ext_check)
            if mod_path != path:
                return mod_path
    return path


def strip_extension(path, ext=None):
    """Returns the part of a path that does not include extension.
    If ext is given, only attempts to remove that extension. If no
    extension given, attempts to strip any valid extension from path"""
    if ext:
        return check_and_remove_ext(path, ext)
    for t in ALLOWED_ARCHIVE_TYPES:
        mod_path = check_and_remove_ext(path, t)
        if mod_path != path:
            return mod_path
    return path
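Behaviour sketch with made-up file names (illustrative only; this is the copy being removed here, and the same behaviour now lives in llnl.url):

    strip_extension("foo-1.2.3.tar.gz")             # -> "foo-1.2.3"
    strip_extension("foo-1.2.3.tar.gz", ext="gz")   # -> "foo-1.2.3.tar"
    strip_extension("foo-1.2.3")                    # -> "foo-1.2.3" (no known extension)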


def check_extension(path, ext):
    """Returns true if extension is present in path
    false otherwise"""
    # Strip sourceforge suffix.
    prefix, _ = spath.find_sourceforge_suffix(path)
    if not ext.startswith(r"\."):
        ext = r"\.%s$" % ext
    if re.search(ext, prefix):
        return True
    return False


def reg_remove_ext(path, ext):
    """Returns path with ext remove via regex"""
    if path and ext:
        suffix = r"\.%s$" % ext
        return re.sub(suffix, "", path)
    return path


def check_and_remove_ext(path, ext):
    """Returns path with extension removed if extension
    is present in path. Otherwise just returns path"""
    if check_extension(path, ext):
        return reg_remove_ext(path, ext)
    return path


def _substitute_extension(path, old_ext, new_ext):
    """Returns path with old_ext replaced with new_ext.
    old_ext and new_ext can be extension strings or regexs"""
    return re.sub(rf"{old_ext}", rf"{new_ext}", path)


def expand_contracted_extension_in_path(path, ext=None):
    """Returns path with any contraction extension (i.e. tgz) expanded
    (i.e. tar.gz). If ext is specified, only attempt to expand that extension"""
    if not ext:
        ext = extension_from_path(path)
    expanded_ext = expand_contracted_extension(ext)
    if expanded_ext != ext:
        return _substitute_extension(path, ext, expanded_ext)
    return path


def expand_contracted_extension(extension):
    """Return expanded version of contracted extension
    i.e. .tgz -> .tar.gz, no op on non contracted extensions"""
    extension = extension.strip(".")
    return CONTRACTION_MAP.get(extension, extension)
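A quick sketch of what the contraction mapping does (illustrative inputs):

    expand_contracted_extension(".tgz")     # -> "tar.gz"
    expand_contracted_extension("tbz2")     # -> "tar.bz2"
    expand_contracted_extension("tar.xz")   # -> "tar.xz" (already expanded, no-op)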


def compression_ext_from_compressed_archive(extension):
    """Returns compression extension for a compressed archive"""
    extension = expand_contracted_extension(extension)
    for ext in [*EXTS]:
        if ext in extension:
            return ext

@@ -10,6 +10,10 @@

import os
import sys
import urllib.parse
import urllib.response
from urllib.error import URLError
from urllib.request import BaseHandler

import llnl.util.tty as tty

@@ -222,3 +226,21 @@ def get_blob_headers(self):
        }

        return headers


def gcs_open(req, *args, **kwargs):
    """Open a reader stream to a blob object on GCS"""
    url = urllib.parse.urlparse(req.get_full_url())
    gcsblob = GCSBlob(url)

    if not gcsblob.exists():
        raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
    stream = gcsblob.get_blob_byte_stream()
    headers = gcsblob.get_blob_headers()

    return urllib.response.addinfourl(stream, headers, url)


class GCSHandler(BaseHandler):
    def gs_open(self, req):
        return gcs_open(req)
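A sketch of how this handler is meant to be wired into urllib (the bucket name is made up, and gs:// access still requires the usual GCS credentials to be configured):

    from urllib.request import build_opener

    opener = build_opener(GCSHandler())
    with opener.open("gs://my-bucket/path/to/blob") as response:
        data = response.read()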

@@ -109,15 +109,6 @@ def win_exe_ext():
    return ".exe"


def find_sourceforge_suffix(path):
    """find and match sourceforge filepath components
    Return match object"""
    match = re.search(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$", path)
    if match:
        return match.groups()
    return path, ""
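Behaviour sketch (the URLs are illustrative):

    find_sourceforge_suffix("https://downloads.sourceforge.net/project/bzip2/bzip2-1.0.8.tar.gz/download")
    # -> ("https://downloads.sourceforge.net/project/bzip2/bzip2-1.0.8.tar.gz", "/download")
    find_sourceforge_suffix("https://example.org/foo-1.0.tar.gz")
    # -> ("https://example.org/foo-1.0.tar.gz", "")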


def path_to_os_path(*pths):
    """
    Takes an arbitrary number of positional parameters

@@ -3,10 +3,13 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import urllib.error
import urllib.parse
import urllib.request
import urllib.response
from io import BufferedReader, BytesIO, IOBase
from typing import Any, Dict, Tuple

import spack
import spack.config

#: Map (mirror name, method) tuples to s3 client instances.

@@ -114,4 +117,72 @@ def get_mirror_s3_connection_info(mirror, method):
    if endpoint_url:
        s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(endpoint_url)

    return (s3_connection, s3_client_args)
    return s3_connection, s3_client_args


# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-are along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
    def __init__(self, raw):
        # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
        # only add missing attributes in older versions.
        # https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
        if not isinstance(raw, IOBase):
            raw.readable = lambda: True
            raw.writable = lambda: False
            raw.seekable = lambda: False
            raw.closed = False
            raw.flush = lambda: None
        super().__init__(raw)

    def detach(self):
        self.raw = None

    def read(self, *args, **kwargs):
        return self.raw.read(*args, **kwargs)

    def __getattr__(self, key):
        return getattr(self.raw, key)


def _s3_open(url, method="GET"):
    parsed = urllib.parse.urlparse(url)
    s3 = get_s3_session(url, method="fetch")

    bucket = parsed.netloc
    key = parsed.path

    if key.startswith("/"):
        key = key[1:]

    if method not in ("GET", "HEAD"):
        raise urllib.error.URLError(
            "Only GET and HEAD verbs are currently supported for the s3:// scheme"
        )

    try:
        if method == "GET":
            obj = s3.get_object(Bucket=bucket, Key=key)
            # NOTE(opadron): Apply workaround here (see above)
            stream = WrapStream(obj["Body"])
        elif method == "HEAD":
            obj = s3.head_object(Bucket=bucket, Key=key)
            stream = BytesIO()
    except s3.ClientError as e:
        raise urllib.error.URLError(e) from e

    headers = obj["ResponseMetadata"]["HTTPHeaders"]

    return url, headers, stream


class UrllibS3Handler(urllib.request.BaseHandler):
    def s3_open(self, req):
        orig_url = req.get_full_url()
        url, headers, stream = _s3_open(orig_url, method=req.get_method())
        return urllib.response.addinfourl(stream, headers, url)
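As with the GCS handler above, a minimal sketch of how this handler would be composed into an opener (the bucket and key are made up, and credentials come from the usual boto3/mirror configuration):

    from urllib.request import build_opener

    opener = build_opener(UrllibS3Handler())
    with opener.open("s3://my-mirror-bucket/build_cache/index.json") as response:
        body = response.read()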

@@ -21,23 +21,17 @@
from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener

import llnl.util.lang
import llnl.util.tty as tty
import llnl.url
from llnl.util import lang, tty
from llnl.util.filesystem import mkdirp, rename, working_dir

import spack
import spack.config
import spack.error
import spack.gcs_handler
import spack.s3_handler
import spack.url
import spack.util.crypto
import spack.util.gcs as gcs_util
import spack.util.s3 as s3_util
import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES
from spack.util.executable import CommandNotFoundError, which
from spack.util.path import convert_to_posix_path

from .executable import CommandNotFoundError, which
from .gcs import GCSBlob, GCSBucket, GCSHandler
from .s3 import UrllibS3Handler, get_s3_session


class DetailedHTTPError(HTTPError):

@@ -66,8 +60,8 @@ def http_error_default(self, req, fp, code, msg, hdrs):


def _urlopen():
    s3 = spack.s3_handler.UrllibS3Handler()
    gcs = spack.gcs_handler.GCSHandler()
    s3 = UrllibS3Handler()
    gcs = GCSHandler()
    error_handler = SpackHTTPDefaultErrorHandler()

    # One opener with HTTPS ssl enabled

@@ -90,7 +84,7 @@ def dispatch_open(fullurl, data=None, timeout=None):


#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
urlopen = llnl.util.lang.Singleton(_urlopen)
urlopen = lang.Singleton(_urlopen)

#: User-Agent used in Request objects
SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)

@@ -190,14 +184,14 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
        while remote_path.startswith("/"):
            remote_path = remote_path[1:]

        s3 = s3_util.get_s3_session(remote_url, method="push")
        s3 = get_s3_session(remote_url, method="push")
        s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    elif remote_url.scheme == "gs":
        gcs = gcs_util.GCSBlob(remote_url)
        gcs = GCSBlob(remote_url)
        gcs.upload_to_blob(local_file_path)
        if not keep_original:
            os.remove(local_file_path)

@@ -427,7 +421,7 @@ def remove_url(url, recursive=False):

    if url.scheme == "s3":
        # Try to find a mirror for potential connection information
        s3 = s3_util.get_s3_session(url, method="push")
        s3 = get_s3_session(url, method="push")
        bucket = url.netloc
        if recursive:
            # Because list_objects_v2 can only return up to 1000 items

@@ -460,10 +454,10 @@ def remove_url(url, recursive=False):

    elif url.scheme == "gs":
        if recursive:
            bucket = gcs_util.GCSBucket(url)
            bucket = GCSBucket(url)
            bucket.destroy(recursive=recursive)
        else:
            blob = gcs_util.GCSBlob(url)
            blob = GCSBlob(url)
            blob.delete_blob()
        return

@@ -538,14 +532,14 @@ def list_url(url, recursive=False):
        ]

    if url.scheme == "s3":
        s3 = s3_util.get_s3_session(url, method="fetch")
        s3 = get_s3_session(url, method="fetch")
        if recursive:
            return list(_iter_s3_prefix(s3, url))

        return list(set(key.split("/", 1)[0] for key in _iter_s3_prefix(s3, url)))

    elif url.scheme == "gs":
        gcs = gcs_util.GCSBucket(url)
        gcs = GCSBucket(url)
        return gcs.get_all_blobs(recursive=recursive)


@@ -636,7 +630,7 @@ def _spider(url, collect_nested):
                links.add(abs_link)

                # Skip stuff that looks like an archive
                if any(raw_link.endswith(s) for s in ALLOWED_ARCHIVE_TYPES):
                if any(raw_link.endswith(s) for s in llnl.url.ALLOWED_ARCHIVE_TYPES):
                    continue

                # Skip already-visited links

@@ -696,7 +690,7 @@ def _spider(url, collect_nested):
                    current_depth, depth, len(spider_args)
                )
            )
            results = tp.map(llnl.util.lang.star(_spider), spider_args)
            results = tp.map(lang.star(_spider), spider_args)
            spider_args = []
            collect = current_depth < depth
            for sub_pages, sub_links, sub_spider_args in results:

@@ -713,123 +707,6 @@ def _spider(url, collect_nested):
    return pages, links


def find_versions_of_archive(
    archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
):
    """Scrape web pages for new versions of a tarball. This function prefers URLs in the
    following order: links found on the scraped page that match a url generated by the
    reference package, found and in the archive_urls list, found and derived from those
    in the archive_urls list, and if none are found for a version then the item in the
    archive_urls list is included for the version.

    Args:
        archive_urls (str or list or tuple): URL or sequence of URLs for
            different versions of a package. Typically these are just the
            tarballs from the package file itself. By default, this searches
            the parent directories of archives.
        list_url (str or None): URL for a listing of archives.
            Spack will scrape these pages for download links that look
            like the archive URL.
        list_depth (int): max depth to follow links on list_url pages.
            Defaults to 0.
        concurrency (int): maximum number of concurrent requests
        reference_package (spack.package_base.PackageBase or None): a spack package
            used as a reference for url detection. Uses the url_for_version
            method on the package to produce reference urls which, if found,
            are preferred.
    """
    if not isinstance(archive_urls, (list, tuple)):
        archive_urls = [archive_urls]

    # Generate a list of list_urls based on archive urls and any
    # explicitly listed list_url in the package
    list_urls = set()
    if list_url is not None:
        list_urls.add(list_url)
    for aurl in archive_urls:
        list_urls |= spack.url.find_list_urls(aurl)

    # Add '/' to the end of the URL. Some web servers require this.
    additional_list_urls = set()
    for lurl in list_urls:
        if not lurl.endswith("/"):
            additional_list_urls.add(lurl + "/")
    list_urls |= additional_list_urls

    # Grab some web pages to scrape.
    pages, links = spider(list_urls, depth=list_depth, concurrency=concurrency)

    # Scrape them for archive URLs
    regexes = []
    for aurl in archive_urls:
        # This creates a regex from the URL with a capture group for
        # the version part of the URL. The capture group is converted
        # to a generic wildcard, so we can use this to extract things
        # on a page that look like archive URLs.
        url_regex = spack.url.wildcard_version(aurl)

        # We'll be a bit more liberal and just look for the archive
        # part, not the full path.
        # this is a URL so it is a posixpath even on Windows
        url_regex = PurePosixPath(url_regex).name

        # We need to add a / to the beginning of the regex to prevent
        # Spack from picking up similarly named packages like:
        # https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
        # https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
        # https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
        # https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
        url_regex = "/" + url_regex

        # We need to add a $ anchor to the end of the regex to prevent
        # Spack from picking up signature files like:
        # .asc
        # .md5
        # .sha256
        # .sig
        # However, SourceForge downloads still need to end in '/download'.
        url_regex += r"(\/download)?"
        # PyPI adds #sha256=... to the end of the URL
        url_regex += "(#sha256=.*)?"
        url_regex += "$"

        regexes.append(url_regex)

    # Build a dict version -> URL from any links that match the wildcards.
    # Walk through archive_url links first.
    # Any conflicting versions will be overwritten by the list_url links.
    versions = {}
    matched = set()
    for url in sorted(links):
        url = convert_to_posix_path(url)
        if any(re.search(r, url) for r in regexes):
            try:
                ver = spack.url.parse_version(url)
                if ver in matched:
                    continue
                versions[ver] = url
                # prevent this version from getting overwritten
                if reference_package is not None:
                    if url == reference_package.url_for_version(ver):
                        matched.add(ver)
                else:
                    extrapolated_urls = [
                        spack.url.substitute_version(u, ver) for u in archive_urls
                    ]
                    if url in extrapolated_urls:
                        matched.add(ver)
            except spack.url.UndetectableVersionError:
                continue

    for url in archive_urls:
        url = convert_to_posix_path(url)
        ver = spack.url.parse_version(url)
        if ver not in versions:
            versions[ver] = url

    return versions


def get_header(headers, header_name):
    """Looks up a dict of headers for the given header value.

@@ -3,7 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import spack.util.web
import spack.url
from spack.package import *


@@ -120,9 +120,7 @@ def fetch_remote_versions(self, *args, **kwargs):
        return dict(
            map(
                lambda u: (u, self.url_for_version(u)),
                spack.util.web.find_versions_of_archive(
                    self.all_urls, self.list_url, self.list_depth
                ),
                spack.url.find_versions_of_archive(self.all_urls, self.list_url, self.list_depth),
            )
        )