Group primitive url/path handling functions together (#40028)
This commit is contained in:

committed by
GitHub

parent
bc02453f6d
commit
fb9e5fcc4f
459
lib/spack/llnl/url.py
Normal file
459
lib/spack/llnl/url.py
Normal file
@@ -0,0 +1,459 @@
|
|||||||
|
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
||||||
|
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
"""URL primitives that just require Python standard library."""
|
||||||
|
import itertools
|
||||||
|
import os.path
|
||||||
|
import re
|
||||||
|
from typing import Optional, Set, Tuple
|
||||||
|
from urllib.parse import urlsplit, urlunsplit
|
||||||
|
|
||||||
|
# Archive extensions allowed in Spack
|
||||||
|
PREFIX_EXTENSIONS = ("tar", "TAR")
|
||||||
|
EXTENSIONS = ("gz", "bz2", "xz", "Z")
|
||||||
|
NO_TAR_EXTENSIONS = ("zip", "tgz", "tbz2", "tbz", "txz")
|
||||||
|
|
||||||
|
# Add PREFIX_EXTENSIONS and EXTENSIONS last so that .tar.gz is matched *before* .tar or .gz
|
||||||
|
ALLOWED_ARCHIVE_TYPES = (
|
||||||
|
tuple(".".join(ext) for ext in itertools.product(PREFIX_EXTENSIONS, EXTENSIONS))
|
||||||
|
+ PREFIX_EXTENSIONS
|
||||||
|
+ EXTENSIONS
|
||||||
|
+ NO_TAR_EXTENSIONS
|
||||||
|
)
|
||||||
|
CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
|
||||||
|
|
||||||
|
|
||||||
|
def find_list_urls(url: str) -> Set[str]:
|
||||||
|
r"""Find good list URLs for the supplied URL.
|
||||||
|
|
||||||
|
By default, returns the dirname of the archive path.
|
||||||
|
|
||||||
|
Provides special treatment for the following websites, which have a
|
||||||
|
unique list URL different from the dirname of the download URL:
|
||||||
|
|
||||||
|
========= =======================================================
|
||||||
|
GitHub https://github.com/<repo>/<name>/releases
|
||||||
|
GitLab https://gitlab.\*/<repo>/<name>/tags
|
||||||
|
BitBucket https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
|
||||||
|
CRAN https://\*.r-project.org/src/contrib/Archive/<name>
|
||||||
|
PyPI https://pypi.org/simple/<name>/
|
||||||
|
LuaRocks https://luarocks.org/modules/<repo>/<name>
|
||||||
|
========= =======================================================
|
||||||
|
|
||||||
|
Note: this function is called by `spack versions`, `spack checksum`,
|
||||||
|
and `spack create`, but not by `spack fetch` or `spack install`.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
url (str): The download URL for the package
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
set: One or more list URLs for the package
|
||||||
|
"""
|
||||||
|
|
||||||
|
url_types = [
|
||||||
|
# GitHub
|
||||||
|
# e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
|
||||||
|
(r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
|
||||||
|
# GitLab API endpoint
|
||||||
|
# e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
|
||||||
|
(
|
||||||
|
r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
|
||||||
|
lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
|
||||||
|
),
|
||||||
|
# GitLab non-API endpoint
|
||||||
|
# e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
|
||||||
|
(r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
|
||||||
|
# BitBucket
|
||||||
|
# e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
|
||||||
|
(r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
|
||||||
|
# CRAN
|
||||||
|
# e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
|
||||||
|
# e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
|
||||||
|
(
|
||||||
|
r"(.*\.r-project\.org/src/contrib)/([^_]+)",
|
||||||
|
lambda m: m.group(1) + "/Archive/" + m.group(2),
|
||||||
|
),
|
||||||
|
# PyPI
|
||||||
|
# e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
||||||
|
# e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
||||||
|
# e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||||
|
# e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||||
|
# e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
|
||||||
|
# e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
|
||||||
|
(
|
||||||
|
r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
|
||||||
|
lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
|
||||||
|
),
|
||||||
|
# LuaRocks
|
||||||
|
# e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
|
||||||
|
# e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
|
||||||
|
(
|
||||||
|
r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
|
||||||
|
+ r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
|
||||||
|
lambda m: "https://luarocks.org/modules/"
|
||||||
|
+ m.group("org")
|
||||||
|
+ "/"
|
||||||
|
+ m.group("name")
|
||||||
|
+ "/",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
list_urls = {os.path.dirname(url)}
|
||||||
|
|
||||||
|
for pattern, fun in url_types:
|
||||||
|
match = re.search(pattern, url)
|
||||||
|
if match:
|
||||||
|
list_urls.add(fun(match))
|
||||||
|
|
||||||
|
return list_urls
|
||||||
|
|
||||||
|
|
||||||
|
def strip_query_and_fragment(url: str) -> Tuple[str, str]:
|
||||||
|
"""Strips query and fragment from a url, then returns the base url and the suffix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: URL to be stripped
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: when there is any error parsing the URL
|
||||||
|
"""
|
||||||
|
components = urlsplit(url)
|
||||||
|
stripped = components[:3] + (None, None)
|
||||||
|
|
||||||
|
query, frag = components[3:5]
|
||||||
|
suffix = ""
|
||||||
|
if query:
|
||||||
|
suffix += "?" + query
|
||||||
|
if frag:
|
||||||
|
suffix += "#" + frag
|
||||||
|
|
||||||
|
return urlunsplit(stripped), suffix
|
||||||
|
|
||||||
|
|
||||||
|
SOURCEFORGE_RE = re.compile(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$")
|
||||||
|
|
||||||
|
|
||||||
|
def split_url_on_sourceforge_suffix(url: str) -> Tuple[str, ...]:
|
||||||
|
"""If the input is a sourceforge URL, returns base URL and "/download" suffix. Otherwise,
|
||||||
|
returns the input URL and an empty string.
|
||||||
|
"""
|
||||||
|
match = SOURCEFORGE_RE.search(url)
|
||||||
|
if match is not None:
|
||||||
|
return match.groups()
|
||||||
|
return url, ""
|
||||||
|
|
||||||
|
|
||||||
|
def has_extension(path_or_url: str, ext: str) -> bool:
|
||||||
|
"""Returns true if the extension in input is present in path, false otherwise."""
|
||||||
|
prefix, _ = split_url_on_sourceforge_suffix(path_or_url)
|
||||||
|
if not ext.startswith(r"\."):
|
||||||
|
ext = rf"\.{ext}$"
|
||||||
|
|
||||||
|
if re.search(ext, prefix):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def extension_from_path(path_or_url: Optional[str]) -> Optional[str]:
|
||||||
|
"""Tries to match an allowed archive extension to the input. Returns the first match,
|
||||||
|
or None if no match was found.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: if the input is None
|
||||||
|
"""
|
||||||
|
if path_or_url is None:
|
||||||
|
raise ValueError("Can't call extension() on None")
|
||||||
|
|
||||||
|
for t in ALLOWED_ARCHIVE_TYPES:
|
||||||
|
if has_extension(path_or_url, t):
|
||||||
|
return t
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def remove_extension(path_or_url: str, *, extension: str) -> str:
|
||||||
|
"""Returns the input with the extension removed"""
|
||||||
|
suffix = rf"\.{extension}$"
|
||||||
|
return re.sub(suffix, "", path_or_url)
|
||||||
|
|
||||||
|
|
||||||
|
def check_and_remove_ext(path: str, *, extension: str) -> str:
|
||||||
|
"""Returns the input path with the extension removed, if the extension is present in path.
|
||||||
|
Otherwise, returns the input unchanged.
|
||||||
|
"""
|
||||||
|
if not has_extension(path, extension):
|
||||||
|
return path
|
||||||
|
path, _ = split_url_on_sourceforge_suffix(path)
|
||||||
|
return remove_extension(path, extension=extension)
|
||||||
|
|
||||||
|
|
||||||
|
def strip_extension(path_or_url: str, *, extension: Optional[str] = None) -> str:
|
||||||
|
"""If a path contains the extension in input, returns the path stripped of the extension.
|
||||||
|
Otherwise, returns the input path.
|
||||||
|
|
||||||
|
If extension is None, attempts to strip any allowed extension from path.
|
||||||
|
"""
|
||||||
|
if extension is None:
|
||||||
|
for t in ALLOWED_ARCHIVE_TYPES:
|
||||||
|
if has_extension(path_or_url, ext=t):
|
||||||
|
extension = t
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return path_or_url
|
||||||
|
|
||||||
|
return check_and_remove_ext(path_or_url, extension=extension)
|
||||||
|
|
||||||
|
|
||||||
|
def split_url_extension(url: str) -> Tuple[str, ...]:
|
||||||
|
"""Some URLs have a query string, e.g.:
|
||||||
|
|
||||||
|
1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
|
||||||
|
2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
|
||||||
|
3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0
|
||||||
|
|
||||||
|
In (1), the query string needs to be stripped to get at the
|
||||||
|
extension, but in (2) & (3), the filename is IN a single final query
|
||||||
|
argument.
|
||||||
|
|
||||||
|
This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
|
||||||
|
The suffix contains anything that was stripped off the URL to
|
||||||
|
get at the file extension. In (1), it will be ``'?raw=true'``, but
|
||||||
|
in (2), it will be empty. In (3) the suffix is a parameter that follows
|
||||||
|
after the file extension, e.g.:
|
||||||
|
|
||||||
|
1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')``
|
||||||
|
2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', '.tar.gz', None)``
|
||||||
|
3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', '.tar.bz2', '?ref=v7.0.0')``
|
||||||
|
"""
|
||||||
|
# Strip off sourceforge download suffix.
|
||||||
|
# e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
|
||||||
|
prefix, suffix = split_url_on_sourceforge_suffix(url)
|
||||||
|
|
||||||
|
ext = extension_from_path(prefix)
|
||||||
|
if ext is not None:
|
||||||
|
prefix = strip_extension(prefix)
|
||||||
|
return prefix, ext, suffix
|
||||||
|
|
||||||
|
try:
|
||||||
|
prefix, suf = strip_query_and_fragment(prefix)
|
||||||
|
except ValueError:
|
||||||
|
# FIXME: tty.debug("Got error parsing path %s" % path)
|
||||||
|
# Ignore URL parse errors here
|
||||||
|
return url, ""
|
||||||
|
|
||||||
|
ext = extension_from_path(prefix)
|
||||||
|
prefix = strip_extension(prefix)
|
||||||
|
suffix = suf + suffix
|
||||||
|
if ext is None:
|
||||||
|
ext = ""
|
||||||
|
|
||||||
|
return prefix, ext, suffix
|
||||||
|
|
||||||
|
|
||||||
|
def strip_version_suffixes(path_or_url: str) -> str:
|
||||||
|
"""Some tarballs contain extraneous information after the version:
|
||||||
|
|
||||||
|
* ``bowtie2-2.2.5-source``
|
||||||
|
* ``libevent-2.0.21-stable``
|
||||||
|
* ``cuda_8.0.44_linux.run``
|
||||||
|
|
||||||
|
These strings are not part of the version number and should be ignored.
|
||||||
|
This function strips those suffixes off and returns the remaining string.
|
||||||
|
The goal is that the version is always the last thing in ``path``:
|
||||||
|
|
||||||
|
* ``bowtie2-2.2.5``
|
||||||
|
* ``libevent-2.0.21``
|
||||||
|
* ``cuda_8.0.44``
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path_or_url: The filename or URL for the package
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The ``path`` with any extraneous suffixes removed
|
||||||
|
"""
|
||||||
|
# NOTE: This could be done with complicated regexes in parse_version_offset
|
||||||
|
# NOTE: The problem is that we would have to add these regexes to the end
|
||||||
|
# NOTE: of every single version regex. Easier to just strip them off
|
||||||
|
# NOTE: permanently
|
||||||
|
|
||||||
|
suffix_regexes = [
|
||||||
|
# Download type
|
||||||
|
r"[Ii]nstall",
|
||||||
|
r"all",
|
||||||
|
r"code",
|
||||||
|
r"[Ss]ources?",
|
||||||
|
r"file",
|
||||||
|
r"full",
|
||||||
|
r"single",
|
||||||
|
r"with[a-zA-Z_-]+",
|
||||||
|
r"rock",
|
||||||
|
r"src(_0)?",
|
||||||
|
r"public",
|
||||||
|
r"bin",
|
||||||
|
r"binary",
|
||||||
|
r"run",
|
||||||
|
r"[Uu]niversal",
|
||||||
|
r"jar",
|
||||||
|
r"complete",
|
||||||
|
r"dynamic",
|
||||||
|
r"oss",
|
||||||
|
r"gem",
|
||||||
|
r"tar",
|
||||||
|
r"sh",
|
||||||
|
# Download version
|
||||||
|
r"release",
|
||||||
|
r"bin",
|
||||||
|
r"stable",
|
||||||
|
r"[Ff]inal",
|
||||||
|
r"rel",
|
||||||
|
r"orig",
|
||||||
|
r"dist",
|
||||||
|
r"\+",
|
||||||
|
# License
|
||||||
|
r"gpl",
|
||||||
|
# Arch
|
||||||
|
# Needs to come before and after OS, appears in both orders
|
||||||
|
r"ia32",
|
||||||
|
r"intel",
|
||||||
|
r"amd64",
|
||||||
|
r"linux64",
|
||||||
|
r"x64",
|
||||||
|
r"64bit",
|
||||||
|
r"x86[_-]64",
|
||||||
|
r"i586_64",
|
||||||
|
r"x86",
|
||||||
|
r"i[36]86",
|
||||||
|
r"ppc64(le)?",
|
||||||
|
r"armv?(7l|6l|64)",
|
||||||
|
# Other
|
||||||
|
r"cpp",
|
||||||
|
r"gtk",
|
||||||
|
r"incubating",
|
||||||
|
# OS
|
||||||
|
r"[Ll]inux(_64)?",
|
||||||
|
r"LINUX",
|
||||||
|
r"[Uu]ni?x",
|
||||||
|
r"[Ss]un[Oo][Ss]",
|
||||||
|
r"[Mm]ac[Oo][Ss][Xx]?",
|
||||||
|
r"[Oo][Ss][Xx]",
|
||||||
|
r"[Dd]arwin(64)?",
|
||||||
|
r"[Aa]pple",
|
||||||
|
r"[Ww]indows",
|
||||||
|
r"[Ww]in(64|32)?",
|
||||||
|
r"[Cc]ygwin(64|32)?",
|
||||||
|
r"[Mm]ingw",
|
||||||
|
r"centos",
|
||||||
|
# Arch
|
||||||
|
# Needs to come before and after OS, appears in both orders
|
||||||
|
r"ia32",
|
||||||
|
r"intel",
|
||||||
|
r"amd64",
|
||||||
|
r"linux64",
|
||||||
|
r"x64",
|
||||||
|
r"64bit",
|
||||||
|
r"x86[_-]64",
|
||||||
|
r"i586_64",
|
||||||
|
r"x86",
|
||||||
|
r"i[36]86",
|
||||||
|
r"ppc64(le)?",
|
||||||
|
r"armv?(7l|6l|64)?",
|
||||||
|
# PyPI
|
||||||
|
r"[._-]py[23].*\.whl",
|
||||||
|
r"[._-]cp[23].*\.whl",
|
||||||
|
r"[._-]win.*\.exe",
|
||||||
|
]
|
||||||
|
|
||||||
|
for regex in suffix_regexes:
|
||||||
|
# Remove the suffix from the end of the path
|
||||||
|
# This may be done multiple times
|
||||||
|
path_or_url = re.sub(r"[._-]?" + regex + "$", "", path_or_url)
|
||||||
|
|
||||||
|
return path_or_url
|
||||||
|
|
||||||
|
|
||||||
|
def expand_contracted_extension(extension: str) -> str:
|
||||||
|
"""Returns the expanded version of a known contracted extension.
|
||||||
|
|
||||||
|
This function maps extensions like ".tgz" to ".tar.gz". On unknown extensions,
|
||||||
|
return the input unmodified.
|
||||||
|
"""
|
||||||
|
extension = extension.strip(".")
|
||||||
|
return CONTRACTION_MAP.get(extension, extension)
|
||||||
|
|
||||||
|
|
||||||
|
def expand_contracted_extension_in_path(
|
||||||
|
path_or_url: str, *, extension: Optional[str] = None
|
||||||
|
) -> str:
|
||||||
|
"""Returns the input path or URL with any contraction extension expanded.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path_or_url: path or URL to be expanded
|
||||||
|
extension: if specified, only attempt to expand that extension
|
||||||
|
"""
|
||||||
|
extension = extension or extension_from_path(path_or_url)
|
||||||
|
if extension is None:
|
||||||
|
return path_or_url
|
||||||
|
|
||||||
|
expanded = expand_contracted_extension(extension)
|
||||||
|
if expanded != extension:
|
||||||
|
return re.sub(rf"{extension}", rf"{expanded}", path_or_url)
|
||||||
|
return path_or_url
|
||||||
|
|
||||||
|
|
||||||
|
def compression_ext_from_compressed_archive(extension: str) -> Optional[str]:
|
||||||
|
"""Returns compression extension for a compressed archive"""
|
||||||
|
extension = expand_contracted_extension(extension)
|
||||||
|
for ext in [*EXTENSIONS]:
|
||||||
|
if ext in extension:
|
||||||
|
return ext
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def strip_compression_extension(path_or_url: str, ext: Optional[str] = None) -> str:
|
||||||
|
"""Strips the compression extension from the input, and returns it. For instance,
|
||||||
|
"foo.tgz" becomes "foo.tar".
|
||||||
|
|
||||||
|
If no extension is given, try a default list of extensions.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path_or_url: input to be stripped
|
||||||
|
ext: if given, extension to be stripped
|
||||||
|
"""
|
||||||
|
if not extension_from_path(path_or_url):
|
||||||
|
return path_or_url
|
||||||
|
|
||||||
|
expanded_path = expand_contracted_extension_in_path(path_or_url)
|
||||||
|
candidates = [ext] if ext is not None else EXTENSIONS
|
||||||
|
for current_extension in candidates:
|
||||||
|
modified_path = check_and_remove_ext(expanded_path, extension=current_extension)
|
||||||
|
if modified_path != expanded_path:
|
||||||
|
return modified_path
|
||||||
|
return expanded_path
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_archive(path_or_url: str) -> bool:
|
||||||
|
"""Returns true if the input is a valid archive, False otherwise."""
|
||||||
|
return (
|
||||||
|
False if not path_or_url else any(path_or_url.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def determine_url_file_extension(path: str) -> str:
|
||||||
|
"""This returns the type of archive a URL refers to. This is
|
||||||
|
sometimes confusing because of URLs like:
|
||||||
|
|
||||||
|
(1) https://github.com/petdance/ack/tarball/1.93_02
|
||||||
|
|
||||||
|
Where the URL doesn't actually contain the filename. We need
|
||||||
|
to know what type it is so that we can appropriately name files
|
||||||
|
in mirrors.
|
||||||
|
"""
|
||||||
|
match = re.search(r"github.com/.+/(zip|tar)ball/", path)
|
||||||
|
if match:
|
||||||
|
if match.group(1) == "zip":
|
||||||
|
return "zip"
|
||||||
|
elif match.group(1) == "tar":
|
||||||
|
return "tar.gz"
|
||||||
|
|
||||||
|
prefix, ext, suffix = split_url_extension(path)
|
||||||
|
return ext
|
@@ -822,7 +822,7 @@ def get_versions(args, name):
|
|||||||
if args.url is not None and args.template != "bundle" and valid_url:
|
if args.url is not None and args.template != "bundle" and valid_url:
|
||||||
# Find available versions
|
# Find available versions
|
||||||
try:
|
try:
|
||||||
url_dict = spack.util.web.find_versions_of_archive(args.url)
|
url_dict = spack.url.find_versions_of_archive(args.url)
|
||||||
except UndetectableVersionError:
|
except UndetectableVersionError:
|
||||||
# Use fake versions
|
# Use fake versions
|
||||||
tty.warn("Couldn't detect version in: {0}".format(args.url))
|
tty.warn("Couldn't detect version in: {0}".format(args.url))
|
||||||
|
@@ -12,6 +12,7 @@
|
|||||||
import spack.fetch_strategy as fs
|
import spack.fetch_strategy as fs
|
||||||
import spack.repo
|
import spack.repo
|
||||||
import spack.spec
|
import spack.spec
|
||||||
|
import spack.url
|
||||||
import spack.util.crypto as crypto
|
import spack.util.crypto as crypto
|
||||||
from spack.url import (
|
from spack.url import (
|
||||||
UndetectableNameError,
|
UndetectableNameError,
|
||||||
@@ -26,7 +27,6 @@
|
|||||||
substitution_offsets,
|
substitution_offsets,
|
||||||
)
|
)
|
||||||
from spack.util.naming import simplify_name
|
from spack.util.naming import simplify_name
|
||||||
from spack.util.web import find_versions_of_archive
|
|
||||||
|
|
||||||
description = "debugging tool for url parsing"
|
description = "debugging tool for url parsing"
|
||||||
section = "developer"
|
section = "developer"
|
||||||
@@ -139,7 +139,7 @@ def url_parse(args):
|
|||||||
if args.spider:
|
if args.spider:
|
||||||
print()
|
print()
|
||||||
tty.msg("Spidering for versions:")
|
tty.msg("Spidering for versions:")
|
||||||
versions = find_versions_of_archive(url)
|
versions = spack.url.find_versions_of_archive(url)
|
||||||
|
|
||||||
if not versions:
|
if not versions:
|
||||||
print(" Found no versions for {0}".format(name))
|
print(" Found no versions for {0}".format(name))
|
||||||
|
@@ -31,6 +31,7 @@
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import llnl.url
|
||||||
import llnl.util
|
import llnl.util
|
||||||
import llnl.util.filesystem as fs
|
import llnl.util.filesystem as fs
|
||||||
import llnl.util.tty as tty
|
import llnl.util.tty as tty
|
||||||
@@ -46,7 +47,7 @@
|
|||||||
import spack.util.web as web_util
|
import spack.util.web as web_util
|
||||||
import spack.version
|
import spack.version
|
||||||
import spack.version.git_ref_lookup
|
import spack.version.git_ref_lookup
|
||||||
from spack.util.compression import decompressor_for, extension_from_path
|
from spack.util.compression import decompressor_for
|
||||||
from spack.util.executable import CommandNotFoundError, which
|
from spack.util.executable import CommandNotFoundError, which
|
||||||
from spack.util.string import comma_and, quote
|
from spack.util.string import comma_and, quote
|
||||||
|
|
||||||
@@ -441,7 +442,7 @@ def expand(self):
|
|||||||
|
|
||||||
# TODO: replace this by mime check.
|
# TODO: replace this by mime check.
|
||||||
if not self.extension:
|
if not self.extension:
|
||||||
self.extension = spack.url.determine_url_file_extension(self.url)
|
self.extension = llnl.url.determine_url_file_extension(self.url)
|
||||||
|
|
||||||
if self.stage.expanded:
|
if self.stage.expanded:
|
||||||
tty.debug("Source already staged to %s" % self.stage.source_path)
|
tty.debug("Source already staged to %s" % self.stage.source_path)
|
||||||
@@ -570,7 +571,7 @@ def expand(self):
|
|||||||
|
|
||||||
@_needs_stage
|
@_needs_stage
|
||||||
def archive(self, destination, **kwargs):
|
def archive(self, destination, **kwargs):
|
||||||
assert extension_from_path(destination) == "tar.gz"
|
assert llnl.url.extension_from_path(destination) == "tar.gz"
|
||||||
assert self.stage.source_path.startswith(self.stage.path)
|
assert self.stage.source_path.startswith(self.stage.path)
|
||||||
|
|
||||||
tar = which("tar", required=True)
|
tar = which("tar", required=True)
|
||||||
|
@@ -1,28 +0,0 @@
|
|||||||
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
|
||||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.response
|
|
||||||
from urllib.error import URLError
|
|
||||||
from urllib.request import BaseHandler
|
|
||||||
|
|
||||||
|
|
||||||
def gcs_open(req, *args, **kwargs):
|
|
||||||
"""Open a reader stream to a blob object on GCS"""
|
|
||||||
import spack.util.gcs as gcs_util
|
|
||||||
|
|
||||||
url = urllib.parse.urlparse(req.get_full_url())
|
|
||||||
gcsblob = gcs_util.GCSBlob(url)
|
|
||||||
|
|
||||||
if not gcsblob.exists():
|
|
||||||
raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
|
|
||||||
stream = gcsblob.get_blob_byte_stream()
|
|
||||||
headers = gcsblob.get_blob_headers()
|
|
||||||
|
|
||||||
return urllib.response.addinfourl(stream, headers, url)
|
|
||||||
|
|
||||||
|
|
||||||
class GCSHandler(BaseHandler):
|
|
||||||
def gs_open(self, req):
|
|
||||||
return gcs_open(req)
|
|
@@ -30,7 +30,6 @@
|
|||||||
import llnl.util.tty.color as color
|
import llnl.util.tty.color as color
|
||||||
from llnl.util.tty.log import log_output
|
from llnl.util.tty.log import log_output
|
||||||
|
|
||||||
import spack
|
|
||||||
import spack.cmd
|
import spack.cmd
|
||||||
import spack.config
|
import spack.config
|
||||||
import spack.environment as ev
|
import spack.environment as ev
|
||||||
|
@@ -20,6 +20,7 @@
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
import llnl.url
|
||||||
import llnl.util.tty as tty
|
import llnl.util.tty as tty
|
||||||
from llnl.util.filesystem import mkdirp
|
from llnl.util.filesystem import mkdirp
|
||||||
|
|
||||||
@@ -29,7 +30,6 @@
|
|||||||
import spack.fetch_strategy as fs
|
import spack.fetch_strategy as fs
|
||||||
import spack.mirror
|
import spack.mirror
|
||||||
import spack.spec
|
import spack.spec
|
||||||
import spack.url as url
|
|
||||||
import spack.util.path
|
import spack.util.path
|
||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
import spack.util.spack_yaml as syaml
|
import spack.util.spack_yaml as syaml
|
||||||
@@ -375,7 +375,7 @@ def _determine_extension(fetcher):
|
|||||||
if isinstance(fetcher, fs.URLFetchStrategy):
|
if isinstance(fetcher, fs.URLFetchStrategy):
|
||||||
if fetcher.expand_archive:
|
if fetcher.expand_archive:
|
||||||
# If we fetch with a URLFetchStrategy, use URL's archive type
|
# If we fetch with a URLFetchStrategy, use URL's archive type
|
||||||
ext = url.determine_url_file_extension(fetcher.url)
|
ext = llnl.url.determine_url_file_extension(fetcher.url)
|
||||||
|
|
||||||
if ext:
|
if ext:
|
||||||
# Remove any leading dots
|
# Remove any leading dots
|
||||||
|
@@ -2377,7 +2377,7 @@ def fetch_remote_versions(self, concurrency=128):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return spack.util.web.find_versions_of_archive(
|
return spack.url.find_versions_of_archive(
|
||||||
self.all_urls, self.list_url, self.list_depth, concurrency, reference_package=self
|
self.all_urls, self.list_url, self.list_depth, concurrency, reference_package=self
|
||||||
)
|
)
|
||||||
except spack.util.web.NoNetworkConnectionError as e:
|
except spack.util.web.NoNetworkConnectionError as e:
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
import llnl.util.filesystem
|
import llnl.util.filesystem
|
||||||
import llnl.util.lang
|
import llnl.util.lang
|
||||||
|
from llnl.url import allowed_archive
|
||||||
|
|
||||||
import spack
|
import spack
|
||||||
import spack.error
|
import spack.error
|
||||||
@@ -19,7 +20,6 @@
|
|||||||
import spack.repo
|
import spack.repo
|
||||||
import spack.stage
|
import spack.stage
|
||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
from spack.util.compression import allowed_archive
|
|
||||||
from spack.util.crypto import Checker, checksum
|
from spack.util.crypto import Checker, checksum
|
||||||
from spack.util.executable import which, which_string
|
from spack.util.executable import which, which_string
|
||||||
|
|
||||||
|
@@ -1,80 +0,0 @@
|
|||||||
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
|
||||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
|
||||||
|
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
import urllib.response
|
|
||||||
from io import BufferedReader, BytesIO, IOBase
|
|
||||||
|
|
||||||
import spack.util.s3 as s3_util
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE(opadron): Workaround issue in boto where its StreamingBody
|
|
||||||
# implementation is missing several APIs expected from IOBase. These missing
|
|
||||||
# APIs prevent the streams returned by boto from being passed as-are along to
|
|
||||||
# urllib.
|
|
||||||
#
|
|
||||||
# https://github.com/boto/botocore/issues/879
|
|
||||||
# https://github.com/python/cpython/pull/3249
|
|
||||||
class WrapStream(BufferedReader):
|
|
||||||
def __init__(self, raw):
|
|
||||||
# In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
|
|
||||||
# only add missing attributes in older versions.
|
|
||||||
# https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
|
|
||||||
if not isinstance(raw, IOBase):
|
|
||||||
raw.readable = lambda: True
|
|
||||||
raw.writable = lambda: False
|
|
||||||
raw.seekable = lambda: False
|
|
||||||
raw.closed = False
|
|
||||||
raw.flush = lambda: None
|
|
||||||
super().__init__(raw)
|
|
||||||
|
|
||||||
def detach(self):
|
|
||||||
self.raw = None
|
|
||||||
|
|
||||||
def read(self, *args, **kwargs):
|
|
||||||
return self.raw.read(*args, **kwargs)
|
|
||||||
|
|
||||||
def __getattr__(self, key):
|
|
||||||
return getattr(self.raw, key)
|
|
||||||
|
|
||||||
|
|
||||||
def _s3_open(url, method="GET"):
|
|
||||||
parsed = urllib.parse.urlparse(url)
|
|
||||||
s3 = s3_util.get_s3_session(url, method="fetch")
|
|
||||||
|
|
||||||
bucket = parsed.netloc
|
|
||||||
key = parsed.path
|
|
||||||
|
|
||||||
if key.startswith("/"):
|
|
||||||
key = key[1:]
|
|
||||||
|
|
||||||
if method not in ("GET", "HEAD"):
|
|
||||||
raise urllib.error.URLError(
|
|
||||||
"Only GET and HEAD verbs are currently supported for the s3:// scheme"
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if method == "GET":
|
|
||||||
obj = s3.get_object(Bucket=bucket, Key=key)
|
|
||||||
# NOTE(opadron): Apply workaround here (see above)
|
|
||||||
stream = WrapStream(obj["Body"])
|
|
||||||
elif method == "HEAD":
|
|
||||||
obj = s3.head_object(Bucket=bucket, Key=key)
|
|
||||||
stream = BytesIO()
|
|
||||||
except s3.ClientError as e:
|
|
||||||
raise urllib.error.URLError(e) from e
|
|
||||||
|
|
||||||
headers = obj["ResponseMetadata"]["HTTPHeaders"]
|
|
||||||
|
|
||||||
return url, headers, stream
|
|
||||||
|
|
||||||
|
|
||||||
class UrllibS3Handler(urllib.request.BaseHandler):
|
|
||||||
def s3_open(self, req):
|
|
||||||
orig_url = req.get_full_url()
|
|
||||||
url, headers, stream = _s3_open(orig_url, method=req.get_method())
|
|
||||||
return urllib.response.addinfourl(stream, headers, url)
|
|
167
lib/spack/spack/test/llnl/url.py
Normal file
167
lib/spack/spack/test/llnl/url.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
# Copyright 2013-2023 Lawrence Livermore National Security, LLC and other
|
||||||
|
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
"""Tests for llnl.url functions"""
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import llnl.url
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(params=llnl.url.ALLOWED_ARCHIVE_TYPES)
|
||||||
|
def archive_and_expected(request):
|
||||||
|
archive_name = ".".join(["Foo", request.param])
|
||||||
|
return archive_name, request.param
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_extension(archive_and_expected):
|
||||||
|
"""Tests that we can predict correctly known extensions for simple cases."""
|
||||||
|
archive, expected = archive_and_expected
|
||||||
|
result = llnl.url.extension_from_path(archive)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_bad_extension():
|
||||||
|
"""Tests that a bad extension returns None"""
|
||||||
|
result = llnl.url.extension_from_path("Foo.cxx")
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"url,expected",
|
||||||
|
[
|
||||||
|
# No suffix
|
||||||
|
("rgb-1.0.6", "rgb-1.0.6"),
|
||||||
|
# Misleading prefix
|
||||||
|
("jpegsrc.v9b", "jpegsrc.v9b"),
|
||||||
|
("turbolinux702", "turbolinux702"),
|
||||||
|
("converge_install_2.3.16", "converge_install_2.3.16"),
|
||||||
|
# Download type - code, source
|
||||||
|
("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
|
||||||
|
# Download type - src
|
||||||
|
("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
|
||||||
|
("go1.7.4.src", "go1.7.4"),
|
||||||
|
# Download type - source
|
||||||
|
("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
|
||||||
|
("grib_api-1.17.0-Source", "grib_api-1.17.0"),
|
||||||
|
# Download type - full
|
||||||
|
("julia-0.4.3-full", "julia-0.4.3"),
|
||||||
|
# Download type - bin
|
||||||
|
("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
|
||||||
|
# Download type - binary
|
||||||
|
("Jmol-14.8.0-binary", "Jmol-14.8.0"),
|
||||||
|
# Download type - gem
|
||||||
|
("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
|
||||||
|
# Download type - tar
|
||||||
|
("gromacs-4.6.1-tar", "gromacs-4.6.1"),
|
||||||
|
# Download type - sh
|
||||||
|
("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
|
||||||
|
# Download version - release
|
||||||
|
("v1.0.4-release", "v1.0.4"),
|
||||||
|
# Download version - stable
|
||||||
|
("libevent-2.0.21-stable", "libevent-2.0.21"),
|
||||||
|
# Download version - final
|
||||||
|
("2.6.7-final", "2.6.7"),
|
||||||
|
# Download version - rel
|
||||||
|
("v1.9.5.1rel", "v1.9.5.1"),
|
||||||
|
# Download version - orig
|
||||||
|
("dash_0.5.5.1.orig", "dash_0.5.5.1"),
|
||||||
|
# Download version - plus
|
||||||
|
("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
|
||||||
|
# License
|
||||||
|
("cppad-20170114.gpl", "cppad-20170114"),
|
||||||
|
# Arch
|
||||||
|
("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
|
||||||
|
("dislin-11.0.linux.i586_64", "dislin-11.0"),
|
||||||
|
("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
|
||||||
|
# OS - linux
|
||||||
|
("astyle_2.04_linux", "astyle_2.04"),
|
||||||
|
# OS - unix
|
||||||
|
("install-tl-unx", "install-tl"),
|
||||||
|
# OS - macos
|
||||||
|
("astyle_1.23_macosx", "astyle_1.23"),
|
||||||
|
("haxe-2.08-osx", "haxe-2.08"),
|
||||||
|
# PyPI - wheel
|
||||||
|
("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
|
||||||
|
(
|
||||||
|
"numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
|
||||||
|
"macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
|
||||||
|
"numpy-1.12.0",
|
||||||
|
),
|
||||||
|
# PyPI - exe
|
||||||
|
("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
|
||||||
|
# Combinations of multiple patterns - bin, release
|
||||||
|
("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
|
||||||
|
# Combinations of multiple patterns - all
|
||||||
|
("p7zip_9.04_src_all", "p7zip_9.04"),
|
||||||
|
# Combinations of multiple patterns - run
|
||||||
|
("cuda_8.0.44_linux.run", "cuda_8.0.44"),
|
||||||
|
# Combinations of multiple patterns - file
|
||||||
|
("ack-2.14-single-file", "ack-2.14"),
|
||||||
|
# Combinations of multiple patterns - jar
|
||||||
|
("antlr-3.4-complete.jar", "antlr-3.4"),
|
||||||
|
# Combinations of multiple patterns - oss
|
||||||
|
("tbb44_20160128oss_src_0", "tbb44_20160128"),
|
||||||
|
# Combinations of multiple patterns - darwin
|
||||||
|
("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
|
||||||
|
("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
|
||||||
|
# Combinations of multiple patterns - centos
|
||||||
|
("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
|
||||||
|
# Combinations of multiple patterns - arch
|
||||||
|
(
|
||||||
|
"VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
|
||||||
|
"VizGlow_v2.2alpha17-R21November2016",
|
||||||
|
),
|
||||||
|
("jdk-8u92-linux-x64", "jdk-8u92"),
|
||||||
|
("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
|
||||||
|
("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
|
||||||
|
("trf407b.linux64", "trf407b"),
|
||||||
|
# Combinations of multiple patterns - with
|
||||||
|
("mafft-7.221-with-extensions-src", "mafft-7.221"),
|
||||||
|
("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
|
||||||
|
("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
|
||||||
|
# Combinations of multiple patterns - rock
|
||||||
|
("bitlib-23-2.src.rock", "bitlib-23-2"),
|
||||||
|
# Combinations of multiple patterns - public
|
||||||
|
("dakota-6.3-public.src", "dakota-6.3"),
|
||||||
|
# Combinations of multiple patterns - universal
|
||||||
|
("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
|
||||||
|
# Combinations of multiple patterns - dynamic
|
||||||
|
("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
|
||||||
|
# Combinations of multiple patterns - other
|
||||||
|
("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
|
||||||
|
("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
|
||||||
|
("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_url_strip_version_suffixes(url, expected):
|
||||||
|
stripped = llnl.url.strip_version_suffixes(url)
|
||||||
|
assert stripped == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_compression_extension(archive_and_expected):
|
||||||
|
archive, extension = archive_and_expected
|
||||||
|
stripped = llnl.url.strip_compression_extension(archive)
|
||||||
|
if extension == "zip":
|
||||||
|
assert stripped == "Foo.zip"
|
||||||
|
stripped = llnl.url.strip_compression_extension(archive, "zip")
|
||||||
|
assert stripped == "Foo"
|
||||||
|
elif (
|
||||||
|
extension.lower() == "tar"
|
||||||
|
or extension in llnl.url.CONTRACTION_MAP
|
||||||
|
or extension
|
||||||
|
in [
|
||||||
|
".".join(ext)
|
||||||
|
for ext in itertools.product(llnl.url.PREFIX_EXTENSIONS, llnl.url.EXTENSIONS)
|
||||||
|
]
|
||||||
|
):
|
||||||
|
assert stripped == "Foo.tar" or stripped == "Foo.TAR"
|
||||||
|
else:
|
||||||
|
assert stripped == "Foo"
|
||||||
|
|
||||||
|
|
||||||
|
def test_allowed_archive(archive_and_expected):
|
||||||
|
archive, _ = archive_and_expected
|
||||||
|
assert llnl.url.allowed_archive(archive)
|
@@ -17,124 +17,11 @@
|
|||||||
parse_name_offset,
|
parse_name_offset,
|
||||||
parse_version_offset,
|
parse_version_offset,
|
||||||
strip_name_suffixes,
|
strip_name_suffixes,
|
||||||
strip_version_suffixes,
|
|
||||||
substitute_version,
|
substitute_version,
|
||||||
)
|
)
|
||||||
from spack.version import Version
|
from spack.version import Version
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"url,expected",
|
|
||||||
[
|
|
||||||
# No suffix
|
|
||||||
("rgb-1.0.6", "rgb-1.0.6"),
|
|
||||||
# Misleading prefix
|
|
||||||
("jpegsrc.v9b", "jpegsrc.v9b"),
|
|
||||||
("turbolinux702", "turbolinux702"),
|
|
||||||
("converge_install_2.3.16", "converge_install_2.3.16"),
|
|
||||||
# Download type - code, source
|
|
||||||
("cistem-1.0.0-beta-source-code", "cistem-1.0.0-beta"),
|
|
||||||
# Download type - src
|
|
||||||
("apache-ant-1.9.7-src", "apache-ant-1.9.7"),
|
|
||||||
("go1.7.4.src", "go1.7.4"),
|
|
||||||
# Download type - source
|
|
||||||
("bowtie2-2.2.5-source", "bowtie2-2.2.5"),
|
|
||||||
("grib_api-1.17.0-Source", "grib_api-1.17.0"),
|
|
||||||
# Download type - full
|
|
||||||
("julia-0.4.3-full", "julia-0.4.3"),
|
|
||||||
# Download type - bin
|
|
||||||
("apache-maven-3.3.9-bin", "apache-maven-3.3.9"),
|
|
||||||
# Download type - binary
|
|
||||||
("Jmol-14.8.0-binary", "Jmol-14.8.0"),
|
|
||||||
# Download type - gem
|
|
||||||
("rubysl-date-2.0.9.gem", "rubysl-date-2.0.9"),
|
|
||||||
# Download type - tar
|
|
||||||
("gromacs-4.6.1-tar", "gromacs-4.6.1"),
|
|
||||||
# Download type - sh
|
|
||||||
("Miniconda2-4.3.11-Linux-x86_64.sh", "Miniconda2-4.3.11"),
|
|
||||||
# Download version - release
|
|
||||||
("v1.0.4-release", "v1.0.4"),
|
|
||||||
# Download version - stable
|
|
||||||
("libevent-2.0.21-stable", "libevent-2.0.21"),
|
|
||||||
# Download version - final
|
|
||||||
("2.6.7-final", "2.6.7"),
|
|
||||||
# Download version - rel
|
|
||||||
("v1.9.5.1rel", "v1.9.5.1"),
|
|
||||||
# Download version - orig
|
|
||||||
("dash_0.5.5.1.orig", "dash_0.5.5.1"),
|
|
||||||
# Download version - plus
|
|
||||||
("ncbi-blast-2.6.0+-src", "ncbi-blast-2.6.0"),
|
|
||||||
# License
|
|
||||||
("cppad-20170114.gpl", "cppad-20170114"),
|
|
||||||
# Arch
|
|
||||||
("pcraster-4.1.0_x86-64", "pcraster-4.1.0"),
|
|
||||||
("dislin-11.0.linux.i586_64", "dislin-11.0"),
|
|
||||||
("PAGIT.V1.01.64bit", "PAGIT.V1.01"),
|
|
||||||
# OS - linux
|
|
||||||
("astyle_2.04_linux", "astyle_2.04"),
|
|
||||||
# OS - unix
|
|
||||||
("install-tl-unx", "install-tl"),
|
|
||||||
# OS - macos
|
|
||||||
("astyle_1.23_macosx", "astyle_1.23"),
|
|
||||||
("haxe-2.08-osx", "haxe-2.08"),
|
|
||||||
# PyPI - wheel
|
|
||||||
("entrypoints-0.2.2-py2.py3-none-any.whl", "entrypoints-0.2.2"),
|
|
||||||
(
|
|
||||||
"numpy-1.12.0-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel."
|
|
||||||
"macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl",
|
|
||||||
"numpy-1.12.0",
|
|
||||||
),
|
|
||||||
# PyPI - exe
|
|
||||||
("PyYAML-3.12.win-amd64-py3.5.exe", "PyYAML-3.12"),
|
|
||||||
# Combinations of multiple patterns - bin, release
|
|
||||||
("rocketmq-all-4.5.2-bin-release", "rocketmq-all-4.5.2"),
|
|
||||||
# Combinations of multiple patterns - all
|
|
||||||
("p7zip_9.04_src_all", "p7zip_9.04"),
|
|
||||||
# Combinations of multiple patterns - run
|
|
||||||
("cuda_8.0.44_linux.run", "cuda_8.0.44"),
|
|
||||||
# Combinations of multiple patterns - file
|
|
||||||
("ack-2.14-single-file", "ack-2.14"),
|
|
||||||
# Combinations of multiple patterns - jar
|
|
||||||
("antlr-3.4-complete.jar", "antlr-3.4"),
|
|
||||||
# Combinations of multiple patterns - oss
|
|
||||||
("tbb44_20160128oss_src_0", "tbb44_20160128"),
|
|
||||||
# Combinations of multiple patterns - darwin
|
|
||||||
("ghc-7.0.4-x86_64-apple-darwin", "ghc-7.0.4"),
|
|
||||||
("ghc-7.0.4-i386-apple-darwin", "ghc-7.0.4"),
|
|
||||||
# Combinations of multiple patterns - centos
|
|
||||||
("sratoolkit.2.8.2-1-centos_linux64", "sratoolkit.2.8.2-1"),
|
|
||||||
# Combinations of multiple patterns - arch
|
|
||||||
(
|
|
||||||
"VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install",
|
|
||||||
"VizGlow_v2.2alpha17-R21November2016",
|
|
||||||
),
|
|
||||||
("jdk-8u92-linux-x64", "jdk-8u92"),
|
|
||||||
("cuda_6.5.14_linux_64.run", "cuda_6.5.14"),
|
|
||||||
("Mathematica_12.0.0_LINUX.sh", "Mathematica_12.0.0"),
|
|
||||||
("trf407b.linux64", "trf407b"),
|
|
||||||
# Combinations of multiple patterns - with
|
|
||||||
("mafft-7.221-with-extensions-src", "mafft-7.221"),
|
|
||||||
("spark-2.0.0-bin-without-hadoop", "spark-2.0.0"),
|
|
||||||
("conduit-v0.3.0-src-with-blt", "conduit-v0.3.0"),
|
|
||||||
# Combinations of multiple patterns - rock
|
|
||||||
("bitlib-23-2.src.rock", "bitlib-23-2"),
|
|
||||||
# Combinations of multiple patterns - public
|
|
||||||
("dakota-6.3-public.src", "dakota-6.3"),
|
|
||||||
# Combinations of multiple patterns - universal
|
|
||||||
("synergy-1.3.6p2-MacOSX-Universal", "synergy-1.3.6p2"),
|
|
||||||
# Combinations of multiple patterns - dynamic
|
|
||||||
("snptest_v2.5.2_linux_x86_64_dynamic", "snptest_v2.5.2"),
|
|
||||||
# Combinations of multiple patterns - other
|
|
||||||
("alglib-3.11.0.cpp.gpl", "alglib-3.11.0"),
|
|
||||||
("hpcviewer-2019.08-linux.gtk.x86_64", "hpcviewer-2019.08"),
|
|
||||||
("apache-mxnet-src-1.3.0-incubating", "apache-mxnet-src-1.3.0"),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_url_strip_version_suffixes(url, expected):
|
|
||||||
stripped = strip_version_suffixes(url)
|
|
||||||
assert stripped == expected
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"url,version,expected",
|
"url,version,expected",
|
||||||
[
|
[
|
||||||
|
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
import llnl.url
|
||||||
from llnl.util.filesystem import working_dir
|
from llnl.util.filesystem import working_dir
|
||||||
|
|
||||||
from spack.paths import spack_root
|
from spack.paths import spack_root
|
||||||
@@ -21,7 +22,7 @@
|
|||||||
ext_archive = {}
|
ext_archive = {}
|
||||||
[
|
[
|
||||||
ext_archive.update({ext: ".".join(["Foo", ext])})
|
ext_archive.update({ext: ".".join(["Foo", ext])})
|
||||||
for ext in scomp.ALLOWED_ARCHIVE_TYPES
|
for ext in llnl.url.ALLOWED_ARCHIVE_TYPES
|
||||||
if "TAR" not in ext
|
if "TAR" not in ext
|
||||||
]
|
]
|
||||||
# Spack does not use Python native handling for tarballs or zip
|
# Spack does not use Python native handling for tarballs or zip
|
||||||
@@ -95,38 +96,3 @@ def test_unallowed_extension():
|
|||||||
bad_ext_archive = "Foo.cxx"
|
bad_ext_archive = "Foo.cxx"
|
||||||
with pytest.raises(CommandNotFoundError):
|
with pytest.raises(CommandNotFoundError):
|
||||||
scomp.decompressor_for(bad_ext_archive)
|
scomp.decompressor_for(bad_ext_archive)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("archive", ext_archive.values())
|
|
||||||
def test_get_extension(archive):
|
|
||||||
ext = scomp.extension_from_path(archive)
|
|
||||||
assert ext_archive[ext] == archive
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_bad_extension():
|
|
||||||
archive = "Foo.cxx"
|
|
||||||
ext = scomp.extension_from_path(archive)
|
|
||||||
assert ext is None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("path", ext_archive.values())
|
|
||||||
def test_allowed_archive(path):
|
|
||||||
assert scomp.allowed_archive(path)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("ext_path", ext_archive.items())
|
|
||||||
def test_strip_compression_extension(ext_path):
|
|
||||||
ext, path = ext_path
|
|
||||||
stripped = scomp.strip_compression_extension(path)
|
|
||||||
if ext == "zip":
|
|
||||||
assert stripped == "Foo.zip"
|
|
||||||
stripped = scomp.strip_compression_extension(path, "zip")
|
|
||||||
assert stripped == "Foo"
|
|
||||||
elif (
|
|
||||||
ext == "tar"
|
|
||||||
or ext in scomp.CONTRACTION_MAP.keys()
|
|
||||||
or ext in [".".join(ext) for ext in product(scomp.PRE_EXTS, scomp.EXTS)]
|
|
||||||
):
|
|
||||||
assert stripped == "Foo.tar" or stripped == "Foo.TAR"
|
|
||||||
else:
|
|
||||||
assert stripped == "Foo"
|
|
||||||
|
@@ -15,6 +15,7 @@
|
|||||||
import spack.config
|
import spack.config
|
||||||
import spack.mirror
|
import spack.mirror
|
||||||
import spack.paths
|
import spack.paths
|
||||||
|
import spack.url
|
||||||
import spack.util.path
|
import spack.util.path
|
||||||
import spack.util.s3
|
import spack.util.s3
|
||||||
import spack.util.url as url_util
|
import spack.util.url as url_util
|
||||||
@@ -102,31 +103,31 @@ def test_spider_no_response(monkeypatch):
|
|||||||
|
|
||||||
|
|
||||||
def test_find_versions_of_archive_0():
|
def test_find_versions_of_archive_0():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=0)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=0)
|
||||||
assert Version("0.0.0") in versions
|
assert Version("0.0.0") in versions
|
||||||
|
|
||||||
|
|
||||||
def test_find_versions_of_archive_1():
|
def test_find_versions_of_archive_1():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=1)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=1)
|
||||||
assert Version("0.0.0") in versions
|
assert Version("0.0.0") in versions
|
||||||
assert Version("1.0.0") in versions
|
assert Version("1.0.0") in versions
|
||||||
|
|
||||||
|
|
||||||
def test_find_versions_of_archive_2():
|
def test_find_versions_of_archive_2():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||||
assert Version("0.0.0") in versions
|
assert Version("0.0.0") in versions
|
||||||
assert Version("1.0.0") in versions
|
assert Version("1.0.0") in versions
|
||||||
assert Version("2.0.0") in versions
|
assert Version("2.0.0") in versions
|
||||||
|
|
||||||
|
|
||||||
def test_find_exotic_versions_of_archive_2():
|
def test_find_exotic_versions_of_archive_2():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=2)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=2)
|
||||||
# up for grabs to make this better.
|
# up for grabs to make this better.
|
||||||
assert Version("2.0.0b2") in versions
|
assert Version("2.0.0b2") in versions
|
||||||
|
|
||||||
|
|
||||||
def test_find_versions_of_archive_3():
|
def test_find_versions_of_archive_3():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||||
assert Version("0.0.0") in versions
|
assert Version("0.0.0") in versions
|
||||||
assert Version("1.0.0") in versions
|
assert Version("1.0.0") in versions
|
||||||
assert Version("2.0.0") in versions
|
assert Version("2.0.0") in versions
|
||||||
@@ -135,16 +136,14 @@ def test_find_versions_of_archive_3():
|
|||||||
|
|
||||||
|
|
||||||
def test_find_exotic_versions_of_archive_3():
|
def test_find_exotic_versions_of_archive_3():
|
||||||
versions = spack.util.web.find_versions_of_archive(root_tarball, root, list_depth=3)
|
versions = spack.url.find_versions_of_archive(root_tarball, root, list_depth=3)
|
||||||
assert Version("2.0.0b2") in versions
|
assert Version("2.0.0b2") in versions
|
||||||
assert Version("3.0a1") in versions
|
assert Version("3.0a1") in versions
|
||||||
assert Version("4.5-rc5") in versions
|
assert Version("4.5-rc5") in versions
|
||||||
|
|
||||||
|
|
||||||
def test_find_versions_of_archive_with_fragment():
|
def test_find_versions_of_archive_with_fragment():
|
||||||
versions = spack.util.web.find_versions_of_archive(
|
versions = spack.url.find_versions_of_archive(root_tarball, root_with_fragment, list_depth=0)
|
||||||
root_tarball, root_with_fragment, list_depth=0
|
|
||||||
)
|
|
||||||
assert Version("5.0.0") in versions
|
assert Version("5.0.0") in versions
|
||||||
|
|
||||||
|
|
||||||
@@ -311,7 +310,7 @@ def test_remove_s3_url(monkeypatch, capfd):
|
|||||||
def get_s3_session(url, method="fetch"):
|
def get_s3_session(url, method="fetch"):
|
||||||
return MockS3Client()
|
return MockS3Client()
|
||||||
|
|
||||||
monkeypatch.setattr(spack.util.s3, "get_s3_session", get_s3_session)
|
monkeypatch.setattr(spack.util.web, "get_s3_session", get_s3_session)
|
||||||
|
|
||||||
current_debug_level = tty.debug_level()
|
current_debug_level = tty.debug_level()
|
||||||
tty.set_debug(1)
|
tty.set_debug(1)
|
||||||
|
@@ -27,246 +27,22 @@
|
|||||||
"""
|
"""
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
import re
|
import re
|
||||||
from urllib.parse import urlsplit, urlunsplit
|
|
||||||
|
|
||||||
import llnl.util.tty as tty
|
import llnl.url
|
||||||
from llnl.util.tty.color import cescape, colorize
|
from llnl.util.tty.color import cescape, colorize
|
||||||
|
|
||||||
import spack.error
|
import spack.error
|
||||||
import spack.util.compression as comp
|
import spack.util.web
|
||||||
import spack.util.path as spath
|
|
||||||
import spack.version
|
import spack.version
|
||||||
|
from spack.util.path import convert_to_posix_path
|
||||||
|
|
||||||
#
|
#
|
||||||
# Note: We call the input to most of these functions a "path" but the functions
|
# Note: We call the input to most of these functions a "path" but the functions
|
||||||
# work on paths and URLs. There's not a good word for both of these, but
|
# work on paths and URLs. There's not a good word for both of these, but
|
||||||
# "path" seemed like the most generic term.
|
# "path" seemed like the most generic term.
|
||||||
#
|
#
|
||||||
def find_list_urls(url):
|
|
||||||
r"""Find good list URLs for the supplied URL.
|
|
||||||
|
|
||||||
By default, returns the dirname of the archive path.
|
|
||||||
|
|
||||||
Provides special treatment for the following websites, which have a
|
|
||||||
unique list URL different from the dirname of the download URL:
|
|
||||||
|
|
||||||
========= =======================================================
|
|
||||||
GitHub https://github.com/<repo>/<name>/releases
|
|
||||||
GitLab https://gitlab.\*/<repo>/<name>/tags
|
|
||||||
BitBucket https://bitbucket.org/<repo>/<name>/downloads/?tab=tags
|
|
||||||
CRAN https://\*.r-project.org/src/contrib/Archive/<name>
|
|
||||||
PyPI https://pypi.org/simple/<name>/
|
|
||||||
LuaRocks https://luarocks.org/modules/<repo>/<name>
|
|
||||||
========= =======================================================
|
|
||||||
|
|
||||||
Note: this function is called by `spack versions`, `spack checksum`,
|
|
||||||
and `spack create`, but not by `spack fetch` or `spack install`.
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
url (str): The download URL for the package
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
set: One or more list URLs for the package
|
|
||||||
"""
|
|
||||||
|
|
||||||
url_types = [
|
|
||||||
# GitHub
|
|
||||||
# e.g. https://github.com/llnl/callpath/archive/v1.0.1.tar.gz
|
|
||||||
(r"(.*github\.com/[^/]+/[^/]+)", lambda m: m.group(1) + "/releases"),
|
|
||||||
# GitLab API endpoint
|
|
||||||
# e.g. https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2
|
|
||||||
(
|
|
||||||
r"(.*gitlab[^/]+)/api/v4/projects/([^/]+)%2F([^/]+)",
|
|
||||||
lambda m: m.group(1) + "/" + m.group(2) + "/" + m.group(3) + "/tags",
|
|
||||||
),
|
|
||||||
# GitLab non-API endpoint
|
|
||||||
# e.g. https://gitlab.dkrz.de/k202009/libaec/uploads/631e85bcf877c2dcaca9b2e6d6526339/libaec-1.0.0.tar.gz
|
|
||||||
(r"(.*gitlab[^/]+/(?!api/v4/projects)[^/]+/[^/]+)", lambda m: m.group(1) + "/tags"),
|
|
||||||
# BitBucket
|
|
||||||
# e.g. https://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2
|
|
||||||
(r"(.*bitbucket.org/[^/]+/[^/]+)", lambda m: m.group(1) + "/downloads/?tab=tags"),
|
|
||||||
# CRAN
|
|
||||||
# e.g. https://cran.r-project.org/src/contrib/Rcpp_0.12.9.tar.gz
|
|
||||||
# e.g. https://cloud.r-project.org/src/contrib/rgl_0.98.1.tar.gz
|
|
||||||
(
|
|
||||||
r"(.*\.r-project\.org/src/contrib)/([^_]+)",
|
|
||||||
lambda m: m.group(1) + "/Archive/" + m.group(2),
|
|
||||||
),
|
|
||||||
# PyPI
|
|
||||||
# e.g. https://pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
|
||||||
# e.g. https://www.pypi.io/packages/source/n/numpy/numpy-1.19.4.zip
|
|
||||||
# e.g. https://pypi.org/packages/source/n/numpy/numpy-1.19.4.zip
|
|
||||||
# e.g. https://pypi.python.org/packages/source/n/numpy/numpy-1.19.4.zip
|
|
||||||
# e.g. https://files.pythonhosted.org/packages/source/n/numpy/numpy-1.19.4.zip
|
|
||||||
# e.g. https://pypi.io/packages/py2.py3/o/opencensus-context/opencensus_context-0.1.1-py2.py3-none-any.whl
|
|
||||||
(
|
|
||||||
r"(?:pypi|pythonhosted)[^/]+/packages/[^/]+/./([^/]+)",
|
|
||||||
lambda m: "https://pypi.org/simple/" + m.group(1) + "/",
|
|
||||||
),
|
|
||||||
# LuaRocks
|
|
||||||
# e.g. https://luarocks.org/manifests/gvvaughan/lpeg-1.0.2-1.src.rock
|
|
||||||
# e.g. https://luarocks.org/manifests/openresty/lua-cjson-2.1.0-1.src.rock
|
|
||||||
(
|
|
||||||
r"luarocks[^/]+/(?:modules|manifests)/(?P<org>[^/]+)/"
|
|
||||||
+ r"(?P<name>.+?)-[0-9.-]*\.src\.rock",
|
|
||||||
lambda m: "https://luarocks.org/modules/"
|
|
||||||
+ m.group("org")
|
|
||||||
+ "/"
|
|
||||||
+ m.group("name")
|
|
||||||
+ "/",
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
list_urls = set([os.path.dirname(url)])
|
|
||||||
|
|
||||||
for pattern, fun in url_types:
|
|
||||||
match = re.search(pattern, url)
|
|
||||||
if match:
|
|
||||||
list_urls.add(fun(match))
|
|
||||||
|
|
||||||
return list_urls
|
|
||||||
|
|
||||||
|
|
||||||
def strip_query_and_fragment(path):
|
|
||||||
try:
|
|
||||||
components = urlsplit(path)
|
|
||||||
stripped = components[:3] + (None, None)
|
|
||||||
|
|
||||||
query, frag = components[3:5]
|
|
||||||
suffix = ""
|
|
||||||
if query:
|
|
||||||
suffix += "?" + query
|
|
||||||
if frag:
|
|
||||||
suffix += "#" + frag
|
|
||||||
|
|
||||||
return (urlunsplit(stripped), suffix)
|
|
||||||
|
|
||||||
except ValueError:
|
|
||||||
tty.debug("Got error parsing path %s" % path)
|
|
||||||
return (path, "") # Ignore URL parse errors here

def strip_version_suffixes(path):
    """Some tarballs contain extraneous information after the version:

    * ``bowtie2-2.2.5-source``
    * ``libevent-2.0.21-stable``
    * ``cuda_8.0.44_linux.run``

    These strings are not part of the version number and should be ignored.
    This function strips those suffixes off and returns the remaining string.
    The goal is that the version is always the last thing in ``path``:

    * ``bowtie2-2.2.5``
    * ``libevent-2.0.21``
    * ``cuda_8.0.44``

    Args:
        path (str): The filename or URL for the package

    Returns:
        str: The ``path`` with any extraneous suffixes removed
    """
    # NOTE: This could be done with complicated regexes in parse_version_offset
    # NOTE: The problem is that we would have to add these regexes to the end
    # NOTE: of every single version regex. Easier to just strip them off
    # NOTE: permanently

    suffix_regexes = [
        # Download type
        r"[Ii]nstall",
        r"all",
        r"code",
        r"[Ss]ources?",
        r"file",
        r"full",
        r"single",
        r"with[a-zA-Z_-]+",
        r"rock",
        r"src(_0)?",
        r"public",
        r"bin",
        r"binary",
        r"run",
        r"[Uu]niversal",
        r"jar",
        r"complete",
        r"dynamic",
        r"oss",
        r"gem",
        r"tar",
        r"sh",
        # Download version
        r"release",
        r"bin",
        r"stable",
        r"[Ff]inal",
        r"rel",
        r"orig",
        r"dist",
        r"\+",
        # License
        r"gpl",
        # Arch
        # Needs to come before and after OS, appears in both orders
        r"ia32",
        r"intel",
        r"amd64",
        r"linux64",
        r"x64",
        r"64bit",
        r"x86[_-]64",
        r"i586_64",
        r"x86",
        r"i[36]86",
        r"ppc64(le)?",
        r"armv?(7l|6l|64)",
        # Other
        r"cpp",
        r"gtk",
        r"incubating",
        # OS
        r"[Ll]inux(_64)?",
        r"LINUX",
        r"[Uu]ni?x",
        r"[Ss]un[Oo][Ss]",
        r"[Mm]ac[Oo][Ss][Xx]?",
        r"[Oo][Ss][Xx]",
        r"[Dd]arwin(64)?",
        r"[Aa]pple",
        r"[Ww]indows",
        r"[Ww]in(64|32)?",
        r"[Cc]ygwin(64|32)?",
        r"[Mm]ingw",
        r"centos",
        # Arch
        # Needs to come before and after OS, appears in both orders
        r"ia32",
        r"intel",
        r"amd64",
        r"linux64",
        r"x64",
        r"64bit",
        r"x86[_-]64",
        r"i586_64",
        r"x86",
        r"i[36]86",
        r"ppc64(le)?",
        r"armv?(7l|6l|64)?",
        # PyPI
        r"[._-]py[23].*\.whl",
        r"[._-]cp[23].*\.whl",
        r"[._-]win.*\.exe",
    ]

    for regex in suffix_regexes:
        # Remove the suffix from the end of the path
        # This may be done multiple times
        path = re.sub(r"[._-]?" + regex + "$", "", path)

    return path
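A single pass over the regex list can peel off several suffixes in sequence, e.g. (illustrative):

    strip_version_suffixes("cuda_8.0.44_linux.run")   # -> 'cuda_8.0.44'
    strip_version_suffixes("libevent-2.0.21-stable")  # -> 'libevent-2.0.21'
    strip_version_suffixes("numpy-1.19.4")            # -> 'numpy-1.19.4' (unchanged)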

def strip_name_suffixes(path, version):
@@ -341,69 +117,6 @@ def strip_name_suffixes(path, version):
    return path

def split_url_extension(path):
    """Some URLs have a query string, e.g.:

        1. https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true
        2. http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin.tar.gz
        3. https://gitlab.kitware.com/vtk/vtk/repository/archive.tar.bz2?ref=v7.0.0

    In (1), the query string needs to be stripped to get at the
    extension, but in (2) & (3), the filename is IN a single final query
    argument.

    This strips the URL into three pieces: ``prefix``, ``ext``, and ``suffix``.
    The suffix contains anything that was stripped off the URL to
    get at the file extension. In (1), it will be ``'?raw=true'``, but
    in (2), it will be empty. In (3) the suffix is a parameter that follows
    after the file extension, e.g.:

        1. ``('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')``
        2. ``('http://www.apache.org/dyn/closer.cgi?path=/cassandra/1.2.0/apache-cassandra-1.2.0-rc2-bin', '.tar.gz', None)``
        3. ``('https://gitlab.kitware.com/vtk/vtk/repository/archive', '.tar.bz2', '?ref=v7.0.0')``
    """
    prefix, ext, suffix = path, "", ""

    # Strip off sourceforge download suffix.
    # e.g. https://sourceforge.net/projects/glew/files/glew/2.0.0/glew-2.0.0.tgz/download
    prefix, suffix = spath.find_sourceforge_suffix(path)

    ext = comp.extension_from_path(prefix)
    if ext is not None:
        prefix = comp.strip_extension(prefix)
    else:
        prefix, suf = strip_query_and_fragment(prefix)
        ext = comp.extension_from_path(prefix)
        prefix = comp.strip_extension(prefix)
        suffix = suf + suffix
        if ext is None:
            ext = ""

    return prefix, ext, suffix
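The docstring examples can be reproduced directly, e.g.:

    split_url_extension(
        "https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7.tgz?raw=true"
    )
    # -> ('https://github.com/losalamos/CLAMR/blob/packages/PowerParser_v2.0.7', '.tgz', '?raw=true')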

def determine_url_file_extension(path):
    """This returns the type of archive a URL refers to. This is
    sometimes confusing because of URLs like:

        (1) https://github.com/petdance/ack/tarball/1.93_02

    Where the URL doesn't actually contain the filename. We need
    to know what type it is so that we can appropriately name files
    in mirrors.
    """
    match = re.search(r"github.com/.+/(zip|tar)ball/", path)
    if match:
        if match.group(1) == "zip":
            return "zip"
        elif match.group(1) == "tar":
            return "tar.gz"

    prefix, ext, suffix = split_url_extension(path)
    return ext
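For the GitHub tarball case called out in the docstring:

    determine_url_file_extension("https://github.com/petdance/ack/tarball/1.93_02")  # -> 'tar.gz'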

def parse_version_offset(path):
    """Try to extract a version string from a filename or URL.
@@ -426,13 +139,13 @@ def parse_version_offset(path):
    # path: The prefix of the URL, everything before the ext and suffix
    # ext: The file extension
    # suffix: Any kind of query string that begins with a '?'
-    path, ext, suffix = split_url_extension(path)
+    path, ext, suffix = llnl.url.split_url_extension(path)

    # stem: Everything from path after the final '/'
    original_stem = os.path.basename(path)

    # Try to strip off anything after the version number
-    stem = strip_version_suffixes(original_stem)
+    stem = llnl.url.strip_version_suffixes(original_stem)

    # Assumptions:
    #
@@ -620,7 +333,7 @@ def parse_name_offset(path, v=None):
    # path: The prefix of the URL, everything before the ext and suffix
    # ext: The file extension
    # suffix: Any kind of query string that begins with a '?'
-    path, ext, suffix = split_url_extension(path)
+    path, ext, suffix = llnl.url.split_url_extension(path)

    # stem: Everything from path after the final '/'
    original_stem = os.path.basename(path)
@@ -735,28 +448,6 @@ def parse_name_and_version(path):
    return (name, ver)

def insensitize(string):
    """Change upper and lowercase letters to be case insensitive in
    the provided string. e.g., 'a' becomes '[Aa]', 'B' becomes
    '[bB]', etc. Use for building regexes."""

    def to_ins(match):
        char = match.group(1)
        return "[%s%s]" % (char.lower(), char.upper())

    return re.sub(r"([a-zA-Z])", to_ins, string)
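For example, when building case-insensitive regexes:

    insensitize("readme")  # -> '[rR][eE][aA][dD][mM][eE]'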

def cumsum(elts, init=0, fn=lambda x: x):
    """Return cumulative sum of result of fn on each element in elts."""
    sums = []
    s = init
    for i, e in enumerate(elts):
        sums.append(s)
        s += fn(e)
    return sums
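Note this is an exclusive running sum: each element is added after its position is recorded, e.g. (illustrative):

    cumsum([1, 2, 3])           # -> [0, 1, 3]
    cumsum([1, 2, 3], init=10)  # -> [10, 11, 13]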

def find_all(substring, string):
    """Returns a list containing the indices of
    every occurrence of substring in string."""
@@ -912,6 +603,122 @@ def color_url(path, **kwargs):
    return colorize(out.getvalue())

def find_versions_of_archive(
    archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
):
    """Scrape web pages for new versions of a tarball. This function prefers URLs in the
    following order: links found on the scraped page that match a url generated by the
    reference package, found and in the archive_urls list, found and derived from those
    in the archive_urls list, and if none are found for a version then the item in the
    archive_urls list is included for the version.

    Args:
        archive_urls (str or list or tuple): URL or sequence of URLs for
            different versions of a package. Typically these are just the
            tarballs from the package file itself. By default, this searches
            the parent directories of archives.
        list_url (str or None): URL for a listing of archives.
            Spack will scrape these pages for download links that look
            like the archive URL.
        list_depth (int): max depth to follow links on list_url pages.
            Defaults to 0.
        concurrency (int): maximum number of concurrent requests
        reference_package (spack.package_base.PackageBase or None): a spack package
            used as a reference for url detection. Uses the url_for_version
            method on the package to produce reference urls which, if found,
            are preferred.
    """
    if not isinstance(archive_urls, (list, tuple)):
        archive_urls = [archive_urls]

    # Generate a list of list_urls based on archive urls and any
    # explicitly listed list_url in the package
    list_urls = set()
    if list_url is not None:
        list_urls.add(list_url)
    for aurl in archive_urls:
        list_urls |= llnl.url.find_list_urls(aurl)

    # Add '/' to the end of the URL. Some web servers require this.
    additional_list_urls = set()
    for lurl in list_urls:
        if not lurl.endswith("/"):
            additional_list_urls.add(lurl + "/")
    list_urls |= additional_list_urls

    # Grab some web pages to scrape.
    pages, links = spack.util.web.spider(list_urls, depth=list_depth, concurrency=concurrency)

    # Scrape them for archive URLs
    regexes = []
    for aurl in archive_urls:
        # This creates a regex from the URL with a capture group for
        # the version part of the URL. The capture group is converted
        # to a generic wildcard, so we can use this to extract things
        # on a page that look like archive URLs.
        url_regex = wildcard_version(aurl)

        # We'll be a bit more liberal and just look for the archive
        # part, not the full path.
        # this is a URL so it is a posixpath even on Windows
        url_regex = pathlib.PurePosixPath(url_regex).name

        # We need to add a / to the beginning of the regex to prevent
        # Spack from picking up similarly named packages like:
        #   https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
        #   https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
        #   https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
        #   https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
        url_regex = "/" + url_regex

        # We need to add a $ anchor to the end of the regex to prevent
        # Spack from picking up signature files like:
        #   .asc
        #   .md5
        #   .sha256
        #   .sig
        # However, SourceForge downloads still need to end in '/download'.
        url_regex += r"(\/download)?"
        # PyPI adds #sha256=... to the end of the URL
        url_regex += "(#sha256=.*)?"
        url_regex += "$"

        regexes.append(url_regex)

    regexes = [re.compile(r) for r in regexes]
    # Build a dict version -> URL from any links that match the wildcards.
    # Walk through archive_url links first.
    # Any conflicting versions will be overwritten by the list_url links.
    versions = {}
    matched = set()
    for url in sorted(links):
        url = convert_to_posix_path(url)
        if any(r.search(url) for r in regexes):
            try:
                ver = parse_version(url)
                if ver in matched:
                    continue
                versions[ver] = url
                # prevent this version from getting overwritten
                if reference_package is not None:
                    if url == reference_package.url_for_version(ver):
                        matched.add(ver)
                else:
                    extrapolated_urls = [substitute_version(u, ver) for u in archive_urls]
                    if url in extrapolated_urls:
                        matched.add(ver)
            except UndetectableVersionError:
                continue

    for url in archive_urls:
        url = convert_to_posix_path(url)
        ver = parse_version(url)
        if ver not in versions:
            versions[ver] = url

    return versions
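A minimal sketch of how this entry point is driven (requires network access; the tarball URL is the same illustrative one used for find_list_urls above):

    versions = find_versions_of_archive(
        "https://github.com/llnl/callpath/archive/v1.0.1.tar.gz", list_depth=0
    )
    for ver, url in sorted(versions.items()):
        print(ver, url)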

class UrlParseError(spack.error.SpackError):
    """Raised when the URL module can't parse something correctly."""

@@ -9,27 +9,13 @@
import re
import shutil
import sys
-from itertools import product

+import llnl.url
from llnl.util import tty

-import spack.util.path as spath
from spack.error import SpackError
from spack.util.executable import CommandNotFoundError, which

-# Supported archive extensions.
-PRE_EXTS = ["tar", "TAR"]
-EXTS = ["gz", "bz2", "xz", "Z"]
-NOTAR_EXTS = ["zip", "tgz", "tbz2", "tbz", "txz"]
-CONTRACTION_MAP = {"tgz": "tar.gz", "txz": "tar.xz", "tbz": "tar.bz2", "tbz2": "tar.bz2"}
-
-# Add PRE_EXTS and EXTS last so that .tar.gz is matched *before* .tar or .gz
-ALLOWED_ARCHIVE_TYPES = (
-    [".".join(ext) for ext in product(PRE_EXTS, EXTS)] + PRE_EXTS + EXTS + NOTAR_EXTS
-)
-
-ALLOWED_SINGLE_EXT_ARCHIVE_TYPES = PRE_EXTS + EXTS + NOTAR_EXTS
-
try:
    import bz2  # noqa

@@ -66,10 +52,6 @@ def is_bz2_supported():
    return _bz2_support


-def allowed_archive(path):
-    return False if not path else any(path.endswith(t) for t in ALLOWED_ARCHIVE_TYPES)
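The predicate itself now lives in llnl.url (see the call sites below); its behavior is unchanged, e.g. (illustrative):

    allowed_archive("foo-1.0.tar.gz")  # -> True
    allowed_archive("foo-1.0.txt")     # -> False
    allowed_archive("")                # -> False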

def _system_untar(archive_file, remove_archive_file=False):
    """Returns path to unarchived tar file.
    Untars archive via system tar.
@@ -78,7 +60,7 @@ def _system_untar(archive_file, remove_archive_file=False):
        archive_file (str): absolute path to the archive to be extracted.
            Can be one of .tar(.[gz|bz2|xz|Z]) or .(tgz|tbz|tbz2|txz).
    """
-    archive_file_no_ext = strip_extension(archive_file)
+    archive_file_no_ext = llnl.url.strip_extension(archive_file)
    outfile = os.path.basename(archive_file_no_ext)
    if archive_file_no_ext == archive_file:
        # the archive file has no extension. Tar on windows cannot untar onto itself
@@ -114,7 +96,7 @@ def _bunzip2(archive_file):
def _py_bunzip(archive_file):
    """Returns path to decompressed file.
    Decompresses bz2 compressed archives/files via python's bz2 module"""
-    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    f_bz = bz2.BZ2File(archive_file, mode="rb")
@@ -128,7 +110,7 @@ def _system_bunzip(archive_file):
    """Returns path to decompressed file.
    Decompresses bz2 compressed archives/files via system bzip2 utility"""
    compressed_file_name = os.path.basename(archive_file)
-    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "bz2"))
+    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2"))
    working_dir = os.getcwd()
    archive_out = os.path.join(working_dir, decompressed_file)
    copy_path = os.path.join(working_dir, compressed_file_name)
@@ -158,7 +140,7 @@ def _gunzip(archive_file):
def _py_gunzip(archive_file):
    """Returns path to gunzip'd file
    Decompresses `.gz` compressed archives via python gzip module"""
-    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "gz"))
+    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "gz"))
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    f_in = gzip.open(archive_file, "rb")
@@ -171,7 +153,7 @@ def _py_gunzip(archive_file):
def _system_gunzip(archive_file):
    """Returns path to gunzip'd file
    Decompresses `.gz` compressed files via system gzip"""
-    archive_file_no_ext = strip_compression_extension(archive_file)
+    archive_file_no_ext = llnl.url.strip_compression_extension(archive_file)
    if archive_file_no_ext == archive_file:
        # the zip file has no extension. On Unix gunzip cannot unzip onto itself
        archive_file = archive_file + ".gz"
@@ -196,7 +178,7 @@ def _unzip(archive_file):
    Args:
        archive_file (str): absolute path of the file to be decompressed
    """
-    extracted_file = os.path.basename(strip_extension(archive_file, "zip"))
+    extracted_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="zip"))
    if sys.platform == "win32":
        return _system_untar(archive_file)
    else:
@@ -259,7 +241,7 @@ def unarchive(archive_file):
def _py_lzma(archive_file):
    """Returns path to decompressed .xz files
    Decompress lzma compressed .xz files via python lzma module"""
-    decompressed_file = os.path.basename(strip_compression_extension(archive_file, "xz"))
+    decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "xz"))
    archive_out = os.path.join(os.getcwd(), decompressed_file)
    with open(archive_out, "wb") as ar:
        with lzma.open(archive_file) as lar:
@@ -272,7 +254,7 @@ def _xz(archive_file):
    Decompress lzma compressed .xz files via xz command line
    tool.
    """
-    decompressed_file = os.path.basename(strip_extension(archive_file, "xz"))
+    decompressed_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="xz"))
    working_dir = os.getcwd()
    destination_abspath = os.path.join(working_dir, decompressed_file)
    compressed_file = os.path.basename(archive_file)
@@ -297,13 +279,13 @@ def _system_7zip(archive_file):
    Args:
        archive_file (str): absolute path of file to be unarchived
    """
-    outfile = os.path.basename(strip_compression_extension(archive_file))
+    outfile = os.path.basename(llnl.url.strip_compression_extension(archive_file))
    _7z = which("7z")
    if not _7z:
        raise CommandNotFoundError(
            "7z unavailable,\
            unable to extract %s files. 7z can be installed via Spack"
-            % extension_from_path(archive_file)
+            % llnl.url.extension_from_path(archive_file)
        )
    _7z.add_default_arg("e")
    _7z(archive_file)
@@ -318,7 +300,7 @@ def decompressor_for(path, extension=None):
    if not extension:
        extension = extension_from_file(path, decompress=True)

-    if not allowed_archive(extension):
+    if not llnl.url.allowed_archive(extension):
        raise CommandNotFoundError(
            "Cannot extract archive, \
            unrecognized file extension: '%s'"
@@ -394,7 +376,7 @@ def decompressor_for_win(extension):
        path (str): path of the archive file requiring decompression
        extension (str): extension
    """
-    extension = expand_contracted_extension(extension)
+    extension = llnl.url.expand_contracted_extension(extension)
    # Windows native tar can handle .zip extensions, use standard
    # unzip method
    if re.match(r"zip$", extension):
@@ -415,7 +397,7 @@ def decompressor_for_win(extension):
    # python based decompression strategy
    # Expand extension from contracted extension i.e. tar.gz from .tgz
    # no-op on non contracted extensions
-    compression_extension = compression_ext_from_compressed_archive(extension)
+    compression_extension = llnl.url.compression_ext_from_compressed_archive(extension)
    decompressor = _determine_py_decomp_archive_strategy(compression_extension)
    if not decompressor:
        raise SpackError(
@@ -657,7 +639,7 @@ def extension_from_stream(stream, decompress=False):
            "Cannot derive file extension from magic number;"
            " falling back to regex path parsing."
        )
-        return extension_from_path(stream.name)
+        return llnl.url.extension_from_path(stream.name)
    resultant_ext = suffix_ext if not prefix_ext else ".".join([prefix_ext, suffix_ext])
    tty.debug("File extension %s successfully derived by magic number." % resultant_ext)
    return resultant_ext
@@ -693,114 +675,11 @@ def extension_from_file(file, decompress=False):
        if ext and ext.startswith("tar."):
            suf = ext.split(".")[1]
            abbr = "t" + suf
-            if check_extension(file, abbr):
+            if llnl.url.has_extension(file, abbr):
                return abbr
        if not ext:
            # If unable to parse extension from stream,
            # attempt to fall back to string parsing
-            ext = extension_from_path(file)
+            ext = llnl.url.extension_from_path(file)
            return ext
    return None

def extension_from_path(path):
    """Returns the allowed archive extension for a path.
    If path does not include a valid archive extension
    (see`spack.util.compression.ALLOWED_ARCHIVE_TYPES`) return None
    """
    if path is None:
        raise ValueError("Can't call extension() on None")

    for t in ALLOWED_ARCHIVE_TYPES:
        if check_extension(path, t):
            return t
    return None


def strip_compression_extension(path, ext=None):
    """Returns path with last supported (can be combined with tar) or
    provided archive extension stripped"""
    path_ext = extension_from_path(path)
    if path_ext:
        path = expand_contracted_extension_in_path(path)
        exts_to_check = EXTS
        if ext:
            exts_to_check = [ext]
        for ext_check in exts_to_check:
            mod_path = check_and_remove_ext(path, ext_check)
            if mod_path != path:
                return mod_path
    return path


def strip_extension(path, ext=None):
    """Returns the part of a path that does not include extension.
    If ext is given, only attempts to remove that extension. If no
    extension given, attempts to strip any valid extension from path"""
    if ext:
        return check_and_remove_ext(path, ext)
    for t in ALLOWED_ARCHIVE_TYPES:
        mod_path = check_and_remove_ext(path, t)
        if mod_path != path:
            return mod_path
    return path
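Taken together, these helpers behave as follows on a typical archive name (illustrative):

    extension_from_path("foo-1.0.tar.gz")          # -> 'tar.gz'
    strip_extension("foo-1.0.tar.gz")              # -> 'foo-1.0'
    strip_compression_extension("foo-1.0.tar.gz")  # -> 'foo-1.0.tar'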

def check_extension(path, ext):
    """Returns true if extension is present in path
    false otherwise"""
    # Strip sourceforge suffix.
    prefix, _ = spath.find_sourceforge_suffix(path)
    if not ext.startswith(r"\."):
        ext = r"\.%s$" % ext
    if re.search(ext, prefix):
        return True
    return False


def reg_remove_ext(path, ext):
    """Returns path with ext remove via regex"""
    if path and ext:
        suffix = r"\.%s$" % ext
        return re.sub(suffix, "", path)
    return path


def check_and_remove_ext(path, ext):
    """Returns path with extension removed if extension
    is present in path. Otherwise just returns path"""
    if check_extension(path, ext):
        return reg_remove_ext(path, ext)
    return path


def _substitute_extension(path, old_ext, new_ext):
    """Returns path with old_ext replaced with new_ext.
    old_ext and new_ext can be extension strings or regexs"""
    return re.sub(rf"{old_ext}", rf"{new_ext}", path)


def expand_contracted_extension_in_path(path, ext=None):
    """Returns path with any contraction extension (i.e. tgz) expanded
    (i.e. tar.gz). If ext is specified, only attempt to expand that extension"""
    if not ext:
        ext = extension_from_path(path)
    expanded_ext = expand_contracted_extension(ext)
    if expanded_ext != ext:
        return _substitute_extension(path, ext, expanded_ext)
    return path


def expand_contracted_extension(extension):
    """Return expanded version of contracted extension
    i.e. .tgz -> .tar.gz, no op on non contracted extensions"""
    extension = extension.strip(".")
    return CONTRACTION_MAP.get(extension, extension)
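For example (illustrative):

    expand_contracted_extension("tgz")      # -> 'tar.gz'
    expand_contracted_extension(".txz")     # -> 'tar.xz'
    expand_contracted_extension("tar.bz2")  # -> 'tar.bz2' (no-op)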

def compression_ext_from_compressed_archive(extension):
    """Returns compression extension for a compressed archive"""
    extension = expand_contracted_extension(extension)
    for ext in [*EXTS]:
        if ext in extension:
            return ext

@@ -10,6 +10,10 @@

import os
import sys
+import urllib.parse
+import urllib.response
+from urllib.error import URLError
+from urllib.request import BaseHandler

import llnl.util.tty as tty

@@ -222,3 +226,21 @@ def get_blob_headers(self):
        }

        return headers


def gcs_open(req, *args, **kwargs):
    """Open a reader stream to a blob object on GCS"""
    url = urllib.parse.urlparse(req.get_full_url())
    gcsblob = GCSBlob(url)

    if not gcsblob.exists():
        raise URLError("GCS blob {0} does not exist".format(gcsblob.blob_path))
    stream = gcsblob.get_blob_byte_stream()
    headers = gcsblob.get_blob_headers()

    return urllib.response.addinfourl(stream, headers, url)


class GCSHandler(BaseHandler):
    def gs_open(self, req):
        return gcs_open(req)
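A sketch of how such a handler plugs into urllib (hypothetical bucket name; Spack wires this up in spack.util.web._urlopen, shown further down):

    from urllib.request import build_opener

    opener = build_opener(GCSHandler())
    response = opener.open("gs://my-bucket/path/to/blob")  # dispatched to gs_open()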

@@ -109,15 +109,6 @@ def win_exe_ext():
    return ".exe"


def find_sourceforge_suffix(path):
    """find and match sourceforge filepath components
    Return match object"""
    match = re.search(r"(.*(?:sourceforge\.net|sf\.net)/.*)(/download)$", path)
    if match:
        return match.groups()
    return path, ""


def path_to_os_path(*pths):
    """
    Takes an arbitrary number of positional parameters
@@ -3,10 +3,13 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
+import urllib.error
import urllib.parse
+import urllib.request
+import urllib.response
+from io import BufferedReader, BytesIO, IOBase
from typing import Any, Dict, Tuple

-import spack
import spack.config

#: Map (mirror name, method) tuples to s3 client instances.
@@ -114,4 +117,72 @@ def get_mirror_s3_connection_info(mirror, method):
    if endpoint_url:
        s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(endpoint_url)

-    return (s3_connection, s3_client_args)
+    return s3_connection, s3_client_args


# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-is along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
    def __init__(self, raw):
        # In botocore >=1.23.47, StreamingBody inherits from IOBase, so we
        # only add missing attributes in older versions.
        # https://github.com/boto/botocore/commit/a624815eabac50442ed7404f3c4f2664cd0aa784
        if not isinstance(raw, IOBase):
            raw.readable = lambda: True
            raw.writable = lambda: False
            raw.seekable = lambda: False
            raw.closed = False
            raw.flush = lambda: None
        super().__init__(raw)

    def detach(self):
        self.raw = None

    def read(self, *args, **kwargs):
        return self.raw.read(*args, **kwargs)

    def __getattr__(self, key):
        return getattr(self.raw, key)


def _s3_open(url, method="GET"):
    parsed = urllib.parse.urlparse(url)
    s3 = get_s3_session(url, method="fetch")

    bucket = parsed.netloc
    key = parsed.path

    if key.startswith("/"):
        key = key[1:]

    if method not in ("GET", "HEAD"):
        raise urllib.error.URLError(
            "Only GET and HEAD verbs are currently supported for the s3:// scheme"
        )

    try:
        if method == "GET":
            obj = s3.get_object(Bucket=bucket, Key=key)
            # NOTE(opadron): Apply workaround here (see above)
            stream = WrapStream(obj["Body"])
        elif method == "HEAD":
            obj = s3.head_object(Bucket=bucket, Key=key)
            stream = BytesIO()
    except s3.ClientError as e:
        raise urllib.error.URLError(e) from e

    headers = obj["ResponseMetadata"]["HTTPHeaders"]

    return url, headers, stream


class UrllibS3Handler(urllib.request.BaseHandler):
    def s3_open(self, req):
        orig_url = req.get_full_url()
        url, headers, stream = _s3_open(orig_url, method=req.get_method())
        return urllib.response.addinfourl(stream, headers, url)
@@ -21,23 +21,17 @@
from urllib.error import HTTPError, URLError
from urllib.request import HTTPSHandler, Request, build_opener

-import llnl.util.lang
+import llnl.url
-import llnl.util.tty as tty
+from llnl.util import lang, tty
from llnl.util.filesystem import mkdirp, rename, working_dir

-import spack
import spack.config
import spack.error
-import spack.gcs_handler
-import spack.s3_handler
-import spack.url
-import spack.util.crypto
-import spack.util.gcs as gcs_util
-import spack.util.s3 as s3_util
import spack.util.url as url_util
-from spack.util.compression import ALLOWED_ARCHIVE_TYPES
-from spack.util.executable import CommandNotFoundError, which
+from .executable import CommandNotFoundError, which
-from spack.util.path import convert_to_posix_path
+from .gcs import GCSBlob, GCSBucket, GCSHandler
+from .s3 import UrllibS3Handler, get_s3_session


class DetailedHTTPError(HTTPError):
@@ -66,8 +60,8 @@ def http_error_default(self, req, fp, code, msg, hdrs):


def _urlopen():
-    s3 = spack.s3_handler.UrllibS3Handler()
+    s3 = UrllibS3Handler()
-    gcs = spack.gcs_handler.GCSHandler()
+    gcs = GCSHandler()
    error_handler = SpackHTTPDefaultErrorHandler()

    # One opener with HTTPS ssl enabled
@@ -90,7 +84,7 @@ def dispatch_open(fullurl, data=None, timeout=None):


#: Dispatches to the correct OpenerDirector.open, based on Spack configuration.
-urlopen = llnl.util.lang.Singleton(_urlopen)
+urlopen = lang.Singleton(_urlopen)

#: User-Agent used in Request objects
SPACK_USER_AGENT = "Spackbot/{0}".format(spack.spack_version)
@@ -190,14 +184,14 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
        while remote_path.startswith("/"):
            remote_path = remote_path[1:]

-        s3 = s3_util.get_s3_session(remote_url, method="push")
+        s3 = get_s3_session(remote_url, method="push")
        s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    elif remote_url.scheme == "gs":
-        gcs = gcs_util.GCSBlob(remote_url)
+        gcs = GCSBlob(remote_url)
        gcs.upload_to_blob(local_file_path)
        if not keep_original:
            os.remove(local_file_path)
@@ -427,7 +421,7 @@ def remove_url(url, recursive=False):

    if url.scheme == "s3":
        # Try to find a mirror for potential connection information
-        s3 = s3_util.get_s3_session(url, method="push")
+        s3 = get_s3_session(url, method="push")
        bucket = url.netloc
        if recursive:
            # Because list_objects_v2 can only return up to 1000 items
@@ -460,10 +454,10 @@ def remove_url(url, recursive=False):

    elif url.scheme == "gs":
        if recursive:
-            bucket = gcs_util.GCSBucket(url)
+            bucket = GCSBucket(url)
            bucket.destroy(recursive=recursive)
        else:
-            blob = gcs_util.GCSBlob(url)
+            blob = GCSBlob(url)
            blob.delete_blob()
        return
@@ -538,14 +532,14 @@ def list_url(url, recursive=False):
    ]

    if url.scheme == "s3":
-        s3 = s3_util.get_s3_session(url, method="fetch")
+        s3 = get_s3_session(url, method="fetch")
        if recursive:
            return list(_iter_s3_prefix(s3, url))

        return list(set(key.split("/", 1)[0] for key in _iter_s3_prefix(s3, url)))

    elif url.scheme == "gs":
-        gcs = gcs_util.GCSBucket(url)
+        gcs = GCSBucket(url)
        return gcs.get_all_blobs(recursive=recursive)
@@ -636,7 +630,7 @@ def _spider(url, collect_nested):
                links.add(abs_link)

                # Skip stuff that looks like an archive
-                if any(raw_link.endswith(s) for s in ALLOWED_ARCHIVE_TYPES):
+                if any(raw_link.endswith(s) for s in llnl.url.ALLOWED_ARCHIVE_TYPES):
                    continue

                # Skip already-visited links
@@ -696,7 +690,7 @@ def _spider(url, collect_nested):
                        current_depth, depth, len(spider_args)
                    )
                )
-            results = tp.map(llnl.util.lang.star(_spider), spider_args)
+            results = tp.map(lang.star(_spider), spider_args)
            spider_args = []
            collect = current_depth < depth
            for sub_pages, sub_links, sub_spider_args in results:
@@ -713,123 +707,6 @@ def _spider(url, collect_nested):
    return pages, links

def find_versions_of_archive(
    archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
):
    """Scrape web pages for new versions of a tarball. This function prefers URLs in the
    following order: links found on the scraped page that match a url generated by the
    reference package, found and in the archive_urls list, found and derived from those
    in the archive_urls list, and if none are found for a version then the item in the
    archive_urls list is included for the version.

    Args:
        archive_urls (str or list or tuple): URL or sequence of URLs for
            different versions of a package. Typically these are just the
            tarballs from the package file itself. By default, this searches
            the parent directories of archives.
        list_url (str or None): URL for a listing of archives.
            Spack will scrape these pages for download links that look
            like the archive URL.
        list_depth (int): max depth to follow links on list_url pages.
            Defaults to 0.
        concurrency (int): maximum number of concurrent requests
        reference_package (spack.package_base.PackageBase or None): a spack package
            used as a reference for url detection. Uses the url_for_version
            method on the package to produce reference urls which, if found,
            are preferred.
    """
    if not isinstance(archive_urls, (list, tuple)):
        archive_urls = [archive_urls]

    # Generate a list of list_urls based on archive urls and any
    # explicitly listed list_url in the package
    list_urls = set()
    if list_url is not None:
        list_urls.add(list_url)
    for aurl in archive_urls:
        list_urls |= spack.url.find_list_urls(aurl)

    # Add '/' to the end of the URL. Some web servers require this.
    additional_list_urls = set()
    for lurl in list_urls:
        if not lurl.endswith("/"):
            additional_list_urls.add(lurl + "/")
    list_urls |= additional_list_urls

    # Grab some web pages to scrape.
    pages, links = spider(list_urls, depth=list_depth, concurrency=concurrency)

    # Scrape them for archive URLs
    regexes = []
    for aurl in archive_urls:
        # This creates a regex from the URL with a capture group for
        # the version part of the URL. The capture group is converted
        # to a generic wildcard, so we can use this to extract things
        # on a page that look like archive URLs.
        url_regex = spack.url.wildcard_version(aurl)

        # We'll be a bit more liberal and just look for the archive
        # part, not the full path.
        # this is a URL so it is a posixpath even on Windows
        url_regex = PurePosixPath(url_regex).name

        # We need to add a / to the beginning of the regex to prevent
        # Spack from picking up similarly named packages like:
        #   https://cran.r-project.org/src/contrib/pls_2.6-0.tar.gz
        #   https://cran.r-project.org/src/contrib/enpls_5.7.tar.gz
        #   https://cran.r-project.org/src/contrib/autopls_1.3.tar.gz
        #   https://cran.r-project.org/src/contrib/matrixpls_1.0.4.tar.gz
        url_regex = "/" + url_regex

        # We need to add a $ anchor to the end of the regex to prevent
        # Spack from picking up signature files like:
        #   .asc
        #   .md5
        #   .sha256
        #   .sig
        # However, SourceForge downloads still need to end in '/download'.
        url_regex += r"(\/download)?"
        # PyPI adds #sha256=... to the end of the URL
        url_regex += "(#sha256=.*)?"
        url_regex += "$"

        regexes.append(url_regex)

    # Build a dict version -> URL from any links that match the wildcards.
    # Walk through archive_url links first.
    # Any conflicting versions will be overwritten by the list_url links.
    versions = {}
    matched = set()
    for url in sorted(links):
        url = convert_to_posix_path(url)
        if any(re.search(r, url) for r in regexes):
            try:
                ver = spack.url.parse_version(url)
                if ver in matched:
                    continue
                versions[ver] = url
                # prevent this version from getting overwritten
                if reference_package is not None:
                    if url == reference_package.url_for_version(ver):
                        matched.add(ver)
                else:
                    extrapolated_urls = [
                        spack.url.substitute_version(u, ver) for u in archive_urls
                    ]
                    if url in extrapolated_urls:
                        matched.add(ver)
            except spack.url.UndetectableVersionError:
                continue

    for url in archive_urls:
        url = convert_to_posix_path(url)
        ver = spack.url.parse_version(url)
        if ver not in versions:
            versions[ver] = url

    return versions

def get_header(headers, header_name):
    """Looks up a dict of headers for the given header value.

@@ -3,7 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

-import spack.util.web
+import spack.url
from spack.package import *


@@ -120,9 +120,7 @@ def fetch_remote_versions(self, *args, **kwargs):
        return dict(
            map(
                lambda u: (u, self.url_for_version(u)),
-                spack.util.web.find_versions_of_archive(
-                    self.all_urls, self.list_url, self.list_depth
-                ),
+                spack.url.find_versions_of_archive(self.all_urls, self.list_url, self.list_depth),
            )
        )