Factor out URL fetching into URLFetchStrategy
- Added FetchStrategy class to Spack
- Isolated pieces that need to be separate from Stage for git/svn/http
- Added URLFetchStrategy for curl-based fetching.
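As a rough sketch of the new division of labor (not part of the commit): Stage keeps owning the staging directory while a FetchStrategy does the downloading, checksumming, and expanding. The URL, digest, and stage name below are made-up placeholders, and a Stage normally builds its own fetcher via strategy_for_url(); this only illustrates the interface added in fetch_strategy.py below.

    from spack.fetch_strategy import URLFetchStrategy
    from spack.stage import Stage

    # A curl-based fetcher; the URL and md5 digest are placeholders.
    fetcher = URLFetchStrategy('http://example.com/foo-1.0.tar.gz',
                               digest='0123456789abcdef0123456789abcdef')

    # The stage owns the directory; the fetcher is told about it before fetching.
    stage = Stage('http://example.com/foo-1.0.tar.gz', name='example-stage')
    fetcher.set_stage(stage)

    fetcher.fetch()     # download the archive into the stage directory with curl
    fetcher.check()     # verify the archive against the md5 digest
    fetcher.expand()    # decompress it; the source ends up at stage.source_path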
parent 74a603dcd3
commit 52d140c337

lib/spack/spack/fetch_strategy.py (new file, 222 lines)
@@ -0,0 +1,222 @@
##############################################################################
# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://scalability-llnl.github.io/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
"""
Fetch strategies are used to download source code into a staging area
in order to build it. They need to define the following methods:

    * fetch()
        This should attempt to download/check out source from somewhere.
    * check()
        Apply a checksum to the downloaded source code, e.g. for an archive.
        May not do anything if the fetch method was safe to begin with.
    * expand()
        Expand the downloaded file (e.g., an archive) into source code.
    * reset()
        Restore original state of downloaded code. Used by clean commands.
        This may just remove the expanded source and re-expand an archive,
        or it may run something like git reset --hard.
"""
import os
import re
import shutil

import llnl.util.tty as tty

import spack
import spack.error
import spack.util.crypto as crypto
from spack.util.compression import decompressor_for


class FetchStrategy(object):
    def __init__(self):
        # The stage is initialized late, so that fetch strategies can be constructed
        # at package construction time. This is where things will be fetched.
        self.stage = None


    def set_stage(self, stage):
        """This is called by Stage before any of the fetching
           methods are called on the stage."""
        self.stage = stage


    # Subclasses need to implement these methods
    def fetch(self): pass      # Return True on success, False on fail
    def check(self): pass
    def expand(self): pass
    def reset(self): pass
    def __str__(self): pass


class URLFetchStrategy(FetchStrategy):

    def __init__(self, url, digest=None):
        super(URLFetchStrategy, self).__init__()
        self.url = url
        self.digest = digest


    def fetch(self):
        assert(self.stage)

        self.stage.chdir()

        if self.archive_file:
            tty.msg("Already downloaded %s." % self.archive_file)
            return

        tty.msg("Trying to fetch from %s" % self.url)

        # Run curl but grab the mime type from the http headers
        headers = spack.curl('-#',        # status bar
                             '-O',        # save file to disk
                             '-D', '-',   # print out HTML headers
                             '-L', self.url,
                             return_output=True, fail_on_error=False)

        if spack.curl.returncode != 0:
            # clean up archive on failure.
            if self.archive_file:
                os.remove(self.archive_file)

            if spack.curl.returncode == 60:
                # This is a certificate error. Suggest spack -k
                raise FailedDownloadError(
                    self.url,
                    "Curl was unable to fetch due to invalid certificate. "
                    "This is either an attack, or your cluster's SSL configuration "
                    "is bad. If you believe your SSL configuration is bad, you "
                    "can try running spack -k, which will not check SSL certificates. "
                    "Use this at your own risk.")

        # Check if we somehow got an HTML file rather than the archive we
        # asked for. We only look at the last content type, to handle
        # redirects properly.
        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
        if content_types and 'text/html' in content_types[-1]:
            tty.warn("The contents of " + self.archive_file + " look like HTML.",
                     "The checksum will likely be bad. If it is, you can use",
                     "'spack clean --dist' to remove the bad archive, then fix",
                     "your internet gateway issue and install again.")

        if not self.archive_file:
            raise FailedDownloadError(self.url)


    @property
    def archive_file(self):
        """Path to the source archive within this stage directory."""
        assert(self.stage)
        path = os.path.join(self.stage.path, os.path.basename(self.url))
        return path if os.path.exists(path) else None


    def expand(self):
        assert(self.stage)
        tty.msg("Staging archive: %s" % self.archive_file)

        self.stage.chdir()
        if not self.archive_file:
            raise NoArchiveFileError("URLFetchStrategy couldn't find archive file",
                                     "Failed on expand() for URL %s" % self.url)

        print self.archive_file

        decompress = decompressor_for(self.archive_file)
        decompress(self.archive_file)


    def check(self):
        """Check the downloaded archive against a checksum digest.
           No-op if this stage checks code out of a repository."""
        assert(self.stage)
        if not self.digest:
            raise NoDigestError("Attempt to check URLFetchStrategy with no digest.")
        checker = crypto.Checker(self.digest)
        if not checker.check(self.archive_file):
            raise ChecksumError(
                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
                "Expected %s but got %s." % (self.digest, checker.sum))


    def reset(self):
        """Removes the source path if it exists, then re-expands the archive."""
        assert(self.stage)
        if not self.archive_file:
            raise NoArchiveFileError("Tried to reset URLFetchStrategy before fetching",
                                     "Failed on reset() for URL %s" % self.url)
        if self.stage.source_path:
            shutil.rmtree(self.stage.source_path, ignore_errors=True)
        self.expand()


    def __str__(self):
        return self.url


class GitFetchStrategy(FetchStrategy):
    pass


class SvnFetchStrategy(FetchStrategy):
    pass


def strategy_for_url(url):
    """Given a URL, find an appropriate fetch strategy for it.
       Currently just gives you a URLFetchStrategy that uses curl.

       TODO: make this return appropriate fetch strategies for other
             types of URLs.
    """
    return URLFetchStrategy(url)


class FetchStrategyError(spack.error.SpackError):
    def __init__(self, msg, long_msg):
        super(FetchStrategyError, self).__init__(msg, long_msg)


class FailedDownloadError(FetchStrategyError):
    """Raised when a download fails."""
    def __init__(self, url, msg=""):
        super(FailedDownloadError, self).__init__(
            "Failed to fetch file from URL: %s" % url, msg)
        self.url = url


class NoArchiveFileError(FetchStrategyError):
    def __init__(self, msg, long_msg):
        super(NoArchiveFileError, self).__init__(msg, long_msg)


class NoDigestError(FetchStrategyError):
    def __init__(self, msg, long_msg):
        super(NoDigestError, self).__init__(msg, long_msg)

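GitFetchStrategy and SvnFetchStrategy are left as empty stubs in this commit. Purely as a hypothetical sketch of where the interface could go (none of this is in the commit; the clone layout, ref handling, and use of subprocess are assumptions), a git-based strategy following the same four methods might look like:

    # Hypothetical sketch, not part of this commit. Assumes it lives in the
    # same module so FetchStrategy is in scope.
    import os
    import subprocess

    class GitFetchStrategy(FetchStrategy):
        def __init__(self, url, ref='master'):
            super(GitFetchStrategy, self).__init__()
            self.url = url
            self.ref = ref      # branch, tag, or commit to check out

        def fetch(self):
            assert(self.stage)
            self.stage.chdir()
            # Clone into a subdirectory of the stage, like an expanded archive.
            subprocess.check_call(['git', 'clone', self.url, 'git-checkout'])
            subprocess.check_call(['git', 'checkout', self.ref],
                                  cwd=os.path.join(self.stage.path, 'git-checkout'))

        def check(self):
            pass    # nothing to checksum; the ref pins the content

        def expand(self):
            pass    # a clone is already "expanded"

        def reset(self):
            # Throw away local modifications instead of re-expanding an archive.
            subprocess.check_call(['git', 'checkout', '--', '.'],
                                  cwd=self.stage.source_path)
            subprocess.check_call(['git', 'clean', '-fdx'],
                                  cwd=self.stage.source_path)

        def __str__(self):
            return self.url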
@@ -337,7 +337,7 @@ def __init__(self, spec):

        # Sanity check some required variables that could be
        # overridden by package authors.
        def sanity_check_dict(attr_name):
        def ensure_has_dict(attr_name):
            if not hasattr(self, attr_name):
                raise PackageError("Package %s must define %s" % attr_name)

@@ -345,10 +345,10 @@ def sanity_check_dict(attr_name):
            if not isinstance(attr, dict):
                raise PackageError("Package %s has non-dict %s attribute!"
                                   % (self.name, attr_name))
        sanity_check_dict('versions')
        sanity_check_dict('dependencies')
        sanity_check_dict('conflicted')
        sanity_check_dict('patches')
        ensure_has_dict('versions')
        ensure_has_dict('dependencies')
        ensure_has_dict('conflicted')
        ensure_has_dict('patches')

        # Check versions in the versions dict.
        for v in self.versions:
@@ -362,9 +362,8 @@ def sanity_check_dict(attr_name):
        # Version-ize the keys in versions dict
        try:
            self.versions = dict((Version(v), h) for v,h in self.versions.items())
        except ValueError:
            raise ValueError("Keys of versions dict in package %s must be versions!"
                             % self.name)
        except ValueError, e:
            raise ValueError("In package %s: %s" % (self.name, e.message))

        # stage used to build this package.
        self._stage = None
@@ -600,9 +599,8 @@ def do_stage(self):

        self.do_fetch()

        archive_dir = self.stage.expanded_archive_path
        archive_dir = self.stage.source_path
        if not archive_dir:
            tty.msg("Staging archive: %s" % self.stage.archive_file)
            self.stage.expand_archive()
            tty.msg("Created stage directory in %s." % self.stage.path)
        else:
@@ -620,7 +618,7 @@ def do_patch(self):

        # Construct paths to special files in the archive dir used to
        # keep track of whether patches were successfully applied.
        archive_dir = self.stage.expanded_archive_path
        archive_dir = self.stage.source_path
        good_file = join_path(archive_dir, '.spack_patched')
        bad_file = join_path(archive_dir, '.spack_patch_failed')

@@ -95,7 +95,7 @@ def __init__(self, checksum, url):


def version(ver, checksum, **kwargs):
    """Adds a version and associated metadata to the package."""
    """Adds a version and metadata describing how to fetch it."""
    pkg = caller_locals()

    versions = pkg.setdefault('versions', {})

@@ -32,18 +32,20 @@

import spack
import spack.config
from spack.fetch_strategy import strategy_for_url, URLFetchStrategy
import spack.error
import spack.util.crypto as crypto
from spack.util.compression import decompressor_for


STAGE_PREFIX = 'spack-stage-'


class Stage(object):
    """A Stage object manages a directory where an archive is downloaded,
       expanded, and built before being installed. It also handles downloading
       the archive. A stage's lifecycle looks like this:
    """A Stage object manages a directory where some source code is
       downloaded and built before being installed. It handles
       fetching the source code, either as an archive to be expanded
       or by checking it out of a repository. A stage's lifecycle
       looks like this:

       Stage()
         Constructor creates the stage directory.
@@ -71,18 +73,24 @@ class Stage(object):
    def __init__(self, url, **kwargs):
        """Create a stage object.
           Parameters:
             url      URL of the archive to be downloaded into this stage.
             url_or_fetch_strategy
                 URL of the archive to be downloaded into this stage, OR
                 a valid FetchStrategy.

             name     If a name is provided, then this stage is a named stage
                      and will persist between runs (or if you construct another
                      stage object later). If name is not provided, then this
                      stage will be given a unique name automatically.
             name
                 If a name is provided, then this stage is a named stage
                 and will persist between runs (or if you construct another
                 stage object later). If name is not provided, then this
                 stage will be given a unique name automatically.
        """
        if isinstance(url, basestring):
            self.fetcher = strategy_for_url(url)
        self.fetcher.set_stage(self)

        self.name = kwargs.get('name')
        self.mirror_path = kwargs.get('mirror_path')

        self.tmp_root = find_tmp_root()
        self.url = url

        self.path = None
        self._setup()
@@ -198,17 +206,17 @@ def archive_file(self):


    @property
    def expanded_archive_path(self):
        """Returns the path to the expanded archive directory if it's expanded;
           None if the archive hasn't been expanded.
        """
        if not self.archive_file:
            return None
    def source_path(self):
        """Returns the path to the expanded/checked out source code
           within this fetch strategy's path.

        for file in os.listdir(self.path):
            archive_path = join_path(self.path, file)
            if os.path.isdir(archive_path):
                return archive_path
           This assumes nothing else is going to be put in the
           FetchStrategy's path. It searches for the first
           subdirectory of the path it can find, then returns that.
        """
        for p in [os.path.join(self.path, f) for f in os.listdir(self.path)]:
            if os.path.isdir(p):
                return p
        return None

@@ -220,71 +228,35 @@ def chdir(self):
            tty.die("Setup failed: no such directory: " + self.path)


    def fetch_from_url(self, url):
        # Run curl but grab the mime type from the http headers
        headers = spack.curl('-#',        # status bar
                             '-O',        # save file to disk
                             '-D', '-',   # print out HTML headers
                             '-L', url,
                             return_output=True, fail_on_error=False)

        if spack.curl.returncode != 0:
            # clean up archive on failure.
            if self.archive_file:
                os.remove(self.archive_file)

            if spack.curl.returncode == 60:
                # This is a certificate error. Suggest spack -k
                raise FailedDownloadError(
                    url,
                    "Curl was unable to fetch due to invalid certificate. "
                    "This is either an attack, or your cluster's SSL configuration "
                    "is bad. If you believe your SSL configuration is bad, you "
                    "can try running spack -k, which will not check SSL certificates."
                    "Use this at your own risk.")

        # Check if we somehow got an HTML file rather than the archive we
        # asked for. We only look at the last content type, to handle
        # redirects properly.
        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
        if content_types and 'text/html' in content_types[-1]:
            tty.warn("The contents of " + self.archive_file + " look like HTML.",
                     "The checksum will likely be bad. If it is, you can use",
                     "'spack clean --dist' to remove the bad archive, then fix",
                     "your internet gateway issue and install again.")


    def fetch(self):
        """Downloads the file at URL to the stage. Returns true if it was downloaded,
           false if it already existed."""
        """Downloads an archive or checks out code from a repository."""
        self.chdir()
        if self.archive_file:
            tty.msg("Already downloaded %s." % self.archive_file)

        else:
            urls = [self.url]
            if self.mirror_path:
                urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()] + urls
        fetchers = [self.fetcher]

            for url in urls:
                tty.msg("Trying to fetch from %s" % url)
                self.fetch_from_url(url)
                if self.archive_file:
                    break
        # TODO: move mirror logic out of here and clean it up!
        if self.mirror_path:
            urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()]
            digest = None
            if isinstance(self.fetcher, URLFetchStrategy):
                digest = self.fetcher.digest
            fetchers = [URLFetchStrategy(url, digest) for url in urls] + fetchers
            for f in fetchers:
                f.set_stage(self)

        if not self.archive_file:
            raise FailedDownloadError(url)

        return self.archive_file
        for fetcher in fetchers:
            try:
                fetcher.fetch()
                break
            except spack.error.SpackError, e:
                tty.msg("Download from %s failed." % fetcher)
                continue

    def check(self, digest):
        """Check the downloaded archive against a checksum digest"""
        checker = crypto.Checker(digest)
        if not checker.check(self.archive_file):
            raise ChecksumError(
                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
                "Expected %s but got %s." % (digest, checker.sum))
        """Check the downloaded archive against a checksum digest.
           No-op if this stage checks code out of a repository."""
        self.fetcher.check()


    def expand_archive(self):
@@ -292,19 +264,14 @@ def expand_archive(self):
           archive. Fail if the stage is not set up or if the archive is not yet
           downloaded.
        """
        self.chdir()
        if not self.archive_file:
            tty.die("Attempt to expand archive before fetching.")

        decompress = decompressor_for(self.archive_file)
        decompress(self.archive_file)
        self.fetcher.expand()


    def chdir_to_archive(self):
        """Changes directory to the expanded archive directory.
           Dies with an error if there was no expanded archive.
        """
        path = self.expanded_archive_path
        path = self.source_path
        if not path:
            tty.die("Attempt to chdir before expanding archive.")
        else:
@@ -317,12 +284,7 @@ def restage(self):
        """Removes the expanded archive path if it exists, then re-expands
           the archive.
        """
        if not self.archive_file:
            tty.die("Attempt to restage when not staged.")

        if self.expanded_archive_path:
            shutil.rmtree(self.expanded_archive_path, True)
        self.expand_archive()
        self.fetcher.reset()


    def destroy(self):
@@ -393,15 +355,26 @@ def find_tmp_root():
    return None


class FailedDownloadError(spack.error.SpackError):
    """Raised when a download fails."""
    def __init__(self, url, msg=""):
        super(FailedDownloadError, self).__init__(
            "Failed to fetch file from URL: %s" % url, msg)
        self.url = url
class StageError(spack.error.SpackError):
    def __init__(self, message, long_message=None):
        super(StageError, self).__init__(message, long_message)


class ChecksumError(spack.error.SpackError):
class ChecksumError(StageError):
    """Raised when archive fails to checksum."""
    def __init__(self, message, long_msg):
    def __init__(self, message, long_msg=None):
        super(ChecksumError, self).__init__(message, long_msg)


class RestageError(StageError):
    def __init__(self, message, long_msg=None):
        super(RestageError, self).__init__(message, long_msg)


class ChdirError(StageError):
    def __init__(self, message, long_msg=None):
        super(ChdirError, self).__init__(message, long_msg)


# Keep this in namespace for convenience
FailedDownloadError = spack.fetch_strategy.FailedDownloadError

@@ -146,7 +146,7 @@ def check_fetch(self, stage, stage_name):
        stage_path = self.get_stage_path(stage, stage_name)
        self.assertTrue(archive_name in os.listdir(stage_path))
        self.assertEqual(join_path(stage_path, archive_name),
                         stage.archive_file)
                         stage.fetcher.archive_file)


    def check_expand_archive(self, stage, stage_name):
@@ -156,7 +156,7 @@ def check_expand_archive(self, stage, stage_name):

        self.assertEqual(
            join_path(stage_path, archive_dir),
            stage.expanded_archive_path)
            stage.source_path)

        readme = join_path(stage_path, archive_dir, readme_name)
        self.assertTrue(os.path.isfile(readme))
@@ -292,7 +292,7 @@ def test_restage(self):

        with closing(open('foobar', 'w')) as file:
            file.write("this file is to be destroyed.")

        self.assertTrue('foobar' in os.listdir(stage.expanded_archive_path))
        self.assertTrue('foobar' in os.listdir(stage.source_path))

        # Make sure the file is not there after restage.
        stage.restage()
@@ -301,7 +301,7 @@ def test_restage(self):

        stage.chdir_to_archive()
        self.check_chdir_to_archive(stage, stage_name)
        self.assertFalse('foobar' in os.listdir(stage.expanded_archive_path))
        self.assertFalse('foobar' in os.listdir(stage.source_path))

        stage.destroy()
        self.check_destroy(stage, stage_name)