Factor out URL fetching into URLFetchStrategy

- Added a FetchStrategy class to Spack
- Isolated the pieces that need to be separate from Stage to support git/svn/http fetching
- Added URLFetchStrategy for curl-based fetching (a usage sketch follows the commit metadata below).
This commit is contained in:
Todd Gamblin 2014-08-25 01:11:12 -07:00
parent 74a603dcd3
commit 52d140c337
5 changed files with 308 additions and 115 deletions
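The sketch below shows how the pieces introduced here are meant to fit together: a FetchStrategy knows *how* to fetch, a Stage knows *where* the result goes, and Stage.fetch()/check()/expand_archive()/restage() delegate to the strategy. This is illustrative driver code, not part of the commit; the URL and digest are made up, and setting the digest on the fetcher by hand reflects the fact that digest plumbing is still minimal at this point.

    from spack.stage import Stage

    # Stage builds a URLFetchStrategy for a plain URL via strategy_for_url()
    # and calls set_stage() on it so the fetcher knows where to download.
    stage = Stage('http://example.com/foo-1.0.tar.gz', name='foo-1.0')

    # Hypothetical md5; normally this would come from the package's versions dict.
    stage.fetcher.digest = '0123456789abcdef0123456789abcdef'

    stage.fetch()                      # -> fetcher.fetch(), i.e. curl
    stage.check(stage.fetcher.digest)  # -> fetcher.check() against its digest
    stage.expand_archive()             # -> fetcher.expand()
    stage.restage()                    # -> fetcher.reset()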


@@ -0,0 +1,222 @@
##############################################################################
# Copyright (c) 2013, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory.
#
# This file is part of Spack.
# Written by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
# LLNL-CODE-647188
#
# For details, see https://scalability-llnl.github.io/spack
# Please also see the LICENSE file for our notice and the LGPL.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License (as published by
# the Free Software Foundation) version 2.1 dated February 1999.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
# conditions of the GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
"""
Fetch strategies are used to download source code into a staging area
in order to build it. They need to define the following methods:
* fetch()
This should attempt to download/check out source from somewhere.
* check()
Apply a checksum to the downloaded source code, e.g. for an archive.
May not do anything if the fetch method was safe to begin with.
* expand()
Expand (e.g., an archive) downloaded file to source.
* reset()
Restore original state of downloaded code. Used by clean commands.
This may just remove the expanded source and re-expand an archive,
or it may run something like git reset --hard.
"""
import os
import re
import shutil

import llnl.util.tty as tty

import spack
import spack.error
import spack.util.crypto as crypto
from spack.util.compression import decompressor_for


class FetchStrategy(object):
    def __init__(self):
        # The stage is initialized late, so that fetch strategies can be
        # constructed at package construction time.  This is where things
        # will be fetched.
        self.stage = None

    def set_stage(self, stage):
        """This is called by Stage before any of the fetching
           methods are called on the stage."""
        self.stage = stage

    # Subclasses need to implement these methods
    def fetch(self): pass    # Return True on success, False on fail
    def check(self): pass
    def expand(self): pass
    def reset(self): pass
    def __str__(self): pass
class URLFetchStrategy(FetchStrategy):
    def __init__(self, url, digest=None):
        super(URLFetchStrategy, self).__init__()
        self.url = url
        self.digest = digest

    def fetch(self):
        assert(self.stage)
        self.stage.chdir()

        if self.archive_file:
            tty.msg("Already downloaded %s." % self.archive_file)
            return

        tty.msg("Trying to fetch from %s" % self.url)

        # Run curl, but grab the mime type from the HTTP headers
        headers = spack.curl('-#',        # status bar
                             '-O',        # save file to disk
                             '-D', '-',   # print out HTTP headers
                             '-L', self.url,
                             return_output=True, fail_on_error=False)

        if spack.curl.returncode != 0:
            # clean up archive on failure.
            if self.archive_file:
                os.remove(self.archive_file)

            if spack.curl.returncode == 60:
                # This is a certificate error.  Suggest spack -k
                raise FailedDownloadError(
                    self.url,
                    "Curl was unable to fetch due to invalid certificate. "
                    "This is either an attack, or your cluster's SSL configuration "
                    "is bad.  If you believe your SSL configuration is bad, you "
                    "can try running spack -k, which will not check SSL certificates. "
                    "Use this at your own risk.")

        # Check if we somehow got an HTML file rather than the archive we
        # asked for.  We only look at the last content type, to handle
        # redirects properly.
        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
        if content_types and 'text/html' in content_types[-1]:
            tty.warn("The contents of " + self.archive_file + " look like HTML.",
                     "The checksum will likely be bad.  If it is, you can use",
                     "'spack clean --dist' to remove the bad archive, then fix",
                     "your internet gateway issue and install again.")

        if not self.archive_file:
            raise FailedDownloadError(self.url)
    @property
    def archive_file(self):
        """Path to the source archive within this stage directory."""
        assert(self.stage)
        path = os.path.join(self.stage.path, os.path.basename(self.url))
        return path if os.path.exists(path) else None

    def expand(self):
        assert(self.stage)
        tty.msg("Staging archive: %s" % self.archive_file)

        self.stage.chdir()
        if not self.archive_file:
            raise NoArchiveFileError("URLFetchStrategy couldn't find archive file",
                                     "Failed on expand() for URL %s" % self.url)

        decompress = decompressor_for(self.archive_file)
        decompress(self.archive_file)
    def check(self):
        """Check the downloaded archive against a checksum digest.
           No-op if this stage checks code out of a repository."""
        assert(self.stage)
        if not self.digest:
            raise NoDigestError("Attempt to check URLFetchStrategy with no digest.")

        checker = crypto.Checker(self.digest)
        if not checker.check(self.archive_file):
            raise ChecksumError(
                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
                "Expected %s but got %s." % (self.digest, checker.sum))
    def reset(self):
        """Removes the source path if it exists, then re-expands the archive."""
        assert(self.stage)
        if not self.archive_file:
            raise NoArchiveFileError("Tried to reset URLFetchStrategy before fetching",
                                     "Failed on reset() for URL %s" % self.url)
        if self.stage.source_path:
            shutil.rmtree(self.stage.source_path, ignore_errors=True)
        self.expand()

    def __str__(self):
        return self.url


class GitFetchStrategy(FetchStrategy):
    pass


class SvnFetchStrategy(FetchStrategy):
    pass
def strategy_for_url(url):
    """Given a URL, find an appropriate fetch strategy for it.
       Currently just gives you a URLFetchStrategy that uses curl.

       TODO: make this return appropriate fetch strategies for other
             types of URLs.
    """
    return URLFetchStrategy(url)


class FetchStrategyError(spack.error.SpackError):
    def __init__(self, msg, long_msg=None):
        super(FetchStrategyError, self).__init__(msg, long_msg)


class FailedDownloadError(FetchStrategyError):
    """Raised when a download fails."""
    def __init__(self, url, msg=""):
        super(FailedDownloadError, self).__init__(
            "Failed to fetch file from URL: %s" % url, msg)
        self.url = url


class ChecksumError(FetchStrategyError):
    """Raised when archive fails to checksum."""
    def __init__(self, msg, long_msg=None):
        super(ChecksumError, self).__init__(msg, long_msg)


class NoArchiveFileError(FetchStrategyError):
    def __init__(self, msg, long_msg=None):
        super(NoArchiveFileError, self).__init__(msg, long_msg)


class NoDigestError(FetchStrategyError):
    def __init__(self, msg, long_msg=None):
        super(NoDigestError, self).__init__(msg, long_msg)
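
GitFetchStrategy and SvnFetchStrategy are only stubs in this commit. As a rough illustration of where the interface is headed, here is a hypothetical sketch of a git-based strategy — not part of the commit. The class name, the `ref` parameter, and the clone layout are invented, and it assumes spack.util.executable's `which()` helper behaves as it does elsewhere in Spack.

    import os

    from spack.util.executable import which
    from spack.fetch_strategy import FetchStrategy


    class SketchGitFetchStrategy(FetchStrategy):
        """Hypothetical git strategy: clones instead of curling a tarball."""

        def __init__(self, url, ref='master'):
            super(SketchGitFetchStrategy, self).__init__()
            self.url = url
            self.ref = ref                    # branch, tag, or commit to build

        def fetch(self):
            assert(self.stage)
            self.stage.chdir()
            git = which('git', required=True)
            git('clone', self.url, 'source')  # clone under the stage directory
            os.chdir('source')
            git('checkout', self.ref)

        def check(self):
            pass  # a pinned ref plays the role of the checksum

        def expand(self):
            pass  # a clone is already "expanded" source

        def reset(self):
            # Discard local edits and untracked files, like git reset --hard.
            assert(self.stage)
            os.chdir(self.stage.source_path)
            git = which('git', required=True)
            git('checkout', '.')
            git('clean', '-fdx')

        def __str__(self):
            return "[git] " + self.url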


@@ -337,7 +337,7 @@ def __init__(self, spec):
         # Sanity check some required variables that could be
         # overridden by package authors.
-        def sanity_check_dict(attr_name):
+        def ensure_has_dict(attr_name):
             if not hasattr(self, attr_name):
                 raise PackageError("Package %s must define %s" % attr_name)

@@ -345,10 +345,10 @@ def sanity_check_dict(attr_name):
             if not isinstance(attr, dict):
                 raise PackageError("Package %s has non-dict %s attribute!"
                                    % (self.name, attr_name))
-        sanity_check_dict('versions')
-        sanity_check_dict('dependencies')
-        sanity_check_dict('conflicted')
-        sanity_check_dict('patches')
+        ensure_has_dict('versions')
+        ensure_has_dict('dependencies')
+        ensure_has_dict('conflicted')
+        ensure_has_dict('patches')

         # Check versions in the versions dict.
         for v in self.versions:

@@ -362,9 +362,8 @@ def sanity_check_dict(attr_name):
         # Version-ize the keys in versions dict
         try:
             self.versions = dict((Version(v), h) for v,h in self.versions.items())
-        except ValueError:
-            raise ValueError("Keys of versions dict in package %s must be versions!"
-                             % self.name)
+        except ValueError, e:
+            raise ValueError("In package %s: %s" % (self.name, e.message))

         # stage used to build this package.
         self._stage = None

@@ -600,9 +599,8 @@ def do_stage(self):
         self.do_fetch()

-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         if not archive_dir:
-            tty.msg("Staging archive: %s" % self.stage.archive_file)
             self.stage.expand_archive()
             tty.msg("Created stage directory in %s." % self.stage.path)
         else:

@@ -620,7 +618,7 @@ def do_patch(self):
         # Construct paths to special files in the archive dir used to
         # keep track of whether patches were successfully applied.
-        archive_dir = self.stage.expanded_archive_path
+        archive_dir = self.stage.source_path
         good_file = join_path(archive_dir, '.spack_patched')
         bad_file  = join_path(archive_dir, '.spack_patch_failed')


@@ -95,7 +95,7 @@ def __init__(self, checksum, url):
 def version(ver, checksum, **kwargs):
-    """Adds a version and associated metadata to the package."""
+    """Adds a version and metadata describing how to fetch it."""
     pkg = caller_locals()
     versions = pkg.setdefault('versions', {})


@@ -32,18 +32,20 @@
 import spack
 import spack.config
+from spack.fetch_strategy import strategy_for_url, URLFetchStrategy
 import spack.error
 import spack.util.crypto as crypto
 from spack.util.compression import decompressor_for

 STAGE_PREFIX = 'spack-stage-'


 class Stage(object):
-    """A Stage object manaages a directory where an archive is downloaded,
-       expanded, and built before being installed.  It also handles downloading
-       the archive.  A stage's lifecycle looks like this:
+    """A Stage object manages a directory where some source code is
+       downloaded and built before being installed.  It handles
+       fetching the source code, either as an archive to be expanded
+       or by checking it out of a repository.  A stage's lifecycle
+       looks like this:

     Stage()
       Constructor creates the stage directory.
@@ -71,18 +73,24 @@ class Stage(object):
     def __init__(self, url, **kwargs):
         """Create a stage object.
            Parameters:
-             url     URL of the archive to be downloaded into this stage.
+             url_or_fetch_strategy
+                 URL of the archive to be downloaded into this stage, OR
+                 a valid FetchStrategy.

-             name    If a name is provided, then this stage is a named stage
-                     and will persist between runs (or if you construct another
-                     stage object later).  If name is not provided, then this
-                     stage will be given a unique name automatically.
+             name
+                 If a name is provided, then this stage is a named stage
+                 and will persist between runs (or if you construct another
+                 stage object later).  If name is not provided, then this
+                 stage will be given a unique name automatically.
         """
+        if isinstance(url, basestring):
+            self.fetcher = strategy_for_url(url)
+        else:
+            self.fetcher = url
+        self.fetcher.set_stage(self)
+
         self.name = kwargs.get('name')
         self.mirror_path = kwargs.get('mirror_path')
         self.tmp_root = find_tmp_root()

         self.url = url

         self.path = None
         self._setup()
@@ -198,17 +206,17 @@ def archive_file(self):
     @property
-    def expanded_archive_path(self):
-        """Returns the path to the expanded archive directory if it's expanded;
-           None if the archive hasn't been expanded.
-        """
-        if not self.archive_file:
-            return None
-
-        for file in os.listdir(self.path):
-            archive_path = join_path(self.path, file)
-            if os.path.isdir(archive_path):
-                return archive_path
+    def source_path(self):
+        """Returns the path to the expanded/checked out source code
+           within this fetch strategy's path.
+
+           This assumes nothing else is going to be put in the
+           FetchStrategy's path.  It searches for the first
+           subdirectory of the path it can find, then returns that.
+        """
+        for p in [os.path.join(self.path, f) for f in os.listdir(self.path)]:
+            if os.path.isdir(p):
+                return p
         return None
@@ -220,71 +228,35 @@ def chdir(self):
             tty.die("Setup failed: no such directory: " + self.path)

-    def fetch_from_url(self, url):
-        # Run curl but grab the mime type from the http headers
-        headers = spack.curl('-#',        # status bar
-                             '-O',        # save file to disk
-                             '-D', '-',   # print out HTML headers
-                             '-L', url,
-                             return_output=True, fail_on_error=False)
-
-        if spack.curl.returncode != 0:
-            # clean up archive on failure.
-            if self.archive_file:
-                os.remove(self.archive_file)
-
-            if spack.curl.returncode == 60:
-                # This is a certificate error.  Suggest spack -k
-                raise FailedDownloadError(
-                    url,
-                    "Curl was unable to fetch due to invalid certificate. "
-                    "This is either an attack, or your cluster's SSL configuration "
-                    "is bad.  If you believe your SSL configuration is bad, you "
-                    "can try running spack -k, which will not check SSL certificates."
-                    "Use this at your own risk.")
-
-        # Check if we somehow got an HTML file rather than the archive we
-        # asked for.  We only look at the last content type, to handle
-        # redirects properly.
-        content_types = re.findall(r'Content-Type:[^\r\n]+', headers)
-        if content_types and 'text/html' in content_types[-1]:
-            tty.warn("The contents of " + self.archive_file + " look like HTML.",
-                     "The checksum will likely be bad.  If it is, you can use",
-                     "'spack clean --dist' to remove the bad archive, then fix",
-                     "your internet gateway issue and install again.")
-
     def fetch(self):
-        """Downloads the file at URL to the stage.  Returns true if it was downloaded,
-           false if it already existed."""
+        """Downloads an archive or checks out code from a repository."""
         self.chdir()

-        if self.archive_file:
-            tty.msg("Already downloaded %s." % self.archive_file)
-
-        else:
-            urls = [self.url]
-            if self.mirror_path:
-                urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()] + urls
-
-            for url in urls:
-                tty.msg("Trying to fetch from %s" % url)
-                self.fetch_from_url(url)
-                if self.archive_file:
-                    break
-
-        if not self.archive_file:
-            raise FailedDownloadError(url)
-
-        return self.archive_file
+        fetchers = [self.fetcher]
+
+        # TODO: move mirror logic out of here and clean it up!
+        if self.mirror_path:
+            urls = ["%s/%s" % (m, self.mirror_path) for m in _get_mirrors()]
+
+            digest = None
+            if isinstance(self.fetcher, URLFetchStrategy):
+                digest = self.fetcher.digest
+            fetchers = [URLFetchStrategy(url, digest) for url in urls] + fetchers
+
+            for f in fetchers:
+                f.set_stage(self)
+
+        for fetcher in fetchers:
+            try:
+                fetcher.fetch()
+                break
+            except spack.error.SpackError, e:
+                tty.msg("Download from %s failed." % fetcher)
+                continue

     def check(self, digest):
-        """Check the downloaded archive against a checksum digest"""
-        checker = crypto.Checker(digest)
-        if not checker.check(self.archive_file):
-            raise ChecksumError(
-                "%s checksum failed for %s." % (checker.hash_name, self.archive_file),
-                "Expected %s but got %s." % (digest, checker.sum))
+        """Check the downloaded archive against a checksum digest.
+           No-op if this stage checks code out of a repository."""
+        self.fetcher.check()

     def expand_archive(self):
@@ -292,19 +264,14 @@ def expand_archive(self):
            archive.  Fail if the stage is not set up or if the archive is not yet
            downloaded.
         """
-        self.chdir()
-        if not self.archive_file:
-            tty.die("Attempt to expand archive before fetching.")
-
-        decompress = decompressor_for(self.archive_file)
-        decompress(self.archive_file)
+        self.fetcher.expand()

     def chdir_to_archive(self):
         """Changes directory to the expanded archive directory.
            Dies with an error if there was no expanded archive.
         """
-        path = self.expanded_archive_path
+        path = self.source_path
         if not path:
             tty.die("Attempt to chdir before expanding archive.")
         else:
@@ -317,12 +284,7 @@ def restage(self):
         """Removes the expanded archive path if it exists, then re-expands
            the archive.
         """
-        if not self.archive_file:
-            tty.die("Attempt to restage when not staged.")
-
-        if self.expanded_archive_path:
-            shutil.rmtree(self.expanded_archive_path, True)
-        self.expand_archive()
+        self.fetcher.reset()

     def destroy(self):
@@ -393,15 +355,26 @@ def find_tmp_root():
     return None


-class FailedDownloadError(spack.error.SpackError):
-    """Raised wen a download fails."""
-    def __init__(self, url, msg=""):
-        super(FailedDownloadError, self).__init__(
-            "Failed to fetch file from URL: %s" % url, msg)
-        self.url = url
+class StageError(spack.error.SpackError):
+    def __init__(self, message, long_message=None):
+        super(StageError, self).__init__(message, long_message)


-class ChecksumError(spack.error.SpackError):
+class ChecksumError(StageError):
     """Raised when archive fails to checksum."""
-    def __init__(self, message, long_msg):
+    def __init__(self, message, long_msg=None):
         super(ChecksumError, self).__init__(message, long_msg)


+class RestageError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(RestageError, self).__init__(message, long_msg)
+
+
+class ChdirError(StageError):
+    def __init__(self, message, long_msg=None):
+        super(ChdirError, self).__init__(message, long_msg)
+
+
+# Keep this in namespace for convenience
+FailedDownloadError = spack.fetch_strategy.FailedDownloadError
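
To make the new mirror handling in Stage.fetch() concrete: when mirrors are configured and the stage has a mirror_path, fetch() builds a list of fetchers that tries each mirror before the original URL. The mirror hostnames and the package path below are invented for illustration; _get_mirrors() reads the configured mirrors via spack.config as in the code above.

    # Suppose spack.config lists two mirrors and the stage has a mirror_path.
    stage = Stage('http://example.com/foo-1.0.tar.gz',
                  name='foo-1.0', mirror_path='foo/foo-1.0.tar.gz')

    # stage.fetch() then tries, in order:
    #   URLFetchStrategy('http://mirror1.example.com/foo/foo-1.0.tar.gz', digest)
    #   URLFetchStrategy('http://mirror2.example.com/foo/foo-1.0.tar.gz', digest)
    #   stage.fetcher   # the strategy for the original URL
    # stopping at the first fetcher whose fetch() succeeds.
    stage.fetch()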


@@ -146,7 +146,7 @@ def check_fetch(self, stage, stage_name):
         stage_path = self.get_stage_path(stage, stage_name)
         self.assertTrue(archive_name in os.listdir(stage_path))
         self.assertEqual(join_path(stage_path, archive_name),
-                         stage.archive_file)
+                         stage.fetcher.archive_file)

     def check_expand_archive(self, stage, stage_name):

@@ -156,7 +156,7 @@ def check_expand_archive(self, stage, stage_name):
         self.assertEqual(
             join_path(stage_path, archive_dir),
-            stage.expanded_archive_path)
+            stage.source_path)

         readme = join_path(stage_path, archive_dir, readme_name)
         self.assertTrue(os.path.isfile(readme))

@@ -292,7 +292,7 @@ def test_restage(self):
         with closing(open('foobar', 'w')) as file:
             file.write("this file is to be destroyed.")

-        self.assertTrue('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertTrue('foobar' in os.listdir(stage.source_path))

         # Make sure the file is not there after restage.
         stage.restage()

@@ -301,7 +301,7 @@ def test_restage(self):
         stage.chdir_to_archive()
         self.check_chdir_to_archive(stage, stage_name)
-        self.assertFalse('foobar' in os.listdir(stage.expanded_archive_path))
+        self.assertFalse('foobar' in os.listdir(stage.source_path))

         stage.destroy()
         self.check_destroy(stage, stage_name)