Added web spider capability for listing versions.

@@ -30,6 +30,8 @@ parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
                    help="print additional output during builds")
parser.add_argument('-d', '--debug', action='store_true', dest='debug',
                    help="write out debug logs during compile")
parser.add_argument('-m', '--mock', action='store_true', dest='mock',
                    help="Use mock packages instead of real ones.")

# each command module implements a parser() function, to which we pass its
# subparser for setup.
@@ -46,6 +48,10 @@ args = parser.parse_args()
# Set up environment based on args.
spack.verbose = args.verbose
spack.debug = args.debug
if args.mock:
    from spack.util.filesystem import new_path
    mock_path = new_path(spack.module_path, 'test', 'mock_packages')
    spack.packages_path = mock_path

# Try to load the particular command asked for and run it
command = spack.cmd.get_command(args.command)

lib/spack/spack/cmd/checksum.py (new file, 63 lines)
@@ -0,0 +1,63 @@
import os
import re
import argparse
from pprint import pprint
from subprocess import CalledProcessError

import spack.tty as tty
import spack.packages as packages
from spack.stage import Stage
from spack.colify import colify
from spack.util.crypto import md5
from spack.version import *

group='foo'
description = "Checksum available versions of a package, print out checksums for addition to a package file."


def setup_parser(subparser):
    subparser.add_argument('package', metavar='PACKAGE', help='Package to list versions for')
    subparser.add_argument('versions', nargs=argparse.REMAINDER, help='Versions to generate checksums for')
    subparser.add_argument('-n', '--number', dest='number', type=int,
                           default=10, help='Number of versions to list')


def checksum(parser, args):
    # get the package we're going to generate checksums for
    pkg = packages.get(args.package)

    # If the user asked for specific versions, use those.
    # Otherwise get the latest n, where n is from the -n/--number param
    versions = [ver(v) for v in args.versions]

    if not all(type(v) == Version for v in versions):
        tty.die("Cannot generate checksums for version lists or " +
                "version ranges.  Use unambiguous versions.")

    if not versions:
        versions = pkg.fetch_available_versions()[:args.number]
        if not versions:
            tty.die("Could not fetch any available versions for %s."
                    % pkg.name)

    versions.sort()
    versions.reverse()
    urls = [pkg.url_for_version(v) for v in versions]

    tty.msg("Found %s versions to checksum." % len(urls))
    tty.msg("Downloading...")

    hashes = []
    for url, version in zip(urls, versions):
        stage = Stage("checksum-%s-%s" % (pkg.name, version), url)
        try:
            stage.fetch()
            hashes.append(md5(stage.archive_file))
        finally:
            stage.destroy()

    dict_string = ["{"]
    for i, (v, h) in enumerate(zip(versions, hashes)):
        comma = "" if i == len(hashes) - 1 else ","
        dict_string.append("    '%s' : '%s'%s" % (str(v), str(h), comma))
    dict_string.append("}")
    tty.msg("Checksummed new versions of %s:" % pkg.name, *dict_string)
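
The dict_string output above is formatted so it can be pasted straight into a package file as its versions attribute. A hypothetical run against dyninst would print something like the following, using the checksums that appear later in this commit:

versions = {
    '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a',
    '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d'
}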

@@ -2,8 +2,10 @@
import spack.cmd

import spack.tty as tty
import spack.url as url
import spack


description = "parse specs and print them out to the command line."

def setup_parser(subparser):
@@ -13,7 +15,11 @@ def spec(parser, args):
    specs = spack.cmd.parse_specs(args.specs)
    for spec in specs:
        spec.normalize()
        print spec.tree()
        print spec.tree(color=True)

        spec.concretize()
        print spec.tree()
        print spec.tree(color=True)

        pkg = spec.package
        wc = url.wildcard_version(pkg.url)
        print wc

@@ -2,12 +2,8 @@
import re
from subprocess import CalledProcessError

import spack
import spack.packages as packages
import spack.url as url
import spack.tty as tty
from spack.colify import colify
from spack.version import ver

description = "List available versions of a package"

@@ -17,4 +13,4 @@ def setup_parser(subparser):

def versions(parser, args):
    pkg = packages.get(args.package)
    colify(reversed(pkg.available_versions))
    colify(reversed(pkg.fetch_available_versions()))

@@ -29,6 +29,8 @@
from multi_function import platform
from stage import Stage
from spack.util.lang import memoized, list_modules
from spack.util.crypto import md5
from spack.util.web import get_pages


class Package(object):
@@ -251,6 +253,9 @@ class SomePackage(Package):
    """By default a package has no dependencies."""
    dependencies = {}

    """List of specs of virtual packages provided by this package."""
    provided_virtual_packages = {}

    #
    # These are default values for instance variables.
    #
@@ -310,6 +315,9 @@ def __init__(self, spec):
        if not hasattr(self, 'list_url'):
            self.list_url = os.path.dirname(self.url)

        if not hasattr(self, 'list_depth'):
            self.list_depth = 1


    def add_commands_to_module(self):
        """Populate the module scope of install() with some useful functions.
@@ -464,6 +472,11 @@ def url_version(self, version):
        return str(version)


    def url_for_version(self, version):
        """Gives a URL that you can download a new version of this package from."""
        return url.substitute_version(self.url, self.url_version(version))


    def remove_prefix(self):
        """Removes the prefix for a package along with any empty parent directories."""
        if self.dirty:
@@ -640,37 +653,42 @@ def do_clean_dist(self):
        tty.msg("Successfully cleaned %s" % self.name)


    def fetch_available_versions(self):
        # If not, then try to fetch using list_url
        if not self._available_versions:
            self._available_versions = VersionList()
            url_regex = os.path.basename(url.wildcard_version(self.url))
            wildcard = self.version.wildcard()

            page_map = get_pages(self.list_url, depth=self.list_depth)
            for site, page in page_map.iteritems():
                strings = re.findall(url_regex, page)

                for s in strings:
                    match = re.search(wildcard, s)
                    if match:
                        v = match.group(0)
                        self._available_versions.add(Version(v))

            if not self._available_versions:
                tty.warn("Found no versions for %s" % self.name,
                         "Check the list_url and list_depth attribute on the "
                         + self.name + " package.",
                         "Use them to tell Spack where to look for versions.")

        return self._available_versions


    @property
    def available_versions(self):
        # If the package overrode available_versions, then use that.
        if self.versions is not None:
            return self.versions

        # If not, then try to fetch using list_url
        if not self._available_versions:
            self._available_versions = ver([self.version])
            try:
                # Run curl but grab the mime type from the http headers
                listing = spack.curl('-s', '-L', self.list_url, return_output=True)
                url_regex = os.path.basename(url.wildcard_version(self.url))
                strings = re.findall(url_regex, listing)
                wildcard = self.version.wildcard()
                for s in strings:
                    match = re.search(wildcard, s)
                    if match:
                        self._available_versions.add(Version(match.group(0)))

                if not self._available_versions:
                    tty.warn("Found no versions for %s" % self.name,
                             "Package.available_versions may require adding the list_url attribute",
                             "to the package to tell Spack where to look for versions.")

            except subprocess.CalledProcessError:
                tty.warn("Could not connect to %s" % self.list_url,
                         "Package.available_versions requires an internet connection.",
                         "Version list may be incomplete.")

        return self._available_versions
        else:
            vlist = self.fetch_available_versions()
            if not vlist:
                vlist = ver([self.version])
            return vlist


class MakeExecutable(Executable):

@@ -19,6 +19,7 @@
invalid_package_re = r'[_-][_-]+'

instances = {}
providers = {}


def get(pkg_name):
@@ -29,6 +30,24 @@ def get(pkg_name):
    return instances[pkg_name]


def get_providers(vpkg_name):
    if not providers:
        compute_providers()

    if vpkg_name not in providers:
        raise UnknownPackageError("No such virtual package: %s" % vpkg_name)

    return providers[vpkg_name]


def compute_providers():
    for pkg in all_packages():
        for vpkg in pkg.provided_virtual_packages:
            if vpkg not in providers:
                providers[vpkg] = []
            providers[vpkg].append(pkg)


def valid_package_name(pkg_name):
    return (re.match(valid_package_re, pkg_name) and
            not re.search(invalid_package_re, pkg_name))
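
A sketch of how the provider map is consumed, assuming some package declares provided_virtual_packages with an 'mpi' key (the virtual name here is illustrative):

import spack.packages as packages

# compute_providers() runs lazily on the first lookup; after that,
# providers maps each virtual package name to the packages providing it.
for pkg in packages.get_providers('mpi'):
    print pkg.name        # e.g. mpich, if mpich provides 'mpi'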
@@ -75,6 +94,11 @@ def class_name_for_package_name(pkg_name):
    return class_name


def exists(pkg_name):
    """Whether a package file exists for this name."""
    return os.path.exists(filename_for_package_name(pkg_name))


def get_class_for_package_name(pkg_name):
    file_name = filename_for_package_name(pkg_name)

@@ -149,7 +173,6 @@ def quote(string):
    out.write('}\n')



class InvalidPackageNameError(spack.error.SpackError):
    """Raised when we encounter a bad package name."""
    def __init__(self, name):

@@ -4,6 +4,7 @@ class Dyninst(Package):
    homepage = "https://paradyn.org"
    url      = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    md5      = "bf03b33375afa66fe0efa46ce3f4b17a"
    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"

    depends_on("libelf")
    depends_on("libdwarf")

@@ -45,16 +45,28 @@ class Mpileaks(Package):
        spack install mpileaks ^mpich
"""
import sys
import inspect
import spack.spec


def _caller_locals():
    """This will return the locals of the *parent* of the caller.
       This allows a function to insert variables into its caller's
       scope.
    """
    stack = inspect.stack()
    try:
        return stack[2][0].f_locals
    finally:
        del stack


def depends_on(*specs):
    """Adds a dependencies local variable in the locals of
       the calling class, based on args.
    """
    # Get the enclosing package's scope and add deps to it.
    locals = sys._getframe(1).f_locals
    dependencies = locals.setdefault("dependencies", {})
    dependencies = _caller_locals().setdefault("dependencies", {})
    for string in specs:
        for spec in spack.spec.parse(string):
            dependencies[spec.name] = spec
@@ -66,7 +78,6 @@ def provides(*args):
       can use the providing package to satisfy the dependency.
    """
    # Get the enclosing package's scope and add deps to it.
    locals = sys._getframe(1).f_locals
    provides = locals.setdefault("provides", [])
    provides = _caller_locals().setdefault("provides", [])
    for name in args:
        provides.append(name)
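
A minimal standalone sketch of the _caller_locals() trick: when depends_on is called from inside a class body, frame 2 on the stack is the class body itself, so names inserted there become class attributes. Example and its dependency name are hypothetical:

import inspect

def _caller_locals():
    stack = inspect.stack()
    try:
        # stack[0] is this function, stack[1] is depends_on,
        # stack[2] is the class body that called depends_on.
        return stack[2][0].f_locals
    finally:
        del stack          # avoid a reference cycle with the frame

def depends_on(*names):
    deps = _caller_locals().setdefault('dependencies', {})
    for name in names:
        deps[name] = name

class Example(object):
    depends_on('libelf')   # injects 'dependencies' into the class body

print Example.dependencies   # {'libelf': 'libelf'}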
@@ -321,9 +321,15 @@ def package(self):
        return packages.get(self.name)


    @property
    def virtual(self):
        return packages.exists(self.name)


    @property
    def concrete(self):
        return bool(self.versions.concrete
        return bool(not self.virtual
                    and self.versions.concrete
                    # TODO: support variants
                    and self.architecture
                    and self.compiler and self.compiler.concrete

@@ -5,7 +5,9 @@ class Callpath(Package):
    url      = "http://github.com/tgamblin/callpath-0.2.tar.gz"
    md5      = "foobarbaz"

    versions = [0.8, 0.9, 1.0]
    versions = { 0.8 : 'bf03b33375afa66fe0efa46ce3f4b17a',
                 0.9 : 'bf03b33375afa66fe0efa46ce3f4b17a',
                 1.0 : 'bf03b33375afa66fe0efa46ce3f4b17a' }

    depends_on("dyninst")
    depends_on("mpich")

@@ -5,7 +5,11 @@ class Dyninst(Package):
    url      = "http://www.dyninst.org/sites/default/files/downloads/dyninst/8.1.2/DyninstAPI-8.1.2.tgz"
    md5      = "bf03b33375afa66fe0efa46ce3f4b17a"

    versions = '7.0, 7.0.1, 8.0, 8.1.1, 8.1.2'
    list_url = "http://www.dyninst.org/downloads/dyninst-8.x"

    versions = {
        '8.1.2' : 'bf03b33375afa66fe0efa46ce3f4b17a',
        '8.1.1' : '1f8743e3a5662b25ce64a7edf647e77d' }

    depends_on("libelf")
    depends_on("libdwarf")

@@ -11,6 +11,8 @@ class Libdwarf(Package):

    md5      = "64b42692e947d5180e162e46c689dfbf"

    versions = [20070703, 20111030, 20130207]

    depends_on("libelf")

@@ -5,7 +5,10 @@ class Libelf(Package):
    url      = "http://www.mr511.de/software/libelf-0.8.13.tar.gz"
    md5      = "4136d7b4c04df68b686570afa26988ac"

    versions = '0.8.10, 0.8.12, 0.8.13'
    versions = {
        '0.8.13' : '4136d7b4c04df68b686570afa26988ac',
        '0.8.12' : 'e21f8273d9f5f6d43a59878dc274fec7',
        '0.8.10' : '9db4d36c283d9790d8fa7df1f4d7b4d9' }

    def install(self, prefix):
        configure("--prefix=%s" % prefix,

@@ -3,6 +3,9 @@
class Mpich(Package):
    homepage = "http://www.mpich.org"
    url      = "http://www.mpich.org/static/downloads/3.0.4/mpich-3.0.4.tar.gz"

    list_url   = "http://www.mpich.org/static/downloads/"
    list_depth = 2
    md5      = "9c5d5d4fe1e17dd12153f40bc5b6dbc0"

    versions = '1.0.3, 1.3.2p1, 1.4.1p1, 3.0.4, 3.1b1'
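
The list_depth of 2 matters here because mpich tarballs live in per-version subdirectories one level below list_url, so the spider must follow the index page's links. A hypothetical package with the same layout would set the attributes like this:

class Example(Package):   # hypothetical package file fragment
    url        = "http://example.com/downloads/1.0/example-1.0.tar.gz"
    # Spider the download index plus the version directories it links to.
    list_url   = "http://example.com/downloads/"
    list_depth = 2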
@@ -5,7 +5,10 @@ class Mpileaks(Package):
    url      = "http://www.llnl.gov/mpileaks-1.0.tar.gz"
    md5      = "foobarbaz"

    versions = [1.0, 2.1, 2.2, 2.3]
    versions = { 1.0 : None,
                 2.1 : None,
                 2.2 : None,
                 2.3 : None }

    depends_on("mpich")
    depends_on("callpath")

@@ -176,6 +176,8 @@ def wildcard_version(path):
       that will match this path with any version in its place.
    """
    ver, start, end = parse_version_string_with_indices(path)
    v = Version(ver)

    return re.escape(path[:start]) + v.wildcard() + re.escape(path[end:])
    v = Version(ver)
    parts = list(re.escape(p) for p in path.split(str(v)))

    return v.wildcard().join(parts)
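
The rewrite changes how repeated occurrences of the version are handled: splitting the path on the version string generalizes every occurrence, while the old slice-based form only generalized the single span found by parse_version_string_with_indices. A rough illustration, with a stand-in pattern since the real regex comes from Version.wildcard():

import re

path = "dyninst/8.1.2/DyninstAPI-8.1.2.tgz"   # version appears twice
wildcard = r'\d+(\.\d+)*'                     # stand-in for v.wildcard()
pattern = wildcard.join(re.escape(p) for p in path.split("8.1.2"))
# Both version spans now match any version, not just the first one found.
print re.match(pattern, "dyninst/8.1.3/DyninstAPI-8.1.3.tgz") is not None  # True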

lib/spack/spack/util/crypto.py (new file, 13 lines)
@@ -0,0 +1,13 @@
import hashlib
from contextlib import closing

def md5(filename, block_size=2**20):
    """Computes the md5 hash of a file."""
    md5 = hashlib.md5()
    with closing(open(filename)) as file:
        while True:
            data = file.read(block_size)
            if not data:
                break
            md5.update(data)
        return md5.hexdigest()
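
A quick usage sketch (the path is hypothetical):

from spack.util.crypto import md5
print md5('/tmp/libelf-0.8.13.tar.gz')   # 32-character hex digest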

@@ -56,16 +56,3 @@ def stem(path):
        if re.search(suffix, path):
            return re.sub(suffix, "", path)
    return path


def md5(filename, block_size=2**20):
    """Computes the md5 hash of a file."""
    import hashlib
    md5 = hashlib.md5()
    with closing(open(filename)) as file:
        while True:
            data = file.read(block_size)
            if not data:
                break
            md5.update(data)
        return md5.hexdigest()

lib/spack/spack/util/web.py (new file, 113 lines)
@@ -0,0 +1,113 @@
import re
import subprocess
import urllib2
import urlparse
from multiprocessing import Pool
from HTMLParser import HTMLParser

import spack
import spack.tty as tty
from spack.util.compression import ALLOWED_ARCHIVE_TYPES

# Timeout in seconds for web requests
TIMEOUT = 10


class LinkParser(HTMLParser):
    """This parser just takes an HTML page and strips out the hrefs on the
       links.  Good enough for a really simple spider. """
    def __init__(self):
        HTMLParser.__init__(self)
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for attr, val in attrs:
                if attr == 'href':
                    self.links.append(val)


def _spider(args):
    """_spider(url, depth, max_depth)

       Fetches URL and any pages it links to up to max_depth.  depth should
       initially be 1, and max_depth includes the root.  This function will
       print out a warning only if the root can't be fetched; it ignores
       errors with pages that the root links to.

       This will return a dict of the pages fetched, keyed by URL, in no
       particular order.

       Takes args as a tuple b/c it's intended to be used by a multiprocessing
       pool.  Firing off all the child links at once makes the fetch MUCH
       faster for pages with lots of children.
    """
    url, depth, max_depth = args

    pages = {}
    try:
        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = urllib2.Request(url)
        req.get_method = lambda: "HEAD"
        resp = urllib2.urlopen(req, timeout=TIMEOUT)

        if not resp.headers["Content-type"].startswith('text/html'):
            print "ignoring page " + url + " with content type " + resp.headers["Content-type"]
            return pages

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = urllib2.urlopen(req, timeout=TIMEOUT)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read()
        pages[response_url] = page

        # If we're not at max depth, parse out the links in the page
        if depth < max_depth:
            link_parser = LinkParser()

            subcalls = []
            link_parser.feed(page)
            while link_parser.links:
                raw_link = link_parser.links.pop()

                # Skip stuff that looks like an archive
                if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                    continue

                # Evaluate the link relative to the page it came from.
                abs_link = urlparse.urljoin(response_url, raw_link)
                subcalls.append((abs_link, depth+1, max_depth))

            if subcalls:
                pool = Pool(processes=len(subcalls))
                dicts = pool.map(_spider, subcalls)
                for d in dicts:
                    pages.update(d)

    except urllib2.HTTPError, e:
        # Only report it if it's the root page.  We ignore errors when spidering.
        if depth == 1:
            tty.warn("Could not connect to %s" % url, e.reason,
                     "Package.available_versions requires an internet connection.",
                     "Version list may be incomplete.")

    return pages


def get_pages(root_url, **kwargs):
    """Gets web pages from a root URL.

       If depth is specified (e.g., depth=2), then this will also fetch pages
       linked from the root and its children up to depth.

       This will spawn processes to fetch the children, for much improved
       performance over a sequential fetch.
    """
    max_depth = kwargs.setdefault('depth', 1)
    pages = _spider((root_url, 1, max_depth))
    return pages
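
A minimal sketch of how a caller drives the spider, mirroring what Package.fetch_available_versions does above; the URL and regex here are illustrative:

import re
from spack.util.web import get_pages

# Fetch the index page plus everything it links to, one level down.
pages = get_pages("http://www.mpich.org/static/downloads/", depth=2)
for page_url, contents in pages.iteritems():
    for match in re.finditer(r'mpich-(\d+(\.\d+)*(p\d+)?)\.tar\.gz', contents):
        print match.group(1)            # a candidate version string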