Update URL parsing regexes and tests (#13411)

* Update URL parsing regexes and tests

* Get rid of no longer used README

* Merge py-udunits and py-cf-units

* netcdf -> netcdf-c

* setup_environment -> setup_*_environment

* Fix doc tests

* Few last minute fixes

* Simplify prefix removal copypasta
This commit is contained in:
Adam J. Stewart
2019-10-28 20:27:54 -05:00
committed by GitHub
parent 4367e16740
commit 2264e30d99
240 changed files with 773 additions and 805 deletions

View File

@@ -97,7 +97,7 @@ Check Installation
With Spack installed, you should be able to run some basic Spack
commands. For example:
.. command-output:: spack spec netcdf
.. command-output:: spack spec netcdf-c
^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@@ -434,23 +434,10 @@ def name_parsed_correctly(pkg, name):
Returns:
bool: True if the name was correctly parsed, else False
"""
pkg_name = pkg.name
pkg_name = remove_prefix(pkg.name)
name = simplify_name(name)
# After determining a name, `spack create` determines a build system.
# Some build systems prepend a special string to the front of the name.
# Since this can't be guessed from the URL, it would be unfair to say
# that these names are incorrectly parsed, so we remove them.
if pkg_name.startswith('r-'):
pkg_name = pkg_name[2:]
elif pkg_name.startswith('py-'):
pkg_name = pkg_name[3:]
elif pkg_name.startswith('perl-'):
pkg_name = pkg_name[5:]
elif pkg_name.startswith('octave-'):
pkg_name = pkg_name[7:]
return name == pkg_name
@@ -475,8 +462,32 @@ def version_parsed_correctly(pkg, version):
return False
def remove_prefix(pkg_name):
    """Remove build system prefix ('py-', 'perl-', etc.) from a package name.

    After determining a name, `spack create` determines a build system.
    Some build systems prepend a special string to the front of the name.
    Since this can't be guessed from the URL, it would be unfair to say
    that these names are incorrectly parsed, so we remove them.

    At most one prefix is removed, and only from the very front of the
    name: 'py-py-foo' becomes 'py-foo', and a name that merely contains a
    prefix somewhere in the middle is returned unchanged.

    Args:
        pkg_name (str): the name of the package

    Returns:
        str: the name of the package with any build system prefix removed
    """
    # None of these prefixes is itself a prefix of another entry, so the
    # order in which they are tried does not affect the result.
    prefixes = (
        'r-', 'py-', 'tcl-', 'lua-', 'perl-', 'ruby-', 'llvm-',
        'intel-', 'votca-', 'octave-', 'gtkorvo-',
    )

    for prefix in prefixes:
        if pkg_name.startswith(prefix):
            return pkg_name[len(prefix):]

    # No known build system prefix: the name is already in its bare form.
    return pkg_name
def remove_separators(version):
"""Removes separator characters ('.', '_', and '-') from a version.
"""Remove separator characters ('.', '_', and '-') from a version.
A version like 1.2.3 may be displayed as 1_2_3 in the URL.
Make sure 1.2.3, 1-2-3, 1_2_3, and 123 are considered equal.

View File

@@ -60,6 +60,8 @@
('cppad-20170114.gpl', 'cppad-20170114'),
# Arch
('pcraster-4.1.0_x86-64', 'pcraster-4.1.0'),
('dislin-11.0.linux.i586_64', 'dislin-11.0'),
('PAGIT.V1.01.64bit', 'PAGIT.V1.01'),
# OS - linux
('astyle_2.04_linux', 'astyle_2.04'),
# OS - unix
@@ -85,20 +87,31 @@
# Combinations of multiple patterns - darwin
('ghc-7.0.4-x86_64-apple-darwin', 'ghc-7.0.4'),
('ghc-7.0.4-i386-apple-darwin', 'ghc-7.0.4'),
# Combinations of multiple patterns - centos
('sratoolkit.2.8.2-1-centos_linux64', 'sratoolkit.2.8.2-1'),
# Combinations of multiple patterns - arch
('VizGlow_v2.2alpha17-R21November2016-Linux-x86_64-Install',
'VizGlow_v2.2alpha17-R21November2016'),
('jdk-8u92-linux-x64', 'jdk-8u92'),
('cuda_6.5.14_linux_64.run', 'cuda_6.5.14'),
('Mathematica_12.0.0_LINUX.sh', 'Mathematica_12.0.0'),
('trf407b.linux64', 'trf407b'),
# Combinations of multiple patterns - with
('mafft-7.221-with-extensions-src', 'mafft-7.221'),
('spark-2.0.0-bin-without-hadoop', 'spark-2.0.0'),
('conduit-v0.3.0-src-with-blt', 'conduit-v0.3.0'),
# Combinations of multiple patterns - rock
('bitlib-23-2.src.rock', 'bitlib-23-2'),
# Combinations of multiple patterns - public
('dakota-6.3-public.src', 'dakota-6.3'),
# Combinations of multiple patterns - universal
('synergy-1.3.6p2-MacOSX-Universal', 'synergy-1.3.6p2'),
# Combinations of multiple patterns - dynamic
('snptest_v2.5.2_linux_x86_64_dynamic', 'snptest_v2.5.2'),
# Combinations of multiple patterns - other
('alglib-3.11.0.cpp.gpl', 'alglib-3.11.0'),
('hpcviewer-2019.08-linux.gtk.x86_64', 'hpcviewer-2019.08'),
('apache-mxnet-src-1.3.0-incubating', 'apache-mxnet-src-1.3.0'),
])
def test_url_strip_version_suffixes(url, expected):
stripped = strip_version_suffixes(url)
@@ -109,24 +122,40 @@ def test_url_strip_version_suffixes(url, expected):
# No suffix
('rgb-1.0.6', '1.0.6', 'rgb'),
('nauty26r7', '26r7', 'nauty'),
('PAGIT.V1.01', '1.01', 'PAGIT'),
('AmpliconNoiseV1.29', '1.29', 'AmpliconNoise'),
# Download type - install
('converge_install_2.3.16', '2.3.16', 'converge'),
# Download type - src
('jpegsrc.v9b', '9b', 'jpeg'),
('blatSrc35', '35', 'blat'),
# Download type - open
('RepeatMasker-open-4-0-7', '4-0-7', 'RepeatMasker'),
# Download type - archive
('coinhsl-archive-2014.01.17', '2014.01.17', 'coinhsl'),
# Download type - std
('ghostscript-fonts-std-8.11', '8.11', 'ghostscript-fonts'),
# Download type - bin
('GapCloser-bin-v1.12-r6', '1.12-r6', 'GapCloser'),
# Download type - software
('orthomclSoftware-v2.0.9', '2.0.9', 'orthomcl'),
# Download version - release
('cbench_release_1.3.0.tar.gz', '1.3.0', 'cbench'),
# Download version - snapshot
('gts-snapshot-121130', '121130', 'gts'),
# Download version - distrib
('zoltan_distrib_v3.83', '3.83', 'zoltan'),
# Download version - latest
('Platypus-latest', 'N/A', 'Platypus'),
# Download version - complex
('qt-everywhere-opensource-src-5.7.0', '5.7.0', 'qt'),
# Arch
('VESTA-x86_64', '3.4.6', 'VESTA'),
# VCS - bazaar
('libvterm-0+bzr681', '681', 'libvterm'),
# License - gpl
('PyQt-x11-gpl-4.11.3', '4.11.3', 'PyQt-x11')
('PyQt-x11-gpl-4.11.3', '4.11.3', 'PyQt'),
('PyQt4_gpl_x11-4.12.3', '4.12.3', 'PyQt4'),
])
def test_url_strip_name_suffixes(url, version, expected):
stripped = strip_name_suffixes(url, version)
@@ -182,6 +211,7 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
@pytest.mark.parametrize('name,version,url', [
# Common Repositories - github downloads
# name/archive/ver.ver
('nco', '4.6.2', 'https://github.com/nco/nco/archive/4.6.2.tar.gz'),
# name/archive/vver.ver
('vim', '8.0.0134', 'https://github.com/vim/vim/archive/v8.0.0134.tar.gz'),
@@ -257,6 +287,15 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
# Common Tarball Formats
# 1st Pass: Simplest case
# Assume name contains no digits and version contains no letters
# name-ver.ver
('libpng', '1.6.37', 'http://download.sourceforge.net/libpng/libpng-1.6.37.tar.gz'),
# 2nd Pass: Version only
# Assume version contains no letters
# ver.ver
('eigen', '3.2.7', 'https://bitbucket.org/eigen/eigen/get/3.2.7.tar.bz2'),
# ver.ver-ver
@@ -266,10 +305,17 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
# vver_ver
('luafilesystem', '1_6_3', 'https://github.com/keplerproject/luafilesystem/archive/v1_6_3.tar.gz'),
# No separators
# 3rd Pass: No separator characters are used
# Assume name contains no digits
# namever
('turbolinux', '702', 'file://{0}/turbolinux702.tar.gz'.format(os.getcwd())),
('nauty', '26r7', 'http://pallini.di.uniroma1.it/nauty26r7.tar.gz'),
# Dashes only
# 4th Pass: A single separator character is used
# Assume name contains no digits
# name-name-ver-ver
('Trilinos', '12-10-1',
'https://github.com/trilinos/Trilinos/archive/trilinos-release-12-10-1.tar.gz'),
('panda', '2016-03-07',
@@ -278,7 +324,7 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
'http://gts.sourceforge.net/tarballs/gts-snapshot-121130.tar.gz'),
('cdd', '061a',
'http://www.cs.mcgill.ca/~fukuda/download/cdd/cdd-061a.tar.gz'),
# Only underscores
# name_name_ver_ver
('tinyxml', '2_6_2',
'https://sourceforge.net/projects/tinyxml/files/tinyxml/2.6.2/tinyxml_2_6_2.tar.gz'),
('boost', '1_55_0',
@@ -287,9 +333,6 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
'https://github.com/dhmunro/yorick/archive/y_2_2_04.tar.gz'),
('tbb', '44_20160413',
'https://www.threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb44_20160413oss_src.tgz'),
# Only dots
# name.name.ver.ver
('prank', '150803', 'http://wasabiapp.org/download/prank/prank.source.150803.tgz'),
('jpeg', '9b', 'http://www.ijg.org/files/jpegsrc.v9b.tar.gz'),
@@ -302,61 +345,51 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
('geant', '4.10.01.p03', 'http://geant4.cern.ch/support/source/geant4.10.01.p03.tar.gz'),
('tcl', '8.6.5', 'http://prdownloads.sourceforge.net/tcl/tcl8.6.5-src.tar.gz'),
# Dash and dots
# 5th Pass: Two separator characters are used
# Name may contain digits, version may contain letters
# name-name-ver.ver
# digit in name
('m4', '1.4.17', 'https://ftp.gnu.org/gnu/m4/m4-1.4.17.tar.gz'),
# letter in version
('gmp', '6.0.0a', 'https://gmplib.org/download/gmp/gmp-6.0.0a.tar.bz2'),
# version starts with 'v'
('LaunchMON', '1.0.2',
'https://github.com/LLNL/LaunchMON/releases/download/v1.0.2/launchmon-v1.0.2.tar.gz'),
# name-ver-ver.ver
('libedit', '20150325-3.1', 'http://thrysoee.dk/editline/libedit-20150325-3.1.tar.gz'),
# Dash and underscores
# name-name-ver_ver
('icu4c', '57_1', 'http://download.icu-project.org/files/icu4c/57.1/icu4c-57_1-src.tgz'),
# Underscores and dots
# name_name_ver.ver
('superlu_dist', '4.1', 'http://crd-legacy.lbl.gov/~xiaoye/SuperLU/superlu_dist_4.1.tar.gz'),
('pexsi', '0.9.0', 'https://math.berkeley.edu/~linlin/pexsi/download/pexsi_v0.9.0.tar.gz'),
# name_name.ver.ver
('fer', '696', 'ftp://ftp.pmel.noaa.gov/ferret/pub/source/fer_source.v696.tar.gz'),
# Dash dot dash dot
# name_name_ver-ver
('Bridger', '2014-12-01',
'https://downloads.sourceforge.net/project/rnaseqassembly/Bridger_r2014-12-01.tar.gz'),
# name-name-ver.ver-ver.ver
('sowing', '1.1.23-p1', 'http://ftp.mcs.anl.gov/pub/petsc/externalpackages/sowing-1.1.23-p1.tar.gz'),
('bib2xhtml', '3.0-15-gf506', 'http://www.spinellis.gr/sw/textproc/bib2xhtml/bib2xhtml-v3.0-15-gf506.tar.gz'),
# namever.ver-ver.ver
('go', '1.4-bootstrap-20161024', 'https://storage.googleapis.com/golang/go1.4-bootstrap-20161024.tar.gz'),
# Underscore dash dot
# 6th Pass: All three separator characters are used
# Name may contain digits, version may contain letters
# name_name-ver.ver
('the_silver_searcher', '0.32.0', 'http://geoff.greer.fm/ag/releases/the_silver_searcher-0.32.0.tar.gz'),
('sphinx_rtd_theme', '0.1.10a0',
'https://pypi.python.org/packages/source/s/sphinx_rtd_theme/sphinx_rtd_theme-0.1.10a0.tar.gz'),
# Dot underscore dot dash dot
# name.name_ver.ver-ver.ver
('TH.data', '1.0-8', 'https://cran.r-project.org/src/contrib/TH.data_1.0-8.tar.gz'),
('XML', '3.98-1.4', 'https://cran.r-project.org/src/contrib/XML_3.98-1.4.tar.gz'),
# Dash dot underscore dot
# name-name-ver.ver_ver.ver
('pypar', '2.1.5_108',
'https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/pypar/pypar-2.1.5_108.tgz'),
# name-namever.ver_ver.ver
('STAR-CCM+', '11.06.010_02',
'file://{0}/STAR-CCM+11.06.010_02_linux-x86_64.tar.gz'.format(os.getcwd())),
# name-name_name-ver.ver
('PerlIO-utf8_strict', '0.002',
'http://search.cpan.org/CPAN/authors/id/L/LE/LEONT/PerlIO-utf8_strict-0.002.tar.gz'),
# Various extensions
# .tar.gz
@@ -399,18 +432,61 @@ def test_url_parse_offset(name, noffset, ver, voffset, path):
# .txz
('kim-api', '2.1.0', 'https://s3.openkim.org/kim-api/kim-api-2.1.0.txz'),
# Weird URLS
# 8th Pass: Query strings
# github.com/repo/name/releases/download/name-vver/name
('nextflow', '0.20.1', 'https://github.com/nextflow-io/nextflow/releases/download/v0.20.1/nextflow'),
# suffix queries
('swiftsim', '0.3.0', 'http://gitlab.cosma.dur.ac.uk/swift/swiftsim/repository/archive.tar.gz?ref=v0.3.0'),
('swiftsim', '0.3.0', 'https://gitlab.cosma.dur.ac.uk/api/v4/projects/swift%2Fswiftsim/repository/archive.tar.gz?sha=v0.3.0'),
('swiftsim', '0.3.0',
'https://gitlab.cosma.dur.ac.uk/api/v4/projects/swift%2Fswiftsim/repository/archive.tar.gz?sha=v0.3.0'),
('sionlib', '1.7.1', 'http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.1'),
('jube2', '2.2.2', 'https://apps.fz-juelich.de/jsc/jube/jube2/download.php?version=2.2.2'),
('archive', '1.0.0', 'https://code.ornl.gov/eck/papyrus/repository/archive.tar.bz2?ref=v1.0.0'),
('VecGeom', '0.3.rc',
'https://gitlab.cern.ch/api/v4/projects/VecGeom%2FVecGeom/repository/archive.tar.gz?sha=v0.3.rc'),
('parsplice', '1.1',
'https://gitlab.com/api/v4/projects/exaalt%2Fparsplice/repository/archive.tar.gz?sha=v1.1'),
('busco', '2.0.1', 'https://gitlab.com/api/v4/projects/ezlab%2Fbusco/repository/archive.tar.gz?sha=2.0.1'),
('libaec', '1.0.2',
'https://gitlab.dkrz.de/api/v4/projects/k202009%2Flibaec/repository/archive.tar.gz?sha=v1.0.2'),
('icet', '2.1.1',
'https://gitlab.kitware.com/api/v4/projects/icet%2Ficet/repository/archive.tar.bz2?sha=IceT-2.1.1'),
('vtk-m', '1.3.0',
'https://gitlab.kitware.com/api/v4/projects/vtk%2Fvtk-m/repository/archive.tar.gz?sha=v1.3.0'),
('GATK', '3.8-1-0-gf15c1c3ef',
'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.8-1-0-gf15c1c3ef'),
# stem queries
('slepc', '3.6.2', 'http://slepc.upv.es/download/download.php?filename=slepc-3.6.2.tar.gz'),
('otf', '1.12.5salmon',
'http://wwwpub.zih.tu-dresden.de/%7Emlieber/dcount/dcount.php?package=otf&get=OTF-1.12.5salmon.tar.gz'),
('eospac', '6.4.0beta.1',
'http://laws-green.lanl.gov/projects/data/eos/get_file.php?package=eospac&filename=eospac_v6.4.0beta.1_r20171213193219.tgz'),
('vampirtrace', '5.14.4',
'http://wwwpub.zih.tu-dresden.de/~mlieber/dcount/dcount.php?package=vampirtrace&get=VampirTrace-5.14.4.tar.gz'),
# (we don't actually look for these, they are picked up
# during the preliminary stem parsing)
('octopus', '6.0', 'http://octopus-code.org/down.php?file=6.0/octopus-6.0.tar.gz'),
('cloog', '0.18.1', 'http://www.bastoul.net/cloog/pages/download/count.php3?url=./cloog-0.18.1.tar.gz'),
('libxc', '2.2.2', 'http://www.tddft.org/programs/octopus/down.php?file=libxc/libxc-2.2.2.tar.gz'),
('cistem', '1.0.0-beta',
'https://cistem.org/system/tdf/upload3/cistem-1.0.0-beta-source-code.tar.gz?file=1&type=cistem_details&id=37&force=0'),
('Magics', '4.1.0',
'https://confluence.ecmwf.int/download/attachments/3473464/Magics-4.1.0-Source.tar.gz?api=v2'),
('grib_api', '1.17.0',
'https://software.ecmwf.int/wiki/download/attachments/3473437/grib_api-1.17.0-Source.tar.gz?api=v2'),
('eccodes', '2.2.0',
'https://software.ecmwf.int/wiki/download/attachments/45757960/eccodes-2.2.0-Source.tar.gz?api=v2'),
('SWFFT', '1.0',
'https://xgitlab.cels.anl.gov/api/v4/projects/hacc%2FSWFFT/repository/archive.tar.gz?sha=v1.0'),
# 9th Pass: Version in path
# github.com/repo/name/releases/download/name-vver/name
('nextflow', '0.20.1', 'https://github.com/nextflow-io/nextflow/releases/download/v0.20.1/nextflow'),
# ver/name
('ncbi', '2.2.26', 'ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/ncbi.tar.gz'),
# Other tests for corner cases
# single character name
('R', '3.3.2', 'https://cloud.r-project.org/src/base/R-3/R-3.3.2.tar.gz'),
# name starts with digit

View File

@@ -153,13 +153,14 @@ def strip_version_suffixes(path):
r'[Ii]nstall',
r'all',
r'code',
r'src(_0)?',
r'[Ss]ources?',
r'file',
r'full',
r'single',
r'public',
r'with[a-zA-Z_-]+',
r'rock',
r'src(_0)?',
r'public',
r'bin',
r'binary',
r'run',
@@ -189,15 +190,24 @@ def strip_version_suffixes(path):
r'ia32',
r'intel',
r'amd64',
r'linux64',
r'x64',
r'64bit',
r'x86[_-]64',
r'i586_64',
r'x86',
r'i[36]86',
r'ppc64(le)?',
r'armv?(7l|6l|64)',
# Other
r'cpp',
r'gtk',
r'incubating',
# OS
r'[Ll]inux(_64)?',
r'LINUX',
r'[Uu]ni?x',
r'[Ss]un[Oo][Ss]',
r'[Mm]ac[Oo][Ss][Xx]?',
@@ -208,14 +218,18 @@ def strip_version_suffixes(path):
r'[Ww]in(64|32)?',
r'[Cc]ygwin(64|32)?',
r'[Mm]ingw',
r'centos',
# Arch
# Needs to come before and after OS, appears in both orders
r'ia32',
r'intel',
r'amd64',
r'linux64',
r'x64',
r'64bit',
r'x86[_-]64',
r'i586_64',
r'x86',
r'i[36]86',
r'ppc64(le)?',
@@ -270,31 +284,41 @@ def strip_name_suffixes(path, version):
# name-ver
# name_ver
# name.ver
r'[._-]v?' + str(version) + '.*',
r'[._-][rvV]?' + str(version) + '.*',
# namever
str(version) + '.*',
r'V?' + str(version) + '.*',
# Download type
r'install',
r'src',
r'[Ss]rc',
r'(open)?[Ss]ources?',
r'[._-]open',
r'[._-]archive',
r'[._-]std',
r'[._-]bin',
r'Software',
# Download version
r'release',
r'snapshot',
r'distrib',
r'everywhere',
r'latest',
# Arch
r'Linux64',
r'Linux(64)?',
r'x86_64',
# VCS
r'0\+bzr',
# License
r'gpl',
# Needs to come before and after gpl, appears in both orders
r'[._-]x11',
r'gpl',
]
for regex in suffix_regexes:
@@ -407,7 +431,7 @@ def parse_version_offset(path):
# 3. names can contain A-Z, a-z, 0-9, '+', separators
# 4. versions can contain A-Z, a-z, 0-9, separators
# 5. versions always start with a digit
# 6. versions are often prefixed by a 'v' character
# 6. versions are often prefixed by a 'v' or 'r' character
# 7. separators are most reliable to determine name/version boundaries
# List of the following format:
@@ -450,7 +474,7 @@ def parse_version_offset(path):
(r'^[a-zA-Z+-]*(\d[\da-zA-Z-]*)$', stem),
# name_name_ver_ver
# e.g. tinyxml_2_6_2, boost_1_55_0, tbb2017_20161128, v1_6_3
# e.g. tinyxml_2_6_2, boost_1_55_0, tbb2017_20161128
(r'^[a-zA-Z+_]*(\d[\da-zA-Z_]*)$', stem),
# name.name.ver.ver
@@ -476,6 +500,10 @@ def parse_version_offset(path):
# e.g. fer_source.v696
(r'^[a-zA-Z\d+_]+\.v?(\d[\da-zA-Z.]*)$', stem),
# name_ver-ver
# e.g. Bridger_r2014-12-01
(r'^[a-zA-Z\d+]+_r?(\d[\da-zA-Z-]*)$', stem),
# name-name-ver.ver-ver.ver
# e.g. sowing-1.1.23-p1, bib2xhtml-v3.0-15-gf506, 4.6.3-alpha04
(r'^(?:[a-zA-Z\d+-]+-)?v?(\d[\da-zA-Z.-]*)$', stem),
@@ -507,19 +535,17 @@ def parse_version_offset(path):
# e.g. STAR-CCM+11.06.010_02
(r'^[a-zA-Z+-]+(\d[\da-zA-Z._]*)$', stem),
# name-name_name-ver.ver
# e.g. PerlIO-utf8_strict-0.002
(r'^[a-zA-Z\d+_-]+-v?(\d[\da-zA-Z.]*)$', stem),
# 7th Pass: Specific VCS
# bazaar
# e.g. libvterm-0+bzr681
(r'bzr(\d[\da-zA-Z._-]*)$', stem),
# 8th Pass: Version in path
# github.com/repo/name/releases/download/vver/name
# e.g. https://github.com/nextflow-io/nextflow/releases/download/v0.20.1/nextflow
(r'github\.com/[^/]+/[^/]+/releases/download/[a-zA-Z+._-]*v?(\d[\da-zA-Z._-]*)/', path), # noqa
# 9th Pass: Query strings
# 8th Pass: Query strings
# e.g. https://gitlab.cosma.dur.ac.uk/api/v4/projects/swift%2Fswiftsim/repository/archive.tar.gz?sha=v0.3.0
(r'\?sha=[a-zA-Z+._-]*v?(\d[\da-zA-Z._-]*)$', suffix),
@@ -528,13 +554,24 @@ def parse_version_offset(path):
(r'\?ref=[a-zA-Z+._-]*v?(\d[\da-zA-Z._-]*)$', suffix),
# e.g. http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.1
(r'\?version=v?(\d[\da-zA-Z._-]*)$', suffix),
# e.g. https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.8-1-0-gf15c1c3ef
(r'[?&]version=v?(\d[\da-zA-Z._-]*)$', suffix),
# e.g. http://slepc.upv.es/download/download.php?filename=slepc-3.6.2.tar.gz
(r'\?filename=[a-zA-Z\d+-]+-v?(\d[\da-zA-Z.]*)$', stem),
# e.g. http://laws-green.lanl.gov/projects/data/eos/get_file.php?package=eospac&filename=eospac_v6.4.0beta.1_r20171213193219.tgz
(r'[?&]filename=[a-zA-Z\d+-]+[_-]v?(\d[\da-zA-Z.]*)', stem),
# e.g. http://wwwpub.zih.tu-dresden.de/%7Emlieber/dcount/dcount.php?package=otf&get=OTF-1.12.5salmon.tar.gz
(r'\?package=[a-zA-Z\d+-]+&get=[a-zA-Z\d+-]+-v?(\d[\da-zA-Z.]*)$', stem), # noqa
(r'&get=[a-zA-Z\d+-]+-v?(\d[\da-zA-Z.]*)$', stem), # noqa
# 9th Pass: Version in path
# github.com/repo/name/releases/download/vver/name
# e.g. https://github.com/nextflow-io/nextflow/releases/download/v0.20.1/nextflow
(r'github\.com/[^/]+/[^/]+/releases/download/[a-zA-Z+._-]*v?(\d[\da-zA-Z._-]*)/', path), # noqa
# e.g. ftp://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/ncbi.tar.gz
(r'(\d[\da-zA-Z._-]*)/[^/]+$', path),
]
for i, version_regex in enumerate(version_regexes):
@@ -662,6 +699,9 @@ def parse_name_offset(path, v=None):
# e.g. http://wwwpub.zih.tu-dresden.de/%7Emlieber/dcount/dcount.php?package=otf&get=OTF-1.12.5salmon.tar.gz
(r'\?package=([A-Za-z\d+-]+)', stem),
# ?package=name-version
(r'\?package=([A-Za-z\d]+)', suffix),
# download.php
# e.g. http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.1
(r'([^/]+)/download.php$', path),