add spack.util.url.parse_git_url()
method and tests
Add a method to parse git URLs (but not arbitrary file paths). This method parses git URLs into scheme, username, hostname, port, and path. It will raise a `ValueError` if its argument is not a proper git URL. - [x] add `spack.util.url.parse_git_url` method - [x] add 31 tests for `parse_git_url`
This commit is contained in:
@@ -7,6 +7,8 @@
|
||||
import os
|
||||
import os.path
|
||||
|
||||
import pytest
|
||||
|
||||
import spack.paths
|
||||
import spack.util.url as url_util
|
||||
|
||||
@@ -303,3 +305,73 @@ def test_url_join_absolute_paths():
|
||||
|
||||
assert(url_util.join(*args, resolve_href=False) ==
|
||||
'http://example.com/path/resource')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("url,parts", [
    # ssh:// URLs, with and without user / port
    ("ssh://user@host.xz:500/path/to/repo.git/",
     ("ssh", "user", "host.xz", 500, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz:500/path/to/repo.git/",
     ("ssh", None, "host.xz", 500, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),

    # port and ~user path combined; explicit port on git://
    # (these replace two entries that were exact duplicates of the
    # user@host and bare-host ssh cases above)
    ("ssh://user@host.xz:500/~user/path/to/repo.git/",
     ("ssh", "user", "host.xz", 500, "~user/path/to/repo.git")),
    ("git://host.xz:9418/path/to/repo.git/",
     ("git", None, "host.xz", 9418, "/path/to/repo.git")),

    # ~ paths are never absolute: the leading "/" is dropped
    ("ssh://user@host.xz/~user/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://host.xz/~user/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://user@host.xz/~/path/to/repo.git",
     ("ssh", "user", "host.xz", None, "~/path/to/repo.git")),
    ("ssh://host.xz/~/path/to/repo.git",
     ("ssh", None, "host.xz", None, "~/path/to/repo.git")),

    # scp-like syntax (no scheme, ":" separates host from path)
    ("git@github.com:spack/spack.git",
     (None, "git", "github.com", None, "spack/spack.git")),
    ("user@host.xz:/path/to/repo.git/",
     (None, "user", "host.xz", None, "/path/to/repo.git")),
    ("host.xz:/path/to/repo.git/",
     (None, None, "host.xz", None, "/path/to/repo.git")),
    ("user@host.xz:~user/path/to/repo.git/",
     (None, "user", "host.xz", None, "~user/path/to/repo.git")),
    ("host.xz:~user/path/to/repo.git/",
     (None, None, "host.xz", None, "~user/path/to/repo.git")),
    ("user@host.xz:path/to/repo.git",
     (None, "user", "host.xz", None, "path/to/repo.git")),
    ("host.xz:path/to/repo.git",
     (None, None, "host.xz", None, "path/to/repo.git")),

    # other schemes
    ("rsync://host.xz/path/to/repo.git/",
     ("rsync", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/path/to/repo.git/",
     ("git", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/~user/path/to/repo.git/",
     ("git", None, "host.xz", None, "~user/path/to/repo.git")),
    ("http://host.xz/path/to/repo.git/",
     ("http", None, "host.xz", None, "/path/to/repo.git")),
    ("https://host.xz/path/to/repo.git/",
     ("https", None, "host.xz", None, "/path/to/repo.git")),
    ("https://github.com/spack/spack",
     ("https", None, "github.com", None, "/spack/spack")),
    ("https://github.com/spack/spack/",
     ("https", None, "github.com", None, "/spack/spack")),
    ("file:///path/to/repo.git/",
     ("file", None, None, None, "/path/to/repo.git")),
    ("file://~/path/to/repo.git/",
     ("file", None, None, None, "~/path/to/repo.git")),

    # ``parts`` of None means parse_git_url must raise ValueError.
    # non-numeric ports are rejected
    ("ssh://host.xz:port/path/to/repo.git/", None),
    # non-numeric ports are rejected for hyphenated hostnames too
    ("ssh://host-foo.xz:port/path/to/repo.git/", None),

    # regular file paths are not git URLs
    ("/path/to/repo.git/", None),
    ("path/to/repo.git/", None),
    ("~/path/to/repo.git", None),
])
def test_git_url_parse(url, parts):
    """Check ``parse_git_url`` against a table of URLs.

    ``parts`` is the expected ``(scheme, user, hostname, port, path)``
    tuple, or ``None`` when the URL must be rejected with ``ValueError``.
    """
    if parts is None:
        with pytest.raises(ValueError):
            url_util.parse_git_url(url)
    else:
        assert parts == url_util.parse_git_url(url)
|
||||
|
@@ -248,3 +248,56 @@ def _join(base_url, path, *extra, **kwargs):
|
||||
params=params,
|
||||
query=query,
|
||||
fragment=None))
|
||||
|
||||
|
||||
# Pattern for git URLs, both ``scheme://[user@]host[:port]/path`` and the
# scp-like ``[user@]host:path`` form.  Group 4 is a conditional: when a
# scheme (group 1) matched, ":port" is optional; otherwise a bare ":" is
# required to separate the host from the path.
git_re = (
    r"^(?:([a-z]+)://)?"        # 1. optional scheme
    r"(?:([^@/]+)@)?"           # 2. optional user (must not span a "/",
                                #    so an "@" inside the path is not
                                #    mistaken for a user separator)
    r"([^:/~]+)?"               # 3. optional hostname
    r"(?(1)(?::([^:/]+))?|:)"   # 4. :<optional port> if scheme else :
    r"(.*[^/])/?$"              # 5. path
)


def parse_git_url(url):
    """Parse git URL into components.

    This parses URLs that look like:

    * ``https://host.com:443/path/to/repo.git``, or
    * ``git@host.com:path/to/repo.git``

    Anything not matching those patterns is likely a local
    file or invalid.

    Returned components are as follows (optional values can be ``None``):

    1. ``scheme`` (optional): git, ssh, http, https
    2. ``user`` (optional): ``git@`` for github, username for http or ssh
    3. ``hostname``: domain of server (``None`` for ``file://`` URLs)
    4. ``port`` (optional): port on server, converted to ``int``
    5. ``path``: path on the server, e.g. spack/spack; a single trailing
       ``/`` is stripped, and the leading ``/`` is dropped from ``/~...``
       paths

    Args:
        url (str): the URL to parse

    Returns:
        (tuple): tuple containing URL components as above

    Raises ``ValueError`` for invalid URLs.
    """
    match = re.match(git_re, url)
    if not match:
        raise ValueError("bad git URL: %s" % url)

    # initial parse
    scheme, user, hostname, port, path = match.groups()

    # special handling for ~ paths (they're never absolute)
    if path.startswith("/~"):
        path = path[1:]

    # the regex accepts any non-empty port token; only integers are valid
    if port is not None:
        try:
            port = int(port)
        except ValueError:
            raise ValueError("bad port in git url: %s" % url)

    return (scheme, user, hostname, port, path)
|
||||
|
Reference in New Issue
Block a user