Compare commits

...

1 Commits

Author SHA1 Message Date
Todd Gamblin
0d092d671f add spack.util.url.git_url_parse() method and tests
Add a method to parse git URLs (but not arbitrary file paths). This method parses git
URLs into scheme, username, hostname, port, and path. It will raise an error if its
argument is not a proper git URL.

- [x] add `spack.util.url.parse_git_url` method
- [x] add 31 tests for `git_url_parse`
2021-09-13 21:46:52 -07:00
2 changed files with 125 additions and 0 deletions

View File

@@ -7,6 +7,8 @@
import os
import os.path
import pytest
import spack.paths
import spack.util.url as url_util
@@ -303,3 +305,73 @@ def test_url_join_absolute_paths():
assert(url_util.join(*args, resolve_href=False) ==
'http://example.com/path/resource')
@pytest.mark.parametrize("url,parts", [
    # --- ssh:// URLs, with and without user / port ---
    ("ssh://user@host.xz:500/path/to/repo.git/",
     ("ssh", "user", "host.xz", 500, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz:500/path/to/repo.git/",
     ("ssh", None, "host.xz", 500, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),
    ("ssh://user@host.xz/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "/path/to/repo.git")),
    ("ssh://host.xz/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "/path/to/repo.git")),

    # --- ssh:// URLs with home-relative ("~") paths ---
    ("ssh://user@host.xz/~user/path/to/repo.git/",
     ("ssh", "user", "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://host.xz/~user/path/to/repo.git/",
     ("ssh", None, "host.xz", None, "~user/path/to/repo.git")),
    ("ssh://user@host.xz/~/path/to/repo.git",
     ("ssh", "user", "host.xz", None, "~/path/to/repo.git")),
    ("ssh://host.xz/~/path/to/repo.git",
     ("ssh", None, "host.xz", None, "~/path/to/repo.git")),

    # --- scheme-less "[user@]host:path" form ---
    ("git@github.com:spack/spack.git",
     (None, "git", "github.com", None, "spack/spack.git")),
    ("user@host.xz:/path/to/repo.git/",
     (None, "user", "host.xz", None, "/path/to/repo.git")),
    ("host.xz:/path/to/repo.git/",
     (None, None, "host.xz", None, "/path/to/repo.git")),
    ("user@host.xz:~user/path/to/repo.git/",
     (None, "user", "host.xz", None, "~user/path/to/repo.git")),
    ("host.xz:~user/path/to/repo.git/",
     (None, None, "host.xz", None, "~user/path/to/repo.git")),
    ("user@host.xz:path/to/repo.git",
     (None, "user", "host.xz", None, "path/to/repo.git")),
    ("host.xz:path/to/repo.git",
     (None, None, "host.xz", None, "path/to/repo.git")),

    # --- other schemes ---
    ("rsync://host.xz/path/to/repo.git/",
     ("rsync", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/path/to/repo.git/",
     ("git", None, "host.xz", None, "/path/to/repo.git")),
    ("git://host.xz/~user/path/to/repo.git/",
     ("git", None, "host.xz", None, "~user/path/to/repo.git")),
    ("http://host.xz/path/to/repo.git/",
     ("http", None, "host.xz", None, "/path/to/repo.git")),
    ("https://host.xz/path/to/repo.git/",
     ("https", None, "host.xz", None, "/path/to/repo.git")),
    ("https://github.com/spack/spack",
     ("https", None, "github.com", None, "/spack/spack")),
    ("https://github.com/spack/spack/",
     ("https", None, "github.com", None, "/spack/spack")),
    ("file:///path/to/repo.git/",
     ("file", None, None, None, "/path/to/repo.git")),
    ("file://~/path/to/repo.git/",
     ("file", None, None, None, "~/path/to/repo.git")),

    # --- rejected inputs: ``None`` means "expect ValueError" ---
    # a non-numeric port is rejected
    ("ssh://host.xz:port/path/to/repo.git/", None),
    # a non-numeric port is rejected for hyphenated hostnames as well
    ("ssh://host-foo.xz:port/path/to/repo.git/", None),
    # plain filesystem paths are not git URLs
    ("/path/to/repo.git/", None),
    ("path/to/repo.git/", None),
    ("~/path/to/repo.git", None),
])
def test_git_url_parse(url, parts):
    """Check ``parse_git_url`` against the expected component tuple.

    ``parts`` is the expected ``(scheme, user, hostname, port, path)``
    tuple, or ``None`` when parsing ``url`` must raise ``ValueError``.
    """
    if parts is not None:
        assert parts == url_util.parse_git_url(url)
        return

    with pytest.raises(ValueError):
        url_util.parse_git_url(url)

View File

@@ -248,3 +248,56 @@ def _join(base_url, path, *extra, **kwargs):
params=params,
query=query,
fragment=None))
# Regular expression for git URLs; groups are consumed by ``parse_git_url``.
#
# The user group must not span a "/": otherwise an "@" that appears later in
# the *path* of a URL without a user (e.g. ``https://host/a/b@c.git``) would
# make everything before it -- hostname and leading path included -- be
# captured as the user name.
git_re = (
    r"^(?:([a-z]+)://)?"        # 1. optional scheme
    r"(?:([^@/]+)@)?"           # 2. optional user (never contains "/")
    r"([^:/~]+)?"               # 3. optional hostname
    r"(?(1)(?::([^:/]+))?|:)"   # 4. :<optional port> if scheme else :
    r"(.*[^/])/?$"              # 5. path
)


def parse_git_url(url):
    """Parse git URL into components.

    This parses URLs that look like:

    * ``https://host.com:443/path/to/repo.git``, or
    * ``git@host.com:path/to/repo.git``

    Anything not matching those patterns is likely a local
    file or invalid.

    Returned components are as follows (optional values can be ``None``):

    1. ``scheme`` (optional): git, ssh, http, https
    2. ``user`` (optional): ``git@`` for github, username for http or ssh
    3. ``hostname``: domain of server (``None`` when the URL has none,
       e.g. ``file:///path/to/repo.git``)
    4. ``port`` (optional): port on server, converted to ``int``
    5. ``path``: path on the server, e.g. spack/spack; a single trailing
       ``/`` is dropped, and a path starting with ``/~`` loses its
       leading ``/``

    Args:
        url (str): the URL to parse

    Returns:
        (tuple): tuple containing URL components as above

    Raises ``ValueError`` for invalid URLs, including URLs whose port is
    not an integer.
    """
    match = re.match(git_re, url)
    if not match:
        raise ValueError("bad git URL: %s" % url)

    # initial parse
    scheme, user, hostname, port, path = match.groups()

    # special handling for ~ paths (they're never absolute)
    if path.startswith("/~"):
        path = path[1:]

    # the regex accepts any text between ":" and "/" as a port, so the
    # numeric check happens here
    if port is not None:
        try:
            port = int(port)
        except ValueError:
            raise ValueError("bad port in git url: %s" % url)

    return (scheme, user, hostname, port, path)