util/web.py: parse new GitLab JS dropdown links (#45764)
Co-authored-by: Harmen Stoppels <harmenstoppels@gmail.com>
This commit is contained in:
parent
f0f9a16e4f
commit
553cc3b70a
10
lib/spack/spack/test/data/web/index_with_javascript.html
Normal file
10
lib/spack/spack/test/data/web/index_with_javascript.html
Normal file
@ -0,0 +1,10 @@
|
||||
<html>
|
||||
<head>
|
||||
This is the root page.
|
||||
</head>
|
||||
<body>
|
||||
This is a page with a Vue javascript drop down with links as used in GitLab.
|
||||
|
||||
<div class="js-source-code-dropdown" data-css-class="" data-download-artifacts="[]" data-download-links="[{&quot;text&quot;:&quot;tar.gz&quot;,&quot;path&quot;:&quot;/foo-5.0.0.tar.gz&quot;}]"></div>
|
||||
</body>
|
||||
</html>
|
@ -37,6 +37,7 @@ def _create_url(relative_url):
|
||||
page_4 = _create_url("4.html")
|
||||
|
||||
root_with_fragment = _create_url("index_with_fragment.html")
|
||||
root_with_javascript = _create_url("index_with_javascript.html")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -148,6 +149,11 @@ def test_find_versions_of_archive_with_fragment():
|
||||
assert Version("5.0.0") in versions
|
||||
|
||||
|
||||
def test_find_versions_of_archive_with_javascript():
    """A version linked only from the javascript dropdown div is discovered."""
    expected = Version("5.0.0")
    found = spack.url.find_versions_of_archive(
        root_tarball, root_with_javascript, list_depth=0
    )
    assert expected in found
|
||||
|
||||
|
||||
def test_get_header():
|
||||
headers = {"Content-type": "text/plain"}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
import concurrent.futures
|
||||
import email.message
|
||||
import errno
|
||||
import json
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
@ -152,7 +153,8 @@ class HTMLParseError(Exception):
|
||||
|
||||
class LinkParser(HTMLParser):
|
||||
"""This parser just takes an HTML page and strips out the hrefs on the
|
||||
links. Good enough for a really simple spider."""
|
||||
links, as well as some javascript tags used on GitLab servers.
|
||||
Good enough for a really simple spider."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@ -160,9 +162,18 @@ def __init__(self):
|
||||
|
||||
def handle_starttag(self, tag, attrs):
    """Record link targets found on an opening tag in ``self.links``.

    Two kinds of tags contribute links:

    * ``a`` tags: the value of every ``href`` attribute is recorded.
    * ``div`` tags whose ``class`` attribute includes
      ``js-source-code-dropdown``. GitLab places its download dropdown with
      javascript, so the links are not regular anchors; instead the div has a
      ``data-download-links`` attribute holding a JSON list of objects such as
      ``{"text": "tar.gz", "path": "/foo-5.0.0.tar.gz"}``. The ``"path"`` of
      each object is recorded.

    Parsing the dropdown is best effort: a missing attribute, invalid JSON, or
    entries without a string ``"path"`` are skipped and never raise.

    Args:
        tag: name of the tag being opened.
        attrs: list of ``(name, value)`` pairs with the tag's attributes.
    """
    if tag == "a":
        self.links.extend(val for key, val in attrs if key == "href")
        return

    if tag != "div":
        return

    # The class attribute is a space separated list, so look for the marker
    # class among its tokens rather than requiring it to be the only class.
    is_dropdown = any(
        key == "class" and val and "js-source-code-dropdown" in val.split()
        for key, val in attrs
    )
    if not is_dropdown:
        return

    links_str = next((val for key, val in attrs if key == "data-download-links"), None)
    if not links_str:
        return

    try:
        entries = json.loads(links_str)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; malformed data yields no links.
        return

    if not isinstance(entries, list):
        return

    # Validate each entry on its own so that one bad entry does not discard
    # the valid links that follow it.
    self.links.extend(
        entry["path"]
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("path"), str)
    )
|
||||
|
||||
|
||||
class ExtractMetadataParser(HTMLParser):
|
||||
|
Loading…
Reference in New Issue
Block a user