Sanitize filepath from URL (#30625)
Spack's staging logic constructs a file path based on a URL. The URL may contain characters which are not allowed in valid file paths on the system (e.g. Windows prohibits ':' and '?' among others). This commit adds a function to remove such offending characters (but otherwise preserves the URL string when constructing a file path).
This commit is contained in:
parent
03c54aebdd
commit
13b0e73a4e
@ -363,12 +363,13 @@ def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
def expected_archive_files(self):
|
||||
"""Possible archive file paths."""
|
||||
paths = []
|
||||
|
||||
fnames = []
|
||||
expanded = True
|
||||
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
|
||||
expanded = self.default_fetcher.expand_archive
|
||||
fnames.append(os.path.basename(self.default_fetcher.url))
|
||||
clean_url = os.path.basename(
|
||||
sup.sanitize_file_path(self.default_fetcher.url))
|
||||
fnames.append(clean_url)
|
||||
|
||||
if self.mirror_paths:
|
||||
fnames.extend(os.path.basename(x) for x in self.mirror_paths)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
@ -12,11 +13,8 @@
|
||||
import spack.config
|
||||
import spack.util.path as sup
|
||||
|
||||
# This module pertains to path string padding manipulation specifically
|
||||
# which is used for binary caching. This functionality is not supported
|
||||
# on Windows as of yet.
|
||||
pytestmark = pytest.mark.skipif(sys.platform == 'win32',
|
||||
reason="Tests fail on Windows")
|
||||
is_windows = sys.platform == 'win32'
|
||||
|
||||
|
||||
#: Some lines with lots of placeholders
|
||||
padded_lines = [
|
||||
@ -34,74 +32,87 @@
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
|
||||
def test_padding_substitution(padded, fixed):
|
||||
"""Ensure that all padded lines are unpadded correctly."""
|
||||
assert fixed == sup.padding_filter(padded)
|
||||
def test_sanitze_file_path(tmpdir):
    """Check that characters illegal in file paths are stripped out."""
    base_dir = str(tmpdir)
    if is_windows:
        # Windows forbids a larger set of characters in file names
        illegal_file_path = os.path.join(tmpdir, 'a<b>cd?e:f"g|h*i.txt')
    else:
        # On *nix the only character that is illegal in a file name is '/'
        illegal_file_path = base_dir + '//' + 'abcdefghi.txt'

    expected_path = os.path.join(base_dir, 'abcdefghi.txt')
    real_path = sup.sanitize_file_path(illegal_file_path)
    assert real_path == expected_path
|
||||
|
||||
|
||||
def test_no_substitution():
|
||||
"""Ensure that a line not containing one full path placeholder is not modified."""
|
||||
partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert sup.padding_filter(partial) == partial
|
||||
# This class pertains to path string padding manipulation specifically
|
||||
# which is used for binary caching. This functionality is not supported
|
||||
# on Windows as of yet.
|
||||
@pytest.mark.skipif(is_windows,
|
||||
reason='Padding funtionality unsupported on Windows')
|
||||
class TestPathPadding():
|
||||
@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
|
||||
def test_padding_substitution(self, padded, fixed):
|
||||
"""Ensure that all padded lines are unpadded correctly."""
|
||||
assert fixed == sup.padding_filter(padded)
|
||||
|
||||
def test_no_substitution(self):
|
||||
"""Ensure that a line not containing one full path placeholder
|
||||
is not modified."""
|
||||
partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert sup.padding_filter(partial) == partial
|
||||
|
||||
def test_short_substitution():
|
||||
"""Ensure that a single placeholder path component is replaced"""
|
||||
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert short_subst == sup.padding_filter(short)
|
||||
def test_short_substitution(self):
|
||||
"""Ensure that a single placeholder path component is replaced"""
|
||||
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert short_subst == sup.padding_filter(short)
|
||||
|
||||
def test_partial_substitution(self):
|
||||
"""Ensure that a single placeholder path component is replaced"""
|
||||
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert short_subst == sup.padding_filter(short)
|
||||
|
||||
def test_partial_substitution():
|
||||
"""Ensure that a single placeholder path component is replaced"""
|
||||
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
|
||||
assert short_subst == sup.padding_filter(short)
|
||||
def test_longest_prefix_re(self):
|
||||
"""Test that longest_prefix_re generates correct regular expressions."""
|
||||
assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
|
||||
"string", capture=True
|
||||
)
|
||||
assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
|
||||
"string", capture=False
|
||||
)
|
||||
|
||||
def test_output_filtering(self, capfd, install_mockery, mutable_config):
|
||||
"""Test filtering padding out of tty messages."""
|
||||
long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200)
|
||||
padding_string = "[padded-to-%d-chars]" % len(long_path)
|
||||
|
||||
def test_longest_prefix_re():
|
||||
"""Test that longest_prefix_re generates correct regular expressions."""
|
||||
assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
|
||||
"string", capture=True
|
||||
)
|
||||
assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
|
||||
"string", capture=False
|
||||
)
|
||||
# test filtering when padding is enabled
|
||||
with spack.config.override('config:install_tree', {"padded_length": 256}):
|
||||
# tty.msg with filtering on the first argument
|
||||
with sup.filter_padding():
|
||||
tty.msg("here is a long path: %s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in out
|
||||
|
||||
# tty.msg with filtering on a later argument
|
||||
with sup.filter_padding():
|
||||
tty.msg("here is a long path:", "%s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in out
|
||||
|
||||
def test_output_filtering(capfd, install_mockery, mutable_config):
|
||||
"""Test filtering padding out of tty messages."""
|
||||
long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200)
|
||||
padding_string = "[padded-to-%d-chars]" % len(long_path)
|
||||
# tty.error with filtering on the first argument
|
||||
with sup.filter_padding():
|
||||
tty.error("here is a long path: %s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in err
|
||||
|
||||
# test filtering when padding is enabled
|
||||
with spack.config.override('config:install_tree', {"padded_length": 256}):
|
||||
# tty.msg with filtering on the first argument
|
||||
with sup.filter_padding():
|
||||
tty.msg("here is a long path: %s/with/a/suffix" % long_path)
|
||||
# tty.error with filtering on a later argument
|
||||
with sup.filter_padding():
|
||||
tty.error("here is a long path:", "%s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in err
|
||||
|
||||
# test no filtering
|
||||
tty.msg("here is a long path: %s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in out
|
||||
|
||||
# tty.msg with filtering on a later argument
|
||||
with sup.filter_padding():
|
||||
tty.msg("here is a long path:", "%s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in out
|
||||
|
||||
# tty.error with filtering on the first argument
|
||||
with sup.filter_padding():
|
||||
tty.error("here is a long path: %s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in err
|
||||
|
||||
# tty.error with filtering on a later argument
|
||||
with sup.filter_padding():
|
||||
tty.error("here is a long path:", "%s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string in err
|
||||
|
||||
# test no filtering
|
||||
tty.msg("here is a long path: %s/with/a/suffix" % long_path)
|
||||
out, err = capfd.readouterr()
|
||||
assert padding_string not in out
|
||||
assert padding_string not in out
|
||||
|
@ -87,6 +87,42 @@ def path_to_os_path(*pths):
|
||||
return ret_pths
|
||||
|
||||
|
||||
def sanitize_file_path(pth):
    """
    Formats strings to contain only characters that can
    be used to generate legal file paths.

    Criteria for legal files based on
    https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations

    Args:
        pth: string containing path to be created
            on the host filesystem

    Return:
        sanitized string that can legally be made into a path
    """
    # an empty string has no components to sanitize; returning it
    # unchanged avoids turning it into the filesystem root on unix
    if not pth:
        return pth

    # on unix, splitting path by separators will remove
    # instances of illegal characters on join
    pth_cmpnts = pth.split(os.path.sep)

    if is_windows:
        # only a bare drive specifier (e.g. "C:") counts as a drive;
        # anchoring the pattern keeps a component such as "c:name"
        # from bypassing sanitization
        drive_match = r'[a-zA-Z]:\Z'
        is_abs = bool(re.match(drive_match, pth_cmpnts[0]))
        drive = pth_cmpnts[0] + os.path.sep if is_abs else ''
        pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts
        illegal_chars = r'[<>?:"|*\\]'
    else:
        # a leading empty component means the path started with '/'
        drive = '/' if not pth_cmpnts[0] else ''
        illegal_chars = r'[/]'

    sanitized = [re.sub(illegal_chars, '', cmpnt) for cmpnt in pth_cmpnts]

    # os.path.join requires at least one argument; a drive-only
    # path leaves no remaining components to join
    if not sanitized:
        return drive
    return drive + os.path.join(*sanitized)
|
||||
|
||||
|
||||
def system_path_filter(_func=None, arg_slice=None):
|
||||
"""
|
||||
Filters function arguments to account for platform path separators.
|
||||
|
Loading…
Reference in New Issue
Block a user