Sanitize filepath from URL (#30625)

Spack's staging logic constructs a file path based on a URL. The URL
may contain characters which are not allowed in valid file paths on
the system (e.g. Windows prohibits ':' and '?' among others). This
commit adds a function to remove such offending characters (but
otherwise preserves the URL string when constructing a file path).
This commit is contained in:
John W. Parent 2022-06-04 18:06:43 -04:00 committed by GitHub
parent 03c54aebdd
commit 13b0e73a4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 114 additions and 66 deletions

View File

@ -363,12 +363,13 @@ def __exit__(self, exc_type, exc_val, exc_tb):
def expected_archive_files(self):
"""Possible archive file paths."""
paths = []
fnames = []
expanded = True
if isinstance(self.default_fetcher, fs.URLFetchStrategy):
expanded = self.default_fetcher.expand_archive
fnames.append(os.path.basename(self.default_fetcher.url))
clean_url = os.path.basename(
sup.sanitize_file_path(self.default_fetcher.url))
fnames.append(clean_url)
if self.mirror_paths:
fnames.extend(os.path.basename(x) for x in self.mirror_paths)

View File

@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import sys
import pytest
@ -12,11 +13,8 @@
import spack.config
import spack.util.path as sup
# This module pertains to path string padding manipulation specifically
# which is used for binary caching. This functionality is not supported
# on Windows as of yet.
pytestmark = pytest.mark.skipif(sys.platform == 'win32',
reason="Tests fail on Windows")
is_windows = sys.platform == 'win32'
#: Some lines with lots of placeholders
padded_lines = [
@ -34,33 +32,47 @@
]
def test_sanitze_file_path(tmpdir):
"""Test filtering illegal characters out of potential file paths"""
# *nix illegal files characters are '/' and none others
illegal_file_path = str(tmpdir) + '//' + 'abcdefghi.txt'
if is_windows:
# Windows has a larger set of illegal characters
illegal_file_path = os.path.join(tmpdir, 'a<b>cd?e:f"g|h*i.txt')
real_path = sup.sanitize_file_path(illegal_file_path)
assert real_path == os.path.join(str(tmpdir), 'abcdefghi.txt')
# This class pertains to path string padding manipulation specifically
# which is used for binary caching. This functionality is not supported
# on Windows as of yet.
@pytest.mark.skipif(is_windows,
reason='Padding funtionality unsupported on Windows')
class TestPathPadding():
@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
def test_padding_substitution(padded, fixed):
def test_padding_substitution(self, padded, fixed):
"""Ensure that all padded lines are unpadded correctly."""
assert fixed == sup.padding_filter(padded)
def test_no_substitution():
"""Ensure that a line not containing one full path placeholder is not modified."""
def test_no_substitution(self):
"""Ensure that a line not containing one full path placeholder
is not modified."""
partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert sup.padding_filter(partial) == partial
def test_short_substitution():
def test_short_substitution(self):
"""Ensure that a single placeholder path component is replaced"""
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert short_subst == sup.padding_filter(short)
def test_partial_substitution():
def test_partial_substitution(self):
"""Ensure that a single placeholder path component is replaced"""
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert short_subst == sup.padding_filter(short)
def test_longest_prefix_re():
def test_longest_prefix_re(self):
"""Test that longest_prefix_re generates correct regular expressions."""
assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
"string", capture=True
@ -69,8 +81,7 @@ def test_longest_prefix_re():
"string", capture=False
)
def test_output_filtering(capfd, install_mockery, mutable_config):
def test_output_filtering(self, capfd, install_mockery, mutable_config):
"""Test filtering padding out of tty messages."""
long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200)
padding_string = "[padded-to-%d-chars]" % len(long_path)

View File

@ -87,6 +87,42 @@ def path_to_os_path(*pths):
return ret_pths
def sanitize_file_path(pth):
"""
Formats strings to contain only characters that can
be used to generate legal file paths.
Criteria for legal files based on
https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations
Args:
pth: string containing path to be created
on the host filesystem
Return:
sanitized string that can legally be made into a path
"""
# on unix, splitting path by seperators will remove
# instances of illegal characters on join
pth_cmpnts = pth.split(os.path.sep)
if is_windows:
drive_match = r'[a-zA-Z]:'
is_abs = bool(re.match(drive_match, pth_cmpnts[0]))
drive = pth_cmpnts[0] + os.path.sep if is_abs else ''
pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts
illegal_chars = r'[<>?:"|*\\]'
else:
drive = '/' if not pth_cmpnts[0] else ''
illegal_chars = r'[/]'
pth = []
for cmp in pth_cmpnts:
san_cmp = re.sub(illegal_chars, '', cmp)
pth.append(san_cmp)
return drive + os.path.join(*pth)
def system_path_filter(_func=None, arg_slice=None):
"""
Filters function arguments to account for platform path separators.