Sanitize filepath from URL (#30625)
Spack's staging logic constructs a file path based on a URL. The URL may contain characters which are not allowed in valid file paths on the system (e.g. Windows prohibits ':' and '?' among others). This commit adds a function to remove such offending characters (but otherwise preserves the URL string when constructing a file path).
This commit is contained in:
		| @@ -363,12 +363,13 @@ def __exit__(self, exc_type, exc_val, exc_tb): | ||||
|     def expected_archive_files(self): | ||||
|         """Possible archive file paths.""" | ||||
|         paths = [] | ||||
| 
 | ||||
|         fnames = [] | ||||
|         expanded = True | ||||
|         if isinstance(self.default_fetcher, fs.URLFetchStrategy): | ||||
|             expanded = self.default_fetcher.expand_archive | ||||
|             fnames.append(os.path.basename(self.default_fetcher.url)) | ||||
|             clean_url = os.path.basename( | ||||
|                 sup.sanitize_file_path(self.default_fetcher.url)) | ||||
|             fnames.append(clean_url) | ||||
| 
 | ||||
|         if self.mirror_paths: | ||||
|             fnames.extend(os.path.basename(x) for x in self.mirror_paths) | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| # | ||||
| # SPDX-License-Identifier: (Apache-2.0 OR MIT) | ||||
| 
 | ||||
| import os | ||||
| import sys | ||||
| 
 | ||||
| import pytest | ||||
| @@ -12,11 +13,8 @@ | ||||
| import spack.config | ||||
| import spack.util.path as sup | ||||
| 
 | ||||
| # This module pertains to path string padding manipulation specifically | ||||
| # which is used for binary caching. This functionality is not supported | ||||
| # on Windows as of yet. | ||||
| pytestmark = pytest.mark.skipif(sys.platform == 'win32', | ||||
|                                 reason="Tests fail on Windows") | ||||
| is_windows = sys.platform == 'win32' | ||||
| 
 | ||||
| 
 | ||||
| #: Some lines with lots of placeholders | ||||
| padded_lines = [ | ||||
| @@ -34,74 +32,87 @@ | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) | ||||
| def test_padding_substitution(padded, fixed): | ||||
|     """Ensure that all padded lines are unpadded correctly.""" | ||||
|     assert fixed == sup.padding_filter(padded) | ||||
| def test_sanitze_file_path(tmpdir): | ||||
|     """Test filtering illegal characters out of potential file paths""" | ||||
|     # *nix illegal files characters are '/' and none others | ||||
|     illegal_file_path = str(tmpdir) + '//' + 'abcdefghi.txt' | ||||
|     if is_windows: | ||||
|         # Windows has a larger set of illegal characters | ||||
|         illegal_file_path = os.path.join(tmpdir, 'a<b>cd?e:f"g|h*i.txt') | ||||
|     real_path = sup.sanitize_file_path(illegal_file_path) | ||||
|     assert real_path == os.path.join(str(tmpdir), 'abcdefghi.txt') | ||||
| 
 | ||||
| 
 | ||||
| def test_no_substitution(): | ||||
|     """Ensure that a line not containing one full path placeholder is not modified.""" | ||||
|     partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|     assert sup.padding_filter(partial) == partial | ||||
| # This class pertains to path string padding manipulation specifically | ||||
| # which is used for binary caching. This functionality is not supported | ||||
| # on Windows as of yet. | ||||
| @pytest.mark.skipif(is_windows, | ||||
|                     reason='Padding funtionality unsupported on Windows') | ||||
| class TestPathPadding(): | ||||
|     @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) | ||||
|     def test_padding_substitution(self, padded, fixed): | ||||
|         """Ensure that all padded lines are unpadded correctly.""" | ||||
|         assert fixed == sup.padding_filter(padded) | ||||
| 
 | ||||
|     def test_no_substitution(self): | ||||
|         """Ensure that a line not containing one full path placeholder | ||||
|            is not modified.""" | ||||
|         partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|         assert sup.padding_filter(partial) == partial | ||||
| 
 | ||||
| def test_short_substitution(): | ||||
|     """Ensure that a single placeholder path component is replaced""" | ||||
|     short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|     short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|     assert short_subst == sup.padding_filter(short) | ||||
|     def test_short_substitution(self): | ||||
|         """Ensure that a single placeholder path component is replaced""" | ||||
|         short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|         short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|         assert short_subst == sup.padding_filter(short) | ||||
| 
 | ||||
|     def test_partial_substitution(self): | ||||
|         """Ensure that a single placeholder path component is replaced""" | ||||
|         short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|         short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|         assert short_subst == sup.padding_filter(short) | ||||
| 
 | ||||
| def test_partial_substitution(): | ||||
|     """Ensure that a single placeholder path component is replaced""" | ||||
|     short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|     short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||
|     assert short_subst == sup.padding_filter(short) | ||||
|     def test_longest_prefix_re(self): | ||||
|         """Test that longest_prefix_re generates correct regular expressions.""" | ||||
|         assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||
|             "string", capture=True | ||||
|         ) | ||||
|         assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||
|             "string", capture=False | ||||
|         ) | ||||
| 
 | ||||
|     def test_output_filtering(self, capfd, install_mockery, mutable_config): | ||||
|         """Test filtering padding out of tty messages.""" | ||||
|         long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) | ||||
|         padding_string = "[padded-to-%d-chars]" % len(long_path) | ||||
| 
 | ||||
| def test_longest_prefix_re(): | ||||
|     """Test that longest_prefix_re generates correct regular expressions.""" | ||||
|     assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||
|         "string", capture=True | ||||
|     ) | ||||
|     assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||
|         "string", capture=False | ||||
|     ) | ||||
|         # test filtering when padding is enabled | ||||
|         with spack.config.override('config:install_tree', {"padded_length": 256}): | ||||
|             # tty.msg with filtering on the first argument | ||||
|             with sup.filter_padding(): | ||||
|                 tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||
|             out, err = capfd.readouterr() | ||||
|             assert padding_string in out | ||||
| 
 | ||||
|             # tty.msg with filtering on a laterargument | ||||
|             with sup.filter_padding(): | ||||
|                 tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) | ||||
|             out, err = capfd.readouterr() | ||||
|             assert padding_string in out | ||||
| 
 | ||||
| def test_output_filtering(capfd, install_mockery, mutable_config): | ||||
|     """Test filtering padding out of tty messages.""" | ||||
|     long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) | ||||
|     padding_string = "[padded-to-%d-chars]" % len(long_path) | ||||
|             # tty.error with filtering on the first argument | ||||
|             with sup.filter_padding(): | ||||
|                 tty.error("here is a long path: %s/with/a/suffix" % long_path) | ||||
|             out, err = capfd.readouterr() | ||||
|             assert padding_string in err | ||||
| 
 | ||||
|     # test filtering when padding is enabled | ||||
|     with spack.config.override('config:install_tree', {"padded_length": 256}): | ||||
|         # tty.msg with filtering on the first argument | ||||
|         with sup.filter_padding(): | ||||
|             tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||
|             # tty.error with filtering on a later argument | ||||
|             with sup.filter_padding(): | ||||
|                 tty.error("here is a long path:", "%s/with/a/suffix" % long_path) | ||||
|             out, err = capfd.readouterr() | ||||
|             assert padding_string in err | ||||
| 
 | ||||
|         # test no filtering | ||||
|         tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||
|         out, err = capfd.readouterr() | ||||
|         assert padding_string in out | ||||
| 
 | ||||
|         # tty.msg with filtering on a laterargument | ||||
|         with sup.filter_padding(): | ||||
|             tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) | ||||
|         out, err = capfd.readouterr() | ||||
|         assert padding_string in out | ||||
| 
 | ||||
|         # tty.error with filtering on the first argument | ||||
|         with sup.filter_padding(): | ||||
|             tty.error("here is a long path: %s/with/a/suffix" % long_path) | ||||
|         out, err = capfd.readouterr() | ||||
|         assert padding_string in err | ||||
| 
 | ||||
|         # tty.error with filtering on a later argument | ||||
|         with sup.filter_padding(): | ||||
|             tty.error("here is a long path:", "%s/with/a/suffix" % long_path) | ||||
|         out, err = capfd.readouterr() | ||||
|         assert padding_string in err | ||||
| 
 | ||||
|     # test no filtering | ||||
|     tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||
|     out, err = capfd.readouterr() | ||||
|     assert padding_string not in out | ||||
|         assert padding_string not in out | ||||
|   | ||||
| @@ -87,6 +87,42 @@ def path_to_os_path(*pths): | ||||
|     return ret_pths | ||||
| 
 | ||||
| 
 | ||||
| def sanitize_file_path(pth): | ||||
|     """ | ||||
|     Formats strings to contain only characters that can | ||||
|     be used to generate legal file paths. | ||||
| 
 | ||||
|     Criteria for legal files based on | ||||
|     https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations | ||||
| 
 | ||||
|     Args: | ||||
|         pth: string containing path to be created | ||||
|             on the host filesystem | ||||
| 
 | ||||
|     Return: | ||||
|         sanitized string that can legally be made into a path | ||||
|     """ | ||||
|     # on unix, splitting path by seperators will remove | ||||
|     # instances of illegal characters on join | ||||
|     pth_cmpnts = pth.split(os.path.sep) | ||||
| 
 | ||||
|     if is_windows: | ||||
|         drive_match = r'[a-zA-Z]:' | ||||
|         is_abs = bool(re.match(drive_match, pth_cmpnts[0])) | ||||
|         drive = pth_cmpnts[0] + os.path.sep if is_abs else '' | ||||
|         pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts | ||||
|         illegal_chars = r'[<>?:"|*\\]' | ||||
|     else: | ||||
|         drive = '/' if not pth_cmpnts[0] else '' | ||||
|         illegal_chars = r'[/]' | ||||
| 
 | ||||
|     pth = [] | ||||
|     for cmp in pth_cmpnts: | ||||
|         san_cmp = re.sub(illegal_chars, '', cmp) | ||||
|         pth.append(san_cmp) | ||||
|     return drive + os.path.join(*pth) | ||||
| 
 | ||||
| 
 | ||||
| def system_path_filter(_func=None, arg_slice=None): | ||||
|     """ | ||||
|     Filters function arguments to account for platform path separators. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 John W. Parent
					John W. Parent