Sanitize filepath from URL (#30625)
Spack's staging logic constructs a file path based on a URL. The URL may contain characters which are not allowed in valid file paths on the system (e.g. Windows prohibits ':' and '?' among others). This commit adds a function to remove such offending characters (but otherwise preserves the URL string when constructing a file path).
This commit is contained in:
		| @@ -363,12 +363,13 @@ def __exit__(self, exc_type, exc_val, exc_tb): | |||||||
|     def expected_archive_files(self): |     def expected_archive_files(self): | ||||||
|         """Possible archive file paths.""" |         """Possible archive file paths.""" | ||||||
|         paths = [] |         paths = [] | ||||||
| 
 |  | ||||||
|         fnames = [] |         fnames = [] | ||||||
|         expanded = True |         expanded = True | ||||||
|         if isinstance(self.default_fetcher, fs.URLFetchStrategy): |         if isinstance(self.default_fetcher, fs.URLFetchStrategy): | ||||||
|             expanded = self.default_fetcher.expand_archive |             expanded = self.default_fetcher.expand_archive | ||||||
|             fnames.append(os.path.basename(self.default_fetcher.url)) |             clean_url = os.path.basename( | ||||||
|  |                 sup.sanitize_file_path(self.default_fetcher.url)) | ||||||
|  |             fnames.append(clean_url) | ||||||
| 
 | 
 | ||||||
|         if self.mirror_paths: |         if self.mirror_paths: | ||||||
|             fnames.extend(os.path.basename(x) for x in self.mirror_paths) |             fnames.extend(os.path.basename(x) for x in self.mirror_paths) | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ | |||||||
| # | # | ||||||
| # SPDX-License-Identifier: (Apache-2.0 OR MIT) | # SPDX-License-Identifier: (Apache-2.0 OR MIT) | ||||||
| 
 | 
 | ||||||
|  | import os | ||||||
| import sys | import sys | ||||||
| 
 | 
 | ||||||
| import pytest | import pytest | ||||||
| @@ -12,11 +13,8 @@ | |||||||
| import spack.config | import spack.config | ||||||
| import spack.util.path as sup | import spack.util.path as sup | ||||||
| 
 | 
 | ||||||
| # This module pertains to path string padding manipulation specifically | is_windows = sys.platform == 'win32' | ||||||
| # which is used for binary caching. This functionality is not supported | 
 | ||||||
| # on Windows as of yet. |  | ||||||
| pytestmark = pytest.mark.skipif(sys.platform == 'win32', |  | ||||||
|                                 reason="Tests fail on Windows") |  | ||||||
| 
 | 
 | ||||||
| #: Some lines with lots of placeholders | #: Some lines with lots of placeholders | ||||||
| padded_lines = [ | padded_lines = [ | ||||||
| @@ -34,74 +32,87 @@ | |||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) | def test_sanitze_file_path(tmpdir): | ||||||
| def test_padding_substitution(padded, fixed): |     """Test filtering illegal characters out of potential file paths""" | ||||||
|     """Ensure that all padded lines are unpadded correctly.""" |     # *nix illegal files characters are '/' and none others | ||||||
|     assert fixed == sup.padding_filter(padded) |     illegal_file_path = str(tmpdir) + '//' + 'abcdefghi.txt' | ||||||
|  |     if is_windows: | ||||||
|  |         # Windows has a larger set of illegal characters | ||||||
|  |         illegal_file_path = os.path.join(tmpdir, 'a<b>cd?e:f"g|h*i.txt') | ||||||
|  |     real_path = sup.sanitize_file_path(illegal_file_path) | ||||||
|  |     assert real_path == os.path.join(str(tmpdir), 'abcdefghi.txt') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_no_substitution(): | # This class pertains to path string padding manipulation specifically | ||||||
|     """Ensure that a line not containing one full path placeholder is not modified.""" | # which is used for binary caching. This functionality is not supported | ||||||
|     partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | # on Windows as of yet. | ||||||
|     assert sup.padding_filter(partial) == partial | @pytest.mark.skipif(is_windows, | ||||||
|  |                     reason='Padding funtionality unsupported on Windows') | ||||||
|  | class TestPathPadding(): | ||||||
|  |     @pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) | ||||||
|  |     def test_padding_substitution(self, padded, fixed): | ||||||
|  |         """Ensure that all padded lines are unpadded correctly.""" | ||||||
|  |         assert fixed == sup.padding_filter(padded) | ||||||
| 
 | 
 | ||||||
|  |     def test_no_substitution(self): | ||||||
|  |         """Ensure that a line not containing one full path placeholder | ||||||
|  |            is not modified.""" | ||||||
|  |         partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||||
|  |         assert sup.padding_filter(partial) == partial | ||||||
| 
 | 
 | ||||||
| def test_short_substitution(): |     def test_short_substitution(self): | ||||||
|     """Ensure that a single placeholder path component is replaced""" |         """Ensure that a single placeholder path component is replaced""" | ||||||
|     short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 |         short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||||
|     short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 |         short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||||
|     assert short_subst == sup.padding_filter(short) |         assert short_subst == sup.padding_filter(short) | ||||||
| 
 | 
 | ||||||
|  |     def test_partial_substitution(self): | ||||||
|  |         """Ensure that a single placeholder path component is replaced""" | ||||||
|  |         short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||||
|  |         short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 | ||||||
|  |         assert short_subst == sup.padding_filter(short) | ||||||
| 
 | 
 | ||||||
| def test_partial_substitution(): |     def test_longest_prefix_re(self): | ||||||
|     """Ensure that a single placeholder path component is replaced""" |         """Test that longest_prefix_re generates correct regular expressions.""" | ||||||
|     short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 |         assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||||
|     short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'"  # noqa: E501 |             "string", capture=True | ||||||
|     assert short_subst == sup.padding_filter(short) |         ) | ||||||
|  |         assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( | ||||||
|  |             "string", capture=False | ||||||
|  |         ) | ||||||
| 
 | 
 | ||||||
|  |     def test_output_filtering(self, capfd, install_mockery, mutable_config): | ||||||
|  |         """Test filtering padding out of tty messages.""" | ||||||
|  |         long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) | ||||||
|  |         padding_string = "[padded-to-%d-chars]" % len(long_path) | ||||||
| 
 | 
 | ||||||
| def test_longest_prefix_re(): |         # test filtering when padding is enabled | ||||||
|     """Test that longest_prefix_re generates correct regular expressions.""" |         with spack.config.override('config:install_tree', {"padded_length": 256}): | ||||||
|     assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( |             # tty.msg with filtering on the first argument | ||||||
|         "string", capture=True |             with sup.filter_padding(): | ||||||
|     ) |                 tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||||
|     assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( |             out, err = capfd.readouterr() | ||||||
|         "string", capture=False |             assert padding_string in out | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
|  |             # tty.msg with filtering on a laterargument | ||||||
|  |             with sup.filter_padding(): | ||||||
|  |                 tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) | ||||||
|  |             out, err = capfd.readouterr() | ||||||
|  |             assert padding_string in out | ||||||
| 
 | 
 | ||||||
| def test_output_filtering(capfd, install_mockery, mutable_config): |             # tty.error with filtering on the first argument | ||||||
|     """Test filtering padding out of tty messages.""" |             with sup.filter_padding(): | ||||||
|     long_path = "/" + "/".join([sup.SPACK_PATH_PADDING_CHARS] * 200) |                 tty.error("here is a long path: %s/with/a/suffix" % long_path) | ||||||
|     padding_string = "[padded-to-%d-chars]" % len(long_path) |             out, err = capfd.readouterr() | ||||||
|  |             assert padding_string in err | ||||||
| 
 | 
 | ||||||
|     # test filtering when padding is enabled |             # tty.error with filtering on a later argument | ||||||
|     with spack.config.override('config:install_tree', {"padded_length": 256}): |             with sup.filter_padding(): | ||||||
|         # tty.msg with filtering on the first argument |                 tty.error("here is a long path:", "%s/with/a/suffix" % long_path) | ||||||
|         with sup.filter_padding(): |             out, err = capfd.readouterr() | ||||||
|             tty.msg("here is a long path: %s/with/a/suffix" % long_path) |             assert padding_string in err | ||||||
|  | 
 | ||||||
|  |         # test no filtering | ||||||
|  |         tty.msg("here is a long path: %s/with/a/suffix" % long_path) | ||||||
|         out, err = capfd.readouterr() |         out, err = capfd.readouterr() | ||||||
|         assert padding_string in out |         assert padding_string not in out | ||||||
| 
 |  | ||||||
|         # tty.msg with filtering on a laterargument |  | ||||||
|         with sup.filter_padding(): |  | ||||||
|             tty.msg("here is a long path:", "%s/with/a/suffix" % long_path) |  | ||||||
|         out, err = capfd.readouterr() |  | ||||||
|         assert padding_string in out |  | ||||||
| 
 |  | ||||||
|         # tty.error with filtering on the first argument |  | ||||||
|         with sup.filter_padding(): |  | ||||||
|             tty.error("here is a long path: %s/with/a/suffix" % long_path) |  | ||||||
|         out, err = capfd.readouterr() |  | ||||||
|         assert padding_string in err |  | ||||||
| 
 |  | ||||||
|         # tty.error with filtering on a later argument |  | ||||||
|         with sup.filter_padding(): |  | ||||||
|             tty.error("here is a long path:", "%s/with/a/suffix" % long_path) |  | ||||||
|         out, err = capfd.readouterr() |  | ||||||
|         assert padding_string in err |  | ||||||
| 
 |  | ||||||
|     # test no filtering |  | ||||||
|     tty.msg("here is a long path: %s/with/a/suffix" % long_path) |  | ||||||
|     out, err = capfd.readouterr() |  | ||||||
|     assert padding_string not in out |  | ||||||
|   | |||||||
| @@ -87,6 +87,42 @@ def path_to_os_path(*pths): | |||||||
|     return ret_pths |     return ret_pths | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def sanitize_file_path(pth): | ||||||
|  |     """ | ||||||
|  |     Formats strings to contain only characters that can | ||||||
|  |     be used to generate legal file paths. | ||||||
|  | 
 | ||||||
|  |     Criteria for legal files based on | ||||||
|  |     https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations | ||||||
|  | 
 | ||||||
|  |     Args: | ||||||
|  |         pth: string containing path to be created | ||||||
|  |             on the host filesystem | ||||||
|  | 
 | ||||||
|  |     Return: | ||||||
|  |         sanitized string that can legally be made into a path | ||||||
|  |     """ | ||||||
|  |     # on unix, splitting path by seperators will remove | ||||||
|  |     # instances of illegal characters on join | ||||||
|  |     pth_cmpnts = pth.split(os.path.sep) | ||||||
|  | 
 | ||||||
|  |     if is_windows: | ||||||
|  |         drive_match = r'[a-zA-Z]:' | ||||||
|  |         is_abs = bool(re.match(drive_match, pth_cmpnts[0])) | ||||||
|  |         drive = pth_cmpnts[0] + os.path.sep if is_abs else '' | ||||||
|  |         pth_cmpnts = pth_cmpnts[1:] if drive else pth_cmpnts | ||||||
|  |         illegal_chars = r'[<>?:"|*\\]' | ||||||
|  |     else: | ||||||
|  |         drive = '/' if not pth_cmpnts[0] else '' | ||||||
|  |         illegal_chars = r'[/]' | ||||||
|  | 
 | ||||||
|  |     pth = [] | ||||||
|  |     for cmp in pth_cmpnts: | ||||||
|  |         san_cmp = re.sub(illegal_chars, '', cmp) | ||||||
|  |         pth.append(san_cmp) | ||||||
|  |     return drive + os.path.join(*pth) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def system_path_filter(_func=None, arg_slice=None): | def system_path_filter(_func=None, arg_slice=None): | ||||||
|     """ |     """ | ||||||
|     Filters function arguments to account for platform path separators. |     Filters function arguments to account for platform path separators. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 John W. Parent
					John W. Parent