|  |  |  | @@ -3,12 +3,13 @@ | 
		
	
		
			
				|  |  |  |  | # | 
		
	
		
			
				|  |  |  |  | # SPDX-License-Identifier: (Apache-2.0 OR MIT) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | import errno | 
		
	
		
			
				|  |  |  |  | import inspect | 
		
	
		
			
				|  |  |  |  | import io | 
		
	
		
			
				|  |  |  |  | import os | 
		
	
		
			
				|  |  |  |  | import re | 
		
	
		
			
				|  |  |  |  | import shutil | 
		
	
		
			
				|  |  |  |  | import sys | 
		
	
		
			
				|  |  |  |  | from typing import BinaryIO, Callable, Dict, List, Optional | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | import llnl.url | 
		
	
		
			
				|  |  |  |  | from llnl.util import tty | 
		
	
	
		
			
				
					
					|  |  |  | @@ -19,42 +20,29 @@ | 
		
	
		
			
				|  |  |  |  | try: | 
		
	
		
			
				|  |  |  |  |     import bz2  # noqa | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     _bz2_support = True | 
		
	
		
			
				|  |  |  |  |     BZ2_SUPPORTED = True | 
		
	
		
			
				|  |  |  |  | except ImportError: | 
		
	
		
			
				|  |  |  |  |     _bz2_support = False | 
		
	
		
			
				|  |  |  |  |     BZ2_SUPPORTED = False | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | try: | 
		
	
		
			
				|  |  |  |  |     import gzip  # noqa | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     _gzip_support = True | 
		
	
		
			
				|  |  |  |  |     GZIP_SUPPORTED = True | 
		
	
		
			
				|  |  |  |  | except ImportError: | 
		
	
		
			
				|  |  |  |  |     _gzip_support = False | 
		
	
		
			
				|  |  |  |  |     GZIP_SUPPORTED = False | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | try: | 
		
	
		
			
				|  |  |  |  |     import lzma  # noqa # novermin | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     _lzma_support = True | 
		
	
		
			
				|  |  |  |  |     LZMA_SUPPORTED = True | 
		
	
		
			
				|  |  |  |  | except ImportError: | 
		
	
		
			
				|  |  |  |  |     _lzma_support = False | 
		
	
		
			
				|  |  |  |  |     LZMA_SUPPORTED = False | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def is_lzma_supported(): | 
		
	
		
			
				|  |  |  |  |     return _lzma_support | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def is_gzip_supported(): | 
		
	
		
			
				|  |  |  |  |     return _gzip_support | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def is_bz2_supported(): | 
		
	
		
			
				|  |  |  |  |     return _bz2_support | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _system_untar(archive_file, remove_archive_file=False): | 
		
	
		
			
				|  |  |  |  |     """Returns path to unarchived tar file. | 
		
	
		
			
				|  |  |  |  |     Untars archive via system tar. | 
		
	
		
			
				|  |  |  |  | def _system_untar(archive_file: str, remove_archive_file: bool = False) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to unarchived tar file. Untars archive via system tar. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         archive_file (str): absolute path to the archive to be extracted. | 
		
	
	
		
			
				
					
					|  |  |  | @@ -69,6 +57,11 @@ def _system_untar(archive_file, remove_archive_file=False): | 
		
	
		
			
				|  |  |  |  |         archive_file = archive_file_no_ext + "-input" | 
		
	
		
			
				|  |  |  |  |         shutil.move(archive_file_no_ext, archive_file) | 
		
	
		
			
				|  |  |  |  |     tar = which("tar", required=True) | 
		
	
		
			
				|  |  |  |  |     # GNU tar's --no-same-owner is not as portable, -o works for BSD tar too. This flag is relevant | 
		
	
		
			
				|  |  |  |  |     # when extracting archives as root, where tar attempts to set original ownership of files. This | 
		
	
		
			
				|  |  |  |  |     # is redundant when distributing tarballs, as the tarballs are created on different systems | 
		
	
		
			
				|  |  |  |  |     # than where they are extracted. In certain cases like rootless containers, setting original | 
		
	
		
			
				|  |  |  |  |     # ownership is known to fail, so we need to disable it. | 
		
	
		
			
				|  |  |  |  |     tar.add_default_arg("-oxf") | 
		
	
		
			
				|  |  |  |  |     tar(archive_file) | 
		
	
		
			
				|  |  |  |  |     if remove_archive_file: | 
		
	
	
		
			
				
					
					|  |  |  | @@ -79,21 +72,21 @@ def _system_untar(archive_file, remove_archive_file=False): | 
		
	
		
			
				|  |  |  |  |     return outfile | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _bunzip2(archive_file): | 
		
	
		
			
				|  |  |  |  | def _bunzip2(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed file. | 
		
	
		
			
				|  |  |  |  |     Uses Python's bz2 module to decompress bz2 compressed archives | 
		
	
		
			
				|  |  |  |  |     Falls back to the system utility if the Python module `bz2` cannot be found | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         archive_file (str): absolute path to the bz2 archive to be decompressed | 
		
	
		
			
				|  |  |  |  |         archive_file: absolute path to the bz2 archive to be decompressed | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     if is_bz2_supported(): | 
		
	
		
			
				|  |  |  |  |     if BZ2_SUPPORTED: | 
		
	
		
			
				|  |  |  |  |         return _py_bunzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |         return _system_bunzip(archive_file) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _py_bunzip(archive_file): | 
		
	
		
			
				|  |  |  |  | def _py_bunzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed file. | 
		
	
		
			
				|  |  |  |  |     Decompresses bz2 compressed archives/files via python's bz2 module""" | 
		
	
		
			
				|  |  |  |  |     decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "bz2")) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -106,7 +99,7 @@ def _py_bunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     return archive_out | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _system_bunzip(archive_file): | 
		
	
		
			
				|  |  |  |  | def _system_bunzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed file. | 
		
	
		
			
				|  |  |  |  |     Decompresses bz2 compressed archives/files via system bzip2 utility""" | 
		
	
		
			
				|  |  |  |  |     compressed_file_name = os.path.basename(archive_file) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -121,25 +114,20 @@ def _system_bunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     return archive_out | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _gunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file | 
		
	
		
			
				|  |  |  |  |     Decompresses `.gz` extensions. Prefer native Python `gzip` module. | 
		
	
		
			
				|  |  |  |  |     Failing back to system utility gunzip. | 
		
	
		
			
				|  |  |  |  |     Like gunzip, but extracts in the current working directory | 
		
	
		
			
				|  |  |  |  |     instead of in-place. | 
		
	
		
			
				|  |  |  |  | def _gunzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file. Decompresses `.gz` extensions. Prefer native Python | 
		
	
		
			
				|  |  |  |  |     `gzip` module, falling back to system utility gunzip. Like gunzip, but extracts in the current | 
		
	
		
			
				|  |  |  |  |     working directory instead of in-place. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         archive_file (str): absolute path of the file to be decompressed | 
		
	
		
			
				|  |  |  |  |         archive_file: absolute path of the file to be decompressed | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     if is_gzip_supported(): | 
		
	
		
			
				|  |  |  |  |         return _py_gunzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |         return _system_gunzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     return _py_gunzip(archive_file) if GZIP_SUPPORTED else _system_gunzip(archive_file) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _py_gunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file | 
		
	
		
			
				|  |  |  |  |     Decompresses `.gz` compressed archvies via python gzip module""" | 
		
	
		
			
				|  |  |  |  | def _py_gunzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file. Decompresses `.gz` compressed archives via python gzip | 
		
	
		
			
				|  |  |  |  |     module""" | 
		
	
		
			
				|  |  |  |  |     decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "gz")) | 
		
	
		
			
				|  |  |  |  |     working_dir = os.getcwd() | 
		
	
		
			
				|  |  |  |  |     destination_abspath = os.path.join(working_dir, decompressed_file) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -150,9 +138,8 @@ def _py_gunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     return destination_abspath | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _system_gunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file | 
		
	
		
			
				|  |  |  |  |     Decompresses `.gz` compressed files via system gzip""" | 
		
	
		
			
				|  |  |  |  | def _system_gunzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to gunzip'd file. Decompresses `.gz` compressed files via system gzip""" | 
		
	
		
			
				|  |  |  |  |     archive_file_no_ext = llnl.url.strip_compression_extension(archive_file) | 
		
	
		
			
				|  |  |  |  |     if archive_file_no_ext == archive_file: | 
		
	
		
			
				|  |  |  |  |         # the zip file has no extension. On Unix gunzip cannot unzip onto itself | 
		
	
	
		
			
				
					
					|  |  |  | @@ -170,50 +157,38 @@ def _system_gunzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     return destination_abspath | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _unzip(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to extracted zip archive | 
		
	
		
			
				|  |  |  |  |     Extract Zipfile, searching for unzip system executable | 
		
	
		
			
				|  |  |  |  |     If unavailable, search for 'tar' executable on system and use instead | 
		
	
		
			
				|  |  |  |  | def _unzip(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to extracted zip archive. Extract Zipfile, searching for unzip system | 
		
	
		
			
				|  |  |  |  |     executable. If unavailable, search for 'tar' executable on system and use instead. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         archive_file (str): absolute path of the file to be decompressed | 
		
	
		
			
				|  |  |  |  |         archive_file: absolute path of the file to be decompressed | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     extracted_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="zip")) | 
		
	
		
			
				|  |  |  |  |     if sys.platform == "win32": | 
		
	
		
			
				|  |  |  |  |         return _system_untar(archive_file) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |         exe = "unzip" | 
		
	
		
			
				|  |  |  |  |         arg = "-q" | 
		
	
		
			
				|  |  |  |  |         unzip = which(exe, required=True) | 
		
	
		
			
				|  |  |  |  |         unzip.add_default_arg(arg) | 
		
	
		
			
				|  |  |  |  |         unzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     return extracted_file | 
		
	
		
			
				|  |  |  |  |     unzip = which("unzip", required=True) | 
		
	
		
			
				|  |  |  |  |     unzip.add_default_arg("-q") | 
		
	
		
			
				|  |  |  |  |     unzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     return os.path.basename(llnl.url.strip_extension(archive_file, extension="zip")) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _system_unZ(archive_file): | 
		
	
		
			
				|  |  |  |  | def _system_unZ(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed file | 
		
	
		
			
				|  |  |  |  |     Decompress UNIX compress style compression | 
		
	
		
			
				|  |  |  |  |     Utilizes gunzip on unix and 7zip on Windows | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     if sys.platform == "win32": | 
		
	
		
			
				|  |  |  |  |         result = _system_7zip(archive_file) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |         result = _system_gunzip(archive_file) | 
		
	
		
			
				|  |  |  |  |     return result | 
		
	
		
			
				|  |  |  |  |         return _system_7zip(archive_file) | 
		
	
		
			
				|  |  |  |  |     return _system_gunzip(archive_file) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _lzma_decomp(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed xz file. | 
		
	
		
			
				|  |  |  |  |     Decompress lzma compressed files. Prefer Python native | 
		
	
		
			
				|  |  |  |  |     lzma module, but fall back on command line xz tooling | 
		
	
		
			
				|  |  |  |  |     to find available Python support.""" | 
		
	
		
			
				|  |  |  |  |     if is_lzma_supported(): | 
		
	
		
			
				|  |  |  |  |         return _py_lzma(archive_file) | 
		
	
		
			
				|  |  |  |  |     else: | 
		
	
		
			
				|  |  |  |  |         return _xz(archive_file) | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed xz file. Decompress lzma compressed files. Prefer Python native | 
		
	
		
			
				|  |  |  |  |     lzma module, but fall back on command line xz tooling if Python support is unavailable.""" | 
		
	
		
			
				|  |  |  |  |     return _py_lzma(archive_file) if LZMA_SUPPORTED else _xz(archive_file) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _win_compressed_tarball_handler(decompressor): | 
		
	
		
			
				|  |  |  |  | def _win_compressed_tarball_handler(decompressor: Callable[[str], str]) -> Callable[[str], str]: | 
		
	
		
			
				|  |  |  |  |     """Returns function pointer to two stage decompression | 
		
	
		
			
				|  |  |  |  |     and extraction method | 
		
	
		
			
				|  |  |  |  |     Decompress and extract compressed tarballs on Windows. | 
		
	
	
		
			
				
					
					|  |  |  | @@ -227,7 +202,7 @@ def _win_compressed_tarball_handler(decompressor): | 
		
	
		
			
				|  |  |  |  |     can be installed manually or via spack | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     def unarchive(archive_file): | 
		
	
		
			
				|  |  |  |  |     def unarchive(archive_file: str): | 
		
	
		
			
				|  |  |  |  |         # perform intermediate extraction step | 
		
	
		
			
				|  |  |  |  |         # record name of new archive so we can extract | 
		
	
		
			
				|  |  |  |  |         decomped_tarball = decompressor(archive_file) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -238,9 +213,9 @@ def unarchive(archive_file): | 
		
	
		
			
				|  |  |  |  |     return unarchive | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _py_lzma(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed .xz files | 
		
	
		
			
				|  |  |  |  |     Decompress lzma compressed .xz files via python lzma module""" | 
		
	
		
			
				|  |  |  |  | def _py_lzma(archive_file: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed .xz files. Decompress lzma compressed .xz files via Python | 
		
	
		
			
				|  |  |  |  |     lzma module.""" | 
		
	
		
			
				|  |  |  |  |     decompressed_file = os.path.basename(llnl.url.strip_compression_extension(archive_file, "xz")) | 
		
	
		
			
				|  |  |  |  |     archive_out = os.path.join(os.getcwd(), decompressed_file) | 
		
	
		
			
				|  |  |  |  |     with open(archive_out, "wb") as ar: | 
		
	
	
		
			
				
					
					|  |  |  | @@ -250,10 +225,8 @@ def _py_lzma(archive_file): | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _xz(archive_file): | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed xz files | 
		
	
		
			
				|  |  |  |  |     Decompress lzma compressed .xz files via xz command line | 
		
	
		
			
				|  |  |  |  |     tool. | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     """Returns path to decompressed xz files. Decompress lzma compressed .xz files via xz command | 
		
	
		
			
				|  |  |  |  |     line tool.""" | 
		
	
		
			
				|  |  |  |  |     decompressed_file = os.path.basename(llnl.url.strip_extension(archive_file, extension="xz")) | 
		
	
		
			
				|  |  |  |  |     working_dir = os.getcwd() | 
		
	
		
			
				|  |  |  |  |     destination_abspath = os.path.join(working_dir, decompressed_file) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -292,19 +265,17 @@ def _system_7zip(archive_file): | 
		
	
		
			
				|  |  |  |  |     return outfile | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def decompressor_for(path, extension=None): | 
		
	
		
			
				|  |  |  |  | def decompressor_for(path: str, extension: Optional[str] = None): | 
		
	
		
			
				|  |  |  |  |     """Returns appropriate decompression/extraction algorithm function pointer | 
		
	
		
			
				|  |  |  |  |     for provided extension. If extension is none, it is computed | 
		
	
		
			
				|  |  |  |  |     from the `path` and the decompression function is derived | 
		
	
		
			
				|  |  |  |  |     from that information.""" | 
		
	
		
			
				|  |  |  |  |     if not extension: | 
		
	
		
			
				|  |  |  |  |         extension = extension_from_file(path, decompress=True) | 
		
	
		
			
				|  |  |  |  |         extension = extension_from_magic_numbers(path, decompress=True) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     if not llnl.url.allowed_archive(extension): | 
		
	
		
			
				|  |  |  |  |     if not extension or not llnl.url.allowed_archive(extension): | 
		
	
		
			
				|  |  |  |  |         raise CommandNotFoundError( | 
		
	
		
			
				|  |  |  |  |             "Cannot extract archive, \ | 
		
	
		
			
				|  |  |  |  | unrecognized file extension: '%s'" | 
		
	
		
			
				|  |  |  |  |             % extension | 
		
	
		
			
				|  |  |  |  |             f"Cannot extract {path}, unrecognized file extension: '{extension}'" | 
		
	
		
			
				|  |  |  |  |         ) | 
		
	
		
			
				|  |  |  |  |     if sys.platform == "win32": | 
		
	
		
			
				|  |  |  |  |         return decompressor_for_win(extension) | 
		
	
	
		
			
				
					
					|  |  |  | @@ -312,58 +283,37 @@ def decompressor_for(path, extension=None): | 
		
	
		
			
				|  |  |  |  |         return decompressor_for_nix(extension) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def decompressor_for_nix(extension): | 
		
	
		
			
				|  |  |  |  |     """Returns a function pointer to appropriate decompression | 
		
	
		
			
				|  |  |  |  |     algorithm based on extension type and unix specific considerations | 
		
	
		
			
				|  |  |  |  |     i.e. a reasonable expectation system utils like gzip, bzip2, and xz are | 
		
	
		
			
				|  |  |  |  |     available | 
		
	
		
			
				|  |  |  |  | def decompressor_for_nix(extension: str) -> Callable[[str], str]: | 
		
	
		
			
				|  |  |  |  |     """Returns a function pointer to appropriate decompression algorithm based on extension type | 
		
	
		
			
				|  |  |  |  |     and unix specific considerations i.e. a reasonable expectation system utils like gzip, bzip2, | 
		
	
		
			
				|  |  |  |  |     and xz are available | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         path (str): path of the archive file requiring decompression | 
		
	
		
			
				|  |  |  |  |         extension: file extension of the archive requiring decompression | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     if re.match(r"zip$", extension): | 
		
	
		
			
				|  |  |  |  |         return _unzip | 
		
	
		
			
				|  |  |  |  |     extension_to_decompressor: Dict[str, Callable[[str], str]] = { | 
		
	
		
			
				|  |  |  |  |         "zip": _unzip, | 
		
	
		
			
				|  |  |  |  |         "gz": _gunzip, | 
		
	
		
			
				|  |  |  |  |         "bz2": _bunzip2, | 
		
	
		
			
				|  |  |  |  |         "Z": _system_unZ,  # no builtin support for .Z files | 
		
	
		
			
				|  |  |  |  |         "xz": _lzma_decomp, | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     if re.match(r"gz$", extension): | 
		
	
		
			
				|  |  |  |  |         return _gunzip | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     if re.match(r"bz2$", extension): | 
		
	
		
			
				|  |  |  |  |         return _bunzip2 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Python does not have native support | 
		
	
		
			
				|  |  |  |  |     # of any kind for .Z files. In these cases, | 
		
	
		
			
				|  |  |  |  |     # we rely on external tools such as tar, | 
		
	
		
			
				|  |  |  |  |     # 7z, or uncompressZ | 
		
	
		
			
				|  |  |  |  |     if re.match(r"Z$", extension): | 
		
	
		
			
				|  |  |  |  |         return _system_unZ | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Python and platform may not have support for lzma | 
		
	
		
			
				|  |  |  |  |     # compression. If no lzma support, use tools available on systems | 
		
	
		
			
				|  |  |  |  |     if re.match(r"xz$", extension): | 
		
	
		
			
				|  |  |  |  |         return _lzma_decomp | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     return _system_untar | 
		
	
		
			
				|  |  |  |  |     return extension_to_decompressor.get(extension, _system_untar) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _determine_py_decomp_archive_strategy(extension): | 
		
	
		
			
				|  |  |  |  | def _determine_py_decomp_archive_strategy(extension: str) -> Optional[Callable[[str], str]]: | 
		
	
		
			
				|  |  |  |  |     """Returns appropriate python based decompression strategy | 
		
	
		
			
				|  |  |  |  |     based on extension type""" | 
		
	
		
			
				|  |  |  |  |     # Only rely on Python decompression support for gz | 
		
	
		
			
				|  |  |  |  |     if re.match(r"gz$", extension): | 
		
	
		
			
				|  |  |  |  |         return _py_gunzip | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Only rely on Python decompression support for bzip2 | 
		
	
		
			
				|  |  |  |  |     if re.match(r"bz2$", extension): | 
		
	
		
			
				|  |  |  |  |         return _py_bunzip | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Only rely on Python decompression support for xz | 
		
	
		
			
				|  |  |  |  |     if re.match(r"xz$", extension): | 
		
	
		
			
				|  |  |  |  |         return _py_lzma | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     return None | 
		
	
		
			
				|  |  |  |  |     extension_to_decompressor: Dict[str, Callable[[str], str]] = { | 
		
	
		
			
				|  |  |  |  |         "gz": _py_gunzip, | 
		
	
		
			
				|  |  |  |  |         "bz2": _py_bunzip, | 
		
	
		
			
				|  |  |  |  |         "xz": _py_lzma, | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |     return extension_to_decompressor.get(extension, None) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def decompressor_for_win(extension): | 
		
	
		
			
				|  |  |  |  | def decompressor_for_win(extension: str) -> Callable[[str], str]: | 
		
	
		
			
				|  |  |  |  |     """Returns a function pointer to appropriate decompression | 
		
	
		
			
				|  |  |  |  |     algorithm based on extension type and Windows specific considerations | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
	
		
			
				
					
					|  |  |  | @@ -371,34 +321,32 @@ def decompressor_for_win(extension): | 
		
	
		
			
				|  |  |  |  |     So we must rely exclusively on Python module support for all compression | 
		
	
		
			
				|  |  |  |  |     operations, tar for tarballs and zip files, and 7zip for Z compressed archives | 
		
	
		
			
				|  |  |  |  |     and files as Python does not provide support for the UNIX compress algorithm | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         path (str): path of the archive file requiring decompression | 
		
	
		
			
				|  |  |  |  |         extension (str): extension | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     extension = llnl.url.expand_contracted_extension(extension) | 
		
	
		
			
				|  |  |  |  |     # Windows native tar can handle .zip extensions, use standard | 
		
	
		
			
				|  |  |  |  |     # unzip method | 
		
	
		
			
				|  |  |  |  |     if re.match(r"zip$", extension): | 
		
	
		
			
				|  |  |  |  |         return _unzip | 
		
	
		
			
				|  |  |  |  |     extension_to_decompressor: Dict[str, Callable[[str], str]] = { | 
		
	
		
			
				|  |  |  |  |         # Windows native tar can handle .zip extensions, use standard unzip method | 
		
	
		
			
				|  |  |  |  |         "zip": _unzip, | 
		
	
		
			
				|  |  |  |  |         # if extension is standard tarball, invoke Windows native tar | 
		
	
		
			
				|  |  |  |  |         "tar": _system_untar, | 
		
	
		
			
				|  |  |  |  |         # Python does not have native support of any kind for .Z files. In these cases, we rely on | 
		
	
		
			
				|  |  |  |  |         # 7zip, which must be installed outside of Spack and added to the PATH or externally | 
		
	
		
			
				|  |  |  |  |         # detected | 
		
	
		
			
				|  |  |  |  |         "Z": _system_unZ, | 
		
	
		
			
				|  |  |  |  |         "xz": _lzma_decomp, | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # if extension is standard tarball, invoke Windows native tar | 
		
	
		
			
				|  |  |  |  |     if re.match(r"tar$", extension): | 
		
	
		
			
				|  |  |  |  |         return _system_untar | 
		
	
		
			
				|  |  |  |  |     decompressor = extension_to_decompressor.get(extension) | 
		
	
		
			
				|  |  |  |  |     if decompressor: | 
		
	
		
			
				|  |  |  |  |         return decompressor | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Python does not have native support | 
		
	
		
			
				|  |  |  |  |     # of any kind for .Z files. In these cases, | 
		
	
		
			
				|  |  |  |  |     # we rely on 7zip, which must be installed outside | 
		
	
		
			
				|  |  |  |  |     # of spack and added to the PATH or externally detected | 
		
	
		
			
				|  |  |  |  |     if re.match(r"Z$", extension): | 
		
	
		
			
				|  |  |  |  |         return _system_unZ | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Windows vendors no native decompression tools, attempt to derive | 
		
	
		
			
				|  |  |  |  |     # python based decompression strategy | 
		
	
		
			
				|  |  |  |  |     # Expand extension from contracted extension i.e. tar.gz from .tgz | 
		
	
		
			
				|  |  |  |  |     # no-op on non contracted extensions | 
		
	
		
			
				|  |  |  |  |     # Windows vendors no native decompression tools, attempt to derive Python based decompression | 
		
	
		
			
				|  |  |  |  |     # strategy. Expand extension from abbreviated ones, i.e. tar.gz from .tgz | 
		
	
		
			
				|  |  |  |  |     compression_extension = llnl.url.compression_ext_from_compressed_archive(extension) | 
		
	
		
			
				|  |  |  |  |     decompressor = _determine_py_decomp_archive_strategy(compression_extension) | 
		
	
		
			
				|  |  |  |  |     decompressor = ( | 
		
	
		
			
				|  |  |  |  |         _determine_py_decomp_archive_strategy(compression_extension) | 
		
	
		
			
				|  |  |  |  |         if compression_extension | 
		
	
		
			
				|  |  |  |  |         else None | 
		
	
		
			
				|  |  |  |  |     ) | 
		
	
		
			
				|  |  |  |  |     if not decompressor: | 
		
	
		
			
				|  |  |  |  |         raise SpackError( | 
		
	
		
			
				|  |  |  |  |             "Spack was unable to determine a proper decompression strategy for" | 
		
	
	
		
			
				
					
					|  |  |  | @@ -412,103 +360,75 @@ def decompressor_for_win(extension): | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class FileTypeInterface: | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     Base interface class for describing and querying file type information. | 
		
	
		
			
				|  |  |  |  |     FileType describes information about a single file type | 
		
	
		
			
				|  |  |  |  |     such as extension, and byte header properties, and provides an interface | 
		
	
		
			
				|  |  |  |  |     to check a given file against said type based on magic number. | 
		
	
		
			
				|  |  |  |  |     """Base interface class for describing and querying file type information. FileType describes | 
		
	
		
			
				|  |  |  |  |     information about a single file type such as typical extension and byte header properties, | 
		
	
		
			
				|  |  |  |  |     and provides an interface to check a given file against said type based on magic number. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     This class should be subclassed each time a new type is to be | 
		
	
		
			
				|  |  |  |  |     described. | 
		
	
		
			
				|  |  |  |  |     This class should be subclassed each time a new type is to be described. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Note: This class should not be used directly as it does not define any specific | 
		
	
		
			
				|  |  |  |  |     file. Attempts to directly use this class will fail, as it does not define | 
		
	
		
			
				|  |  |  |  |     a magic number or extension string. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Subclasses should each describe a different | 
		
	
		
			
				|  |  |  |  |     type of file. In order to do so, they must define | 
		
	
		
			
				|  |  |  |  |     the extension string, magic number, and header offset (if non zero). | 
		
	
		
			
				|  |  |  |  |     If a class has multiple magic numbers, it will need to | 
		
	
		
			
				|  |  |  |  |     override the method describin that file types magic numbers and | 
		
	
		
			
				|  |  |  |  |     the method that checks a types magic numbers against a given file's. | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     Subclasses should each describe a different type of file. In order to do so, they must define | 
		
	
		
			
				|  |  |  |  |     the extension string, magic number, and header offset (if non zero). If a class has multiple | 
		
	
		
			
				|  |  |  |  |     magic numbers, it will need to override the method describing that file type's magic numbers | 
		
	
		
			
				|  |  |  |  |     and the method that checks a types magic numbers against a given file's.""" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     OFFSET = 0 | 
		
	
		
			
				|  |  |  |  |     compressed = False | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         raise NotImplementedError | 
		
	
		
			
				|  |  |  |  |     extension: str | 
		
	
		
			
				|  |  |  |  |     name: str | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @classmethod | 
		
	
		
			
				|  |  |  |  |     def magic_number(cls): | 
		
	
		
			
				|  |  |  |  |     def magic_numbers(cls) -> List[bytes]: | 
		
	
		
			
				|  |  |  |  |         """Return a list of all potential magic numbers for a filetype""" | 
		
	
		
			
				|  |  |  |  |         return [x[1] for x in inspect.getmembers(cls) if x[0].startswith("_MAGIC_NUMBER")] | 
		
	
		
			
				|  |  |  |  |         return [ | 
		
	
		
			
				|  |  |  |  |             value for name, value in inspect.getmembers(cls) if name.startswith("_MAGIC_NUMBER") | 
		
	
		
			
				|  |  |  |  |         ] | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @classmethod | 
		
	
		
			
				|  |  |  |  |     def header_size(cls): | 
		
	
		
			
				|  |  |  |  |     def header_size(cls) -> int: | 
		
	
		
			
				|  |  |  |  |         """Return size of largest magic number associated with file type""" | 
		
	
		
			
				|  |  |  |  |         return max([len(x) for x in cls.magic_number()]) | 
		
	
		
			
				|  |  |  |  |         return max(len(x) for x in cls.magic_numbers()) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @classmethod | 
		
	
		
			
				|  |  |  |  |     def _bytes_check(cls, magic_bytes): | 
		
	
		
			
				|  |  |  |  |         for magic in cls.magic_number(): | 
		
	
		
			
				|  |  |  |  |             if magic_bytes.startswith(magic): | 
		
	
		
			
				|  |  |  |  |                 return True | 
		
	
		
			
				|  |  |  |  |         return False | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @classmethod | 
		
	
		
			
				|  |  |  |  |     def is_file_of_type(cls, iostream): | 
		
	
		
			
				|  |  |  |  |         """Query byte stream for appropriate magic number | 
		
	
		
			
				|  |  |  |  |     def matches_magic(self, stream: BinaryIO) -> bool: | 
		
	
		
			
				|  |  |  |  |         """Returns true if the stream matches the current file type by any of its magic numbers. | 
		
	
		
			
				|  |  |  |  |         Resets stream to original position. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |         Args: | 
		
	
		
			
				|  |  |  |  |             iostream: file byte stream | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |         Returns: | 
		
	
		
			
				|  |  |  |  |             Bool denoting whether file is of class file type | 
		
	
		
			
				|  |  |  |  |             based on magic number | 
		
	
		
			
				|  |  |  |  |             stream: file byte stream | 
		
	
		
			
				|  |  |  |  |         """ | 
		
	
		
			
				|  |  |  |  |         if not iostream: | 
		
	
		
			
				|  |  |  |  |             return False | 
		
	
		
			
				|  |  |  |  |         # move to location of magic bytes | 
		
	
		
			
				|  |  |  |  |         iostream.seek(cls.OFFSET) | 
		
	
		
			
				|  |  |  |  |         magic_bytes = iostream.read(cls.header_size()) | 
		
	
		
			
				|  |  |  |  |         # return to beginning of file | 
		
	
		
			
				|  |  |  |  |         iostream.seek(0) | 
		
	
		
			
				|  |  |  |  |         if cls._bytes_check(magic_bytes): | 
		
	
		
			
				|  |  |  |  |             return True | 
		
	
		
			
				|  |  |  |  |         return False | 
		
	
		
			
				|  |  |  |  |         offset = stream.tell() | 
		
	
		
			
				|  |  |  |  |         stream.seek(self.OFFSET) | 
		
	
		
			
				|  |  |  |  |         magic_bytes = stream.read(self.header_size()) | 
		
	
		
			
				|  |  |  |  |         stream.seek(offset) | 
		
	
		
			
				|  |  |  |  |         return any(magic_bytes.startswith(magic) for magic in self.magic_numbers()) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class CompressedFileTypeInterface(FileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     """Interface class for FileTypes that include compression information""" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     compressed = True | 
		
	
		
			
				|  |  |  |  |     def peek(self, stream: BinaryIO, num_bytes: int) -> Optional[io.BytesIO]: | 
		
	
		
			
				|  |  |  |  |         """This method returns the first num_bytes of a decompressed stream. Returns None if no | 
		
	
		
			
				|  |  |  |  |         builtin support for decompression.""" | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def decomp_in_memory(stream): | 
		
	
		
			
				|  |  |  |  |         """This method decompresses and loads the first 200 or so bytes of a compressed file | 
		
	
		
			
				|  |  |  |  |         to check for compressed archives. This does not decompress the entire file and should | 
		
	
		
			
				|  |  |  |  |         not be used for direct expansion of archives/compressed files | 
		
	
		
			
				|  |  |  |  |         """ | 
		
	
		
			
				|  |  |  |  |         raise NotImplementedError("Implementation by compression subclass required") | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def _decompressed_peek( | 
		
	
		
			
				|  |  |  |  |     decompressed_stream: io.BufferedIOBase, stream: BinaryIO, num_bytes: int | 
		
	
		
			
				|  |  |  |  | ) -> io.BytesIO: | 
		
	
		
			
				|  |  |  |  |     # Read the first num_bytes of the decompressed stream, do not advance the stream position. | 
		
	
		
			
				|  |  |  |  |     pos = stream.tell() | 
		
	
		
			
				|  |  |  |  |     data = decompressed_stream.read(num_bytes) | 
		
	
		
			
				|  |  |  |  |     stream.seek(pos) | 
		
	
		
			
				|  |  |  |  |     return io.BytesIO(data) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class BZipFileType(CompressedFileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER = b"\x42\x5a\x68" | 
		
	
		
			
				|  |  |  |  |     extension = "bz2" | 
		
	
		
			
				|  |  |  |  |     name = "bzip2 compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "bzip2 compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def decomp_in_memory(stream): | 
		
	
		
			
				|  |  |  |  |         if is_bz2_supported(): | 
		
	
		
			
				|  |  |  |  |             # checking for underlying archive, only decomp as many bytes | 
		
	
		
			
				|  |  |  |  |             # as is absolutely neccesary for largest archive header (tar) | 
		
	
		
			
				|  |  |  |  |             comp_stream = stream.read(TarFileType.OFFSET + TarFileType.header_size()) | 
		
	
		
			
				|  |  |  |  |             return io.BytesIO(initial_bytes=bz2.BZ2Decompressor().decompress(comp_stream)) | 
		
	
		
			
				|  |  |  |  |     def peek(self, stream: BinaryIO, num_bytes: int) -> Optional[io.BytesIO]: | 
		
	
		
			
				|  |  |  |  |         if BZ2_SUPPORTED: | 
		
	
		
			
				|  |  |  |  |             return _decompressed_peek(bz2.BZ2File(stream), stream, num_bytes) | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
	
		
			
				
					
					|  |  |  | @@ -516,57 +436,28 @@ class ZCompressedFileType(CompressedFileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER_LZW = b"\x1f\x9d" | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER_LZH = b"\x1f\xa0" | 
		
	
		
			
				|  |  |  |  |     extension = "Z" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "compress'd data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def decomp_in_memory(stream): | 
		
	
		
			
				|  |  |  |  |         # python has no method of decompressing `.Z` files in memory | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  |     name = "compress'd data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class GZipFileType(CompressedFileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER = b"\x1f\x8b\x08" | 
		
	
		
			
				|  |  |  |  |     extension = "gz" | 
		
	
		
			
				|  |  |  |  |     name = "gzip compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "gzip compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def decomp_in_memory(stream): | 
		
	
		
			
				|  |  |  |  |         if is_gzip_supported(): | 
		
	
		
			
				|  |  |  |  |             # checking for underlying archive, only decomp as many bytes | 
		
	
		
			
				|  |  |  |  |             # as is absolutely neccesary for largest archive header (tar) | 
		
	
		
			
				|  |  |  |  |             return io.BytesIO( | 
		
	
		
			
				|  |  |  |  |                 initial_bytes=gzip.GzipFile(fileobj=stream).read( | 
		
	
		
			
				|  |  |  |  |                     TarFileType.OFFSET + TarFileType.header_size() | 
		
	
		
			
				|  |  |  |  |                 ) | 
		
	
		
			
				|  |  |  |  |             ) | 
		
	
		
			
				|  |  |  |  |     def peek(self, stream: BinaryIO, num_bytes: int) -> Optional[io.BytesIO]: | 
		
	
		
			
				|  |  |  |  |         if GZIP_SUPPORTED: | 
		
	
		
			
				|  |  |  |  |             return _decompressed_peek(gzip.GzipFile(fileobj=stream), stream, num_bytes) | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class LzmaFileType(CompressedFileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER = b"\xfd7zXZ" | 
		
	
		
			
				|  |  |  |  |     extension = "xz" | 
		
	
		
			
				|  |  |  |  |     name = "xz compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "xz compressed data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def decomp_in_memory(stream): | 
		
	
		
			
				|  |  |  |  |         if is_lzma_supported(): | 
		
	
		
			
				|  |  |  |  |             # checking for underlying archive, only decomp as many bytes | 
		
	
		
			
				|  |  |  |  |             # as is absolutely neccesary for largest archive header (tar) | 
		
	
		
			
				|  |  |  |  |             max_size = TarFileType.OFFSET + TarFileType.header_size() | 
		
	
		
			
				|  |  |  |  |             return io.BytesIO( | 
		
	
		
			
				|  |  |  |  |                 initial_bytes=lzma.LZMADecompressor().decompress( | 
		
	
		
			
				|  |  |  |  |                     stream.read(max_size), max_length=max_size | 
		
	
		
			
				|  |  |  |  |                 ) | 
		
	
		
			
				|  |  |  |  |             ) | 
		
	
		
			
				|  |  |  |  |     def peek(self, stream: BinaryIO, num_bytes: int) -> Optional[io.BytesIO]: | 
		
	
		
			
				|  |  |  |  |         if LZMA_SUPPORTED: | 
		
	
		
			
				|  |  |  |  |             return _decompressed_peek(lzma.LZMAFile(stream), stream, num_bytes) | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
	
		
			
				
					
					|  |  |  | @@ -575,111 +466,111 @@ class TarFileType(FileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER_GNU = b"ustar  \0" | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER_POSIX = b"ustar\x0000" | 
		
	
		
			
				|  |  |  |  |     extension = "tar" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "tar archive" | 
		
	
		
			
				|  |  |  |  |     name = "tar archive" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | class ZipFleType(FileTypeInterface): | 
		
	
		
			
				|  |  |  |  |     _MAGIC_NUMBER = b"PK\003\004" | 
		
	
		
			
				|  |  |  |  |     extension = "zip" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     @staticmethod | 
		
	
		
			
				|  |  |  |  |     def name(): | 
		
	
		
			
				|  |  |  |  |         return "Zip archive data" | 
		
	
		
			
				|  |  |  |  |     name = "Zip archive data" | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | # collection of valid Spack recognized archive and compression | 
		
	
		
			
				|  |  |  |  | # file type identifier classes. | 
		
	
		
			
				|  |  |  |  | VALID_FILETYPES = [ | 
		
	
		
			
				|  |  |  |  |     BZipFileType, | 
		
	
		
			
				|  |  |  |  |     ZCompressedFileType, | 
		
	
		
			
				|  |  |  |  |     GZipFileType, | 
		
	
		
			
				|  |  |  |  |     LzmaFileType, | 
		
	
		
			
				|  |  |  |  |     TarFileType, | 
		
	
		
			
				|  |  |  |  |     ZipFleType, | 
		
	
		
			
				|  |  |  |  | #: Maximum number of bytes to read from a file to determine any archive type. Tar is the largest. | 
		
	
		
			
				|  |  |  |  | MAX_BYTES_ARCHIVE_HEADER = TarFileType.OFFSET + TarFileType.header_size() | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | #: Collection of supported archive and compression file type identifier classes. | 
		
	
		
			
				|  |  |  |  | SUPPORTED_FILETYPES: List[FileTypeInterface] = [ | 
		
	
		
			
				|  |  |  |  |     BZipFileType(), | 
		
	
		
			
				|  |  |  |  |     ZCompressedFileType(), | 
		
	
		
			
				|  |  |  |  |     GZipFileType(), | 
		
	
		
			
				|  |  |  |  |     LzmaFileType(), | 
		
	
		
			
				|  |  |  |  |     TarFileType(), | 
		
	
		
			
				|  |  |  |  |     ZipFleType(), | 
		
	
		
			
				|  |  |  |  | ] | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def extension_from_stream(stream, decompress=False): | 
		
	
		
			
				|  |  |  |  |     """Return extension represented by stream corresponding to archive file | 
		
	
		
			
				|  |  |  |  |     If stream does not represent an archive type recongized by Spack | 
		
	
		
			
				|  |  |  |  |     (see `spack.util.compression.ALLOWED_ARCHIVE_TYPES`) method will return None | 
		
	
		
			
				|  |  |  |  | def _extension_of_compressed_file( | 
		
	
		
			
				|  |  |  |  |     file_type: CompressedFileTypeInterface, stream: BinaryIO | 
		
	
		
			
				|  |  |  |  | ) -> Optional[str]: | 
		
	
		
			
				|  |  |  |  |     """Retrieves the extension of a file after decompression from its magic numbers, if it can be | 
		
	
		
			
				|  |  |  |  |     decompressed.""" | 
		
	
		
			
				|  |  |  |  |     # To classify the file we only need to decompress the first so many bytes. | 
		
	
		
			
				|  |  |  |  |     decompressed_magic = file_type.peek(stream, MAX_BYTES_ARCHIVE_HEADER) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Extension type is derived by searching for identifying bytes | 
		
	
		
			
				|  |  |  |  |     in file stream. | 
		
	
		
			
				|  |  |  |  |     if not decompressed_magic: | 
		
	
		
			
				|  |  |  |  |         return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     return extension_from_magic_numbers_by_stream(decompressed_magic, decompress=False) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def extension_from_magic_numbers_by_stream( | 
		
	
		
			
				|  |  |  |  |     stream: BinaryIO, decompress: bool = False | 
		
	
		
			
				|  |  |  |  | ) -> Optional[str]: | 
		
	
		
			
				|  |  |  |  |     """Returns the typical extension for the opened file, without leading ``.``, based on its magic | 
		
	
		
			
				|  |  |  |  |     numbers. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     If the stream does not represent file type recongized by Spack (see | 
		
	
		
			
				|  |  |  |  |     :py:data:`SUPPORTED_FILETYPES`), the method will return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         stream : stream representing a file on system | 
		
	
		
			
				|  |  |  |  |         decompress (bool) : if True, compressed files are checked | 
		
	
		
			
				|  |  |  |  |                             for archive types beneath compression i.e. tar.gz | 
		
	
		
			
				|  |  |  |  |                             default is False, otherwise, return top level type i.e. gz | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Return: | 
		
	
		
			
				|  |  |  |  |         A string represting corresponding archive extension | 
		
	
		
			
				|  |  |  |  |             or None as relevant. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     for arc_type in VALID_FILETYPES: | 
		
	
		
			
				|  |  |  |  |         if arc_type.is_file_of_type(stream): | 
		
	
		
			
				|  |  |  |  |             suffix_ext = arc_type.extension | 
		
	
		
			
				|  |  |  |  |             prefix_ext = "" | 
		
	
		
			
				|  |  |  |  |             if arc_type.compressed and decompress: | 
		
	
		
			
				|  |  |  |  |                 # stream represents compressed file | 
		
	
		
			
				|  |  |  |  |                 # get decompressed stream (if possible) | 
		
	
		
			
				|  |  |  |  |                 decomp_stream = arc_type.decomp_in_memory(stream) | 
		
	
		
			
				|  |  |  |  |                 prefix_ext = extension_from_stream(decomp_stream, decompress=decompress) | 
		
	
		
			
				|  |  |  |  |                 if not prefix_ext: | 
		
	
		
			
				|  |  |  |  |                     # We were unable to decompress or unable to derive | 
		
	
		
			
				|  |  |  |  |                     # a nested extension from decompressed file. | 
		
	
		
			
				|  |  |  |  |                     # Try to use filename parsing to check for | 
		
	
		
			
				|  |  |  |  |                     # potential nested extensions if there are any | 
		
	
		
			
				|  |  |  |  |                     tty.debug( | 
		
	
		
			
				|  |  |  |  |                         "Cannot derive file extension from magic number;" | 
		
	
		
			
				|  |  |  |  |                         " falling back to regex path parsing." | 
		
	
		
			
				|  |  |  |  |                     ) | 
		
	
		
			
				|  |  |  |  |                     return llnl.url.extension_from_path(stream.name) | 
		
	
		
			
				|  |  |  |  |             resultant_ext = suffix_ext if not prefix_ext else ".".join([prefix_ext, suffix_ext]) | 
		
	
		
			
				|  |  |  |  |             tty.debug("File extension %s successfully derived by magic number." % resultant_ext) | 
		
	
		
			
				|  |  |  |  |             return resultant_ext | 
		
	
		
			
				|  |  |  |  |         stream: stream representing a file on system | 
		
	
		
			
				|  |  |  |  |         decompress: if True, compressed files are checked for archive types beneath compression. | 
		
	
		
			
				|  |  |  |  |             For example tar.gz if True versus only gz if False.""" | 
		
	
		
			
				|  |  |  |  |     for file_type in SUPPORTED_FILETYPES: | 
		
	
		
			
				|  |  |  |  |         if not file_type.matches_magic(stream): | 
		
	
		
			
				|  |  |  |  |             continue | 
		
	
		
			
				|  |  |  |  |         ext = file_type.extension | 
		
	
		
			
				|  |  |  |  |         if decompress and isinstance(file_type, CompressedFileTypeInterface): | 
		
	
		
			
				|  |  |  |  |             uncompressed_ext = _extension_of_compressed_file(file_type, stream) | 
		
	
		
			
				|  |  |  |  |             if not uncompressed_ext: | 
		
	
		
			
				|  |  |  |  |                 tty.debug( | 
		
	
		
			
				|  |  |  |  |                     "Cannot derive file extension from magic number;" | 
		
	
		
			
				|  |  |  |  |                     " falling back to original file name." | 
		
	
		
			
				|  |  |  |  |                 ) | 
		
	
		
			
				|  |  |  |  |                 return llnl.url.extension_from_path(stream.name) | 
		
	
		
			
				|  |  |  |  |             ext = f"{uncompressed_ext}.{ext}" | 
		
	
		
			
				|  |  |  |  |         tty.debug(f"File extension {ext} successfully derived by magic number.") | 
		
	
		
			
				|  |  |  |  |         return ext | 
		
	
		
			
				|  |  |  |  |     return None | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def extension_from_file(file, decompress=False): | 
		
	
		
			
				|  |  |  |  |     """Return extension from archive file path | 
		
	
		
			
				|  |  |  |  |     Extension is derived based on magic number parsing similar | 
		
	
		
			
				|  |  |  |  |     to the `file` utility. Attempts to return abbreviated file extensions | 
		
	
		
			
				|  |  |  |  |     whenever a file has an abbreviated extension such as `.tgz` or `.txz`. | 
		
	
		
			
				|  |  |  |  |     This distinction in abbreivated extension names is accomplished | 
		
	
		
			
				|  |  |  |  |     by string parsing. | 
		
	
		
			
				|  |  |  |  | def _maybe_abbreviate_extension(path: str, extension: str) -> str: | 
		
	
		
			
				|  |  |  |  |     """If the file is a compressed tar archive, return the abbreviated extension t[xz|gz|bz2|bz] | 
		
	
		
			
				|  |  |  |  |     instead of tar.[xz|gz|bz2|bz] if the file's original name also has an abbreviated extension.""" | 
		
	
		
			
				|  |  |  |  |     if not extension.startswith("tar."): | 
		
	
		
			
				|  |  |  |  |         return extension | 
		
	
		
			
				|  |  |  |  |     abbr = f"t{extension[4:]}" | 
		
	
		
			
				|  |  |  |  |     return abbr if llnl.url.has_extension(path, abbr) else extension | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  | def extension_from_magic_numbers(path: str, decompress: bool = False) -> Optional[str]: | 
		
	
		
			
				|  |  |  |  |     """Return typical extension without leading ``.`` of a compressed file or archive at the given | 
		
	
		
			
				|  |  |  |  |     path, based on its magic numbers, similar to the `file` utility. Notice that the extension | 
		
	
		
			
				|  |  |  |  |     returned from this function may not coincide with the file's given extension. | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     Args: | 
		
	
		
			
				|  |  |  |  |         file (os.PathLike): path descibing file on system for which ext | 
		
	
		
			
				|  |  |  |  |             will be determined. | 
		
	
		
			
				|  |  |  |  |         decompress (bool): If True, method will peek into compressed | 
		
	
		
			
				|  |  |  |  |             files to check for archive file types. default is False. | 
		
	
		
			
				|  |  |  |  |             If false, method will be unable to distinguish `.tar.gz` from `.gz` | 
		
	
		
			
				|  |  |  |  |             or similar. | 
		
	
		
			
				|  |  |  |  |     Return: | 
		
	
		
			
				|  |  |  |  |         Spack recognized archive file extension as determined by file's magic number and | 
		
	
		
			
				|  |  |  |  |          file name. If file is not on system or is of an type not recognized by Spack as | 
		
	
		
			
				|  |  |  |  |          an archive or compression type, None is returned. | 
		
	
		
			
				|  |  |  |  |         path: file to determine extension of | 
		
	
		
			
				|  |  |  |  |         decompress: If True, method will peek into decompressed file to check for archive file | 
		
	
		
			
				|  |  |  |  |             types. If False, the method will return only the top-level extension (for example | 
		
	
		
			
				|  |  |  |  |             ``gz`` and not ``tar.gz``). | 
		
	
		
			
				|  |  |  |  |     Returns: | 
		
	
		
			
				|  |  |  |  |         Spack recognized archive file extension as determined by file's magic number and file name. | 
		
	
		
			
				|  |  |  |  |         If file is not on system or is of a type not recognized by Spack as an archive or | 
		
	
		
			
				|  |  |  |  |         compression type, None is returned. If the file is classified as a compressed tarball, the | 
		
	
		
			
				|  |  |  |  |         extension is abbreviated (for instance ``tgz`` not ``tar.gz``) if that matches the file's | 
		
	
		
			
				|  |  |  |  |         given extension. | 
		
	
		
			
				|  |  |  |  |     """ | 
		
	
		
			
				|  |  |  |  |     if os.path.exists(file): | 
		
	
		
			
				|  |  |  |  |         with open(file, "rb") as f: | 
		
	
		
			
				|  |  |  |  |             ext = extension_from_stream(f, decompress) | 
		
	
		
			
				|  |  |  |  |             # based on magic number, file is compressed | 
		
	
		
			
				|  |  |  |  |             # tar archive. Check to see if file is abbreviated as | 
		
	
		
			
				|  |  |  |  |             # t[xz|gz|bz2|bz] | 
		
	
		
			
				|  |  |  |  |             if ext and ext.startswith("tar."): | 
		
	
		
			
				|  |  |  |  |                 suf = ext.split(".")[1] | 
		
	
		
			
				|  |  |  |  |                 abbr = "t" + suf | 
		
	
		
			
				|  |  |  |  |                 if llnl.url.has_extension(file, abbr): | 
		
	
		
			
				|  |  |  |  |                     return abbr | 
		
	
		
			
				|  |  |  |  |             if not ext: | 
		
	
		
			
				|  |  |  |  |                 # If unable to parse extension from stream, | 
		
	
		
			
				|  |  |  |  |                 # attempt to fall back to string parsing | 
		
	
		
			
				|  |  |  |  |                 ext = llnl.url.extension_from_path(file) | 
		
	
		
			
				|  |  |  |  |             return ext | 
		
	
		
			
				|  |  |  |  |     return None | 
		
	
		
			
				|  |  |  |  |     try: | 
		
	
		
			
				|  |  |  |  |         with open(path, "rb") as f: | 
		
	
		
			
				|  |  |  |  |             ext = extension_from_magic_numbers_by_stream(f, decompress) | 
		
	
		
			
				|  |  |  |  |     except OSError as e: | 
		
	
		
			
				|  |  |  |  |         if e.errno == errno.ENOENT: | 
		
	
		
			
				|  |  |  |  |             return None | 
		
	
		
			
				|  |  |  |  |         raise | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Return the extension derived from the magic number if possible. | 
		
	
		
			
				|  |  |  |  |     if ext: | 
		
	
		
			
				|  |  |  |  |         return _maybe_abbreviate_extension(path, ext) | 
		
	
		
			
				|  |  |  |  | 
 | 
		
	
		
			
				|  |  |  |  |     # Otherwise, use the extension from the file name. | 
		
	
		
			
				|  |  |  |  |     return llnl.url.extension_from_path(path) | 
		
	
	
		
			
				
					
					| 
							
							
							
						 |  |  |   |