diff --git a/lib/spack/spack/cmd/license.py b/lib/spack/spack/cmd/license.py
index 9e86ea3c88e..3460a947c1c 100644
--- a/lib/spack/spack/cmd/license.py
+++ b/lib/spack/spack/cmd/license.py
@@ -5,8 +5,9 @@
 import enum
 import os
 import re
-from collections import defaultdict
-from typing import Dict
+import shutil
+import tempfile
+from typing import List, Optional, Tuple
 
 import llnl.util.tty as tty
 
@@ -17,7 +18,7 @@ level = "long"
 
 #: SPDX license id must appear in the first lines of a file
-license_lines = 6
+license_lines = 7
 
 #: Spack's license identifier
 apache2_mit_spdx = "(Apache-2.0 OR MIT)"
 
@@ -36,6 +37,7 @@
     r"^lib/spack/spack_installable/main\.py$",
     r"^lib/spack/spack/(?!(test/)?util/unparse).*\.py$",
     r"^lib/spack/spack/.*\.sh$",
+    r"^lib/spack/spack/.*-test-script$",  # for testing
     r"^lib/spack/spack/.*\.lp$",
     r"^lib/spack/llnl/.*\.py$",
     # special case some test data files that have license headers
@@ -102,72 +104,77 @@ class ErrorType(enum.Enum):
     r"SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)",
 ]
 
+#: lines for `spack license fix`
+fixed_lines = [
+    "# Copyright Spack Project Developers. See COPYRIGHT file for details.",
+    "#",
+    "# SPDX-License-Identifier: (Apache-2.0 OR MIT)",
+]
+
 
 class LicenseError:
-    error_counts: Dict[ErrorType, int]
+    errors: List[Tuple[ErrorType, str]]
 
     def __init__(self):
-        self.error_counts = defaultdict(int)
+        self.errors = []
 
-    def add_error(self, error):
-        self.error_counts[error] += 1
+    def add_error(self, error: ErrorType, path: str) -> None:
+        self.errors.append((error, path))
 
-    def has_errors(self):
-        return sum(self.error_counts.values()) > 0
+    def has_errors(self) -> bool:
+        return bool(self.errors)
 
-    def error_messages(self):
-        total = sum(self.error_counts.values())
-        missing = self.error_counts[ErrorType.GENERAL_MISMATCH]
-        lines = self.error_counts[ErrorType.NOT_IN_FIRST_N_LINES]
-        spdx_mismatch = self.error_counts[ErrorType.SPDX_MISMATCH]
-        return (
-            f"{total} improperly licensed files",
+    def print_and_die(self) -> None:
+        spdx_mismatch = missing = first_n_lines = 0
+        for err, path in self.errors:
+            if err == ErrorType.SPDX_MISMATCH:
+                print(f"{path}: SPDX license identifier mismatch (expected {apache2_mit_spdx})")
+                spdx_mismatch += 1
+            elif err == ErrorType.GENERAL_MISMATCH:
+                print(f"{path}: license header at top of file does not match expected format")
+                missing += 1
+            elif err == ErrorType.NOT_IN_FIRST_N_LINES:
+                print(f"{path}: License not found in first {license_lines} lines")
+                first_n_lines += 1
+
+        tty.die(
+            f"{len(self.errors)} improperly licensed files",
             f"files with wrong SPDX-License-Identifier: {spdx_mismatch}",
-            f"files without license in first {license_lines} lines: {lines}",
+            f"files without license in first {license_lines} lines: {first_n_lines}",
             f"files not containing expected license: {missing}",
+            "",
+            "Try running `spack license fix` to fix these files.",
         )
 
 
-def _check_license(lines, path):
-    def sanitize(line):
-        return re.sub(r"^[\s#\%\.\:]*", "", line).rstrip()
+def _check_license(lines: List[str], path: str) -> Optional[ErrorType]:
+    sanitized = [re.sub(r"^[\s#\%\.\:]*", "", line).rstrip() for line in lines]
 
-    for i, line in enumerate(lines):
-        if all(
-            re.match(regex, sanitize(lines[i + j])) for j, regex in enumerate(license_line_regexes)
-        ):
-            return
+    # if start and end of license are not somewhere in the first n lines, say we didn't
+    # see a license header at all.
+    if not (
+        any(line.startswith("Copyright") for line in sanitized)
+        and any(line.startswith("SPDX") for line in sanitized)
+    ):
+        return ErrorType.NOT_IN_FIRST_N_LINES
 
-        if i >= (license_lines - len(license_line_regexes)):
-            print(f"{path}: License not found in first {license_lines} lines")
-            return ErrorType.NOT_IN_FIRST_N_LINES
+    # compare sliding window of sanitized lines with license regexes -- correct case
+    for i in range(len(sanitized) - len(license_line_regexes) + 1):
+        if all(re.match(regex, sanitized[i + j]) for j, regex in enumerate(license_line_regexes)):
+            return None
 
-    # If the SPDX identifier is present, then there is a mismatch (since it
-    # did not match the above regex)
-    def wrong_spdx_identifier(line, path):
+    # If the SPDX identifier is present, then report that specifically
+    for line in lines:
         m = re.search(r"SPDX-License-Identifier: ([^\n]*)", line)
         if m and m.group(1) != apache2_mit_spdx:
-            print(
-                f"{path}: SPDX license identifier mismatch "
-                f"(expecting {apache2_mit_spdx}, found {m.group(1)})"
-            )
             return ErrorType.SPDX_MISMATCH
 
-    checks = [wrong_spdx_identifier]
-
-    for line in lines:
-        for check in checks:
-            error = check(line, path)
-            if error:
-                return error
-
-    print(f"{path}: the license header at the top of the file does not match the expected format")
+    # if there's some other format issue, say the license doesn't look familiar.
     return ErrorType.GENERAL_MISMATCH
 
 
-def verify(args):
-    """verify that files in spack have the right license header"""
-
+def _find_license_errors(args) -> LicenseError:
+    """Find all license errors and return a LicenseError object."""
     license_errors = LicenseError()
 
     for relpath in _licensed_files(args):
@@ -177,14 +184,117 @@ def verify(args):
 
         error = _check_license(lines, path)
         if error:
-            license_errors.add_error(error)
+            license_errors.add_error(error, path)
 
+    return license_errors
+
+
+def verify(args):
+    """verify that files in spack have the right license header"""
+    license_errors = _find_license_errors(args)
     if license_errors.has_errors():
-        tty.die(*license_errors.error_messages())
+        license_errors.print_and_die()
     else:
         tty.msg("No license issues found.")
 
 
+def _fix_path(path: str) -> List[str]:
+    """Fix the license of a spack file using some simple heuristics.
+
+    This is applied by `spack license fix` to each file that fails verification.
+
+    1. If there already appears to be a familiar-looking license header,
+       replace that license header with the canonical one.
+    2. If there is no license header in a file, attempt to add one, taking into account
+       shebangs for scripts.
+
+    Returns:
+        List of fixed lines, if a fix was possible, otherwise an empty list.
+    """
+    with open(path, encoding="utf-8") as f:
+        lines = f.read().split("\n")
+
+    # only try to fix python files / scripts
+    if not (path.endswith(".py") or path.endswith(".sh") or (lines and lines[0].startswith("#!"))):
+        return []
+
+    # easy case: license looks mostly familiar
+    start = next((i for i, line in enumerate(lines) if re.match(r"#\s*Copyright", line)), -1)
+    end = next((i for i, line in enumerate(lines) if re.match(r"#\s*SPDX-", line)), -1)
+
+    # here we just replace a bad license with the fixed one
+    if start >= 0 and end >= 0:
+        # filter out weird cases and make sure we mostly know what we're fixing
+        if (
+            end < start
+            or end - start > 6
+            or not all(lines[i].startswith("#") for i in range(start, end))
+        ):
+            return []
+
+        if start < (license_lines - len(license_line_regexes)):
+            # replace license where it is
+            lines[start : end + 1] = fixed_lines
+        else:
+            # move license to beginning of file
+            del lines[start : end + 1]
+
+            # skip shebang / editor-mode lines; the bounds check protects very short files
+            start = 0
+            while start < len(lines) and lines[start].startswith(("#!", "# -*-")):
+                start += 1
+
+            lines[start:start] = fixed_lines
+
+        return lines
+
+    # no license in the file yet, so we add it
+    if start == -1 and end == -1:
+        # skip shebang / editor-mode lines; the bounds check protects very short files
+        start = 0
+        while start < len(lines) and lines[start].startswith(("#!", "# -*-")):
+            start += 1
+
+        # add an empty comment line if needed (also when we ran off the end of the file)
+        if start == len(lines) or not re.match(r"#\s*$", lines[start]):
+            lines[start:start] = ["#"]
+            start += 1
+
+        lines[start:start] = fixed_lines
+        return lines
+
+    # only one of the two markers was found: too ambiguous to fix automatically
+    return []
+
+
+def fix(args):
+    """fix files without proper licenses"""
+    license_errors = _find_license_errors(args)
+    if not license_errors.has_errors():
+        tty.msg("No license issues found.")
+        return
+
+    returncode = 0
+    for _, path in license_errors.errors:
+        lines = _fix_path(path)
+        if not lines:
+            print(f"I don't know how to fix {path}")
+            returncode = 1
+            continue
+
+        # write next to the target so the final move stays within one directory
+        parent = os.path.dirname(path)
+        with tempfile.NamedTemporaryFile("w", dir=parent, delete=False, encoding="utf-8") as temp:
+            temp.write("\n".join(lines))
+
+        # move only after the temp file is closed; os.replace overwrites on every platform
+        shutil.copymode(path, temp.name)
+        os.replace(temp.name, path)
+        print(f"Fixed {path}")
+
+    return returncode
+
+
 def setup_parser(subparser):
     subparser.add_argument(
         "--root",
@@ -196,10 +306,11 @@ def setup_parser(subparser):
     sp = subparser.add_subparsers(metavar="SUBCOMMAND", dest="license_command")
     sp.add_parser("list-files", help=list_files.__doc__)
     sp.add_parser("verify", help=verify.__doc__)
+    sp.add_parser("fix", help=fix.__doc__)
 
 
 def license(parser, args):
     licensed_files[:] = [re.compile(regex) for regex in licensed_files]
 
-    commands = {"list-files": list_files, "verify": verify}
+    commands = {"list-files": list_files, "verify": verify, "fix": fix}
     return commands[args.license_command](args)
diff --git a/lib/spack/spack/test/cmd/license.py b/lib/spack/spack/test/cmd/license.py
index f516fcc53e9..e9036e118c1 100644
--- a/lib/spack/spack/test/cmd/license.py
+++ b/lib/spack/spack/test/cmd/license.py
@@ -4,10 +4,11 @@
 
 import os
 import re
+import textwrap
 
 import pytest
 
-from llnl.util.filesystem import mkdirp, touch
+from llnl.util.filesystem import mkdirp
 
 import spack.paths
 from spack.main import SpackCommand
@@ -24,57 +25,172 @@ def test_list_files():
     assert os.path.abspath(__file__) in files
 
 
-def test_verify(tmpdir):
-    source_dir = tmpdir.join("lib", "spack", "spack")
-    mkdirp(str(source_dir))
-
-    no_header = source_dir.join("no_header.py")
-    touch(str(no_header))
-
-    lgpl_header = source_dir.join("lgpl_header.py")
-    with lgpl_header.open("w") as f:
-        f.write(
-            """\
-# Copyright Spack Project Developers. See COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: LGPL-2.1-only
-"""
-        )
-
-    not_in_first_n_lines = source_dir.join("not_in_first_n_lines.py")
-    with not_in_first_n_lines.open("w") as f:
-        f.write(
-            """\
-#
-#
-#
-#
-# Copyright Spack Project Developers. See COPYRIGHT file for details.
-#
-# SPDX-License-Identifier: (Apache-2.0 OR MIT)
-
-"""
-        )
-
-    correct_header = source_dir.join("correct_header.py")
-    with correct_header.open("w") as f:
-        f.write(
-            """\
+GOOD_HEADER = """\
 # Copyright Spack Project Developers. See COPYRIGHT file for details.
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
 """
-        )
 
-    out = license("--root", str(tmpdir), "verify", fail_on_error=False)
-    assert str(no_header) in out
-    assert str(lgpl_header) in out
-    assert str(not_in_first_n_lines) in out
-    assert str(correct_header) not in out
-    assert "3 improperly licensed files" in out
-    assert re.search(r"files not containing expected license:\s*1", out)
-    assert re.search(r"files with wrong SPDX-License-Identifier:\s*1", out)
-    assert re.search(r"files without license in first 6 lines:\s*1", out)
+parameters = [
+    (
+        "wrong_spdx.py",
+        r"files with wrong SPDX-License-Identifier:\s*1",
+        textwrap.dedent(
+            """\
+            # Copyright Spack Project Developers. See COPYRIGHT file for details.
+            #
+            # SPDX-License-Identifier: LGPL-2.1-only
+            """
+        ),
+        GOOD_HEADER,
+        False,
+    ),
+    (
+        "empty_lines.py",
+        r"files without license in first 7 lines:\s*1",
+        textwrap.dedent(
+            """\
+            #
+            #
+            #
+            #
+            #
+            # Copyright Spack Project Developers. See COPYRIGHT file for details.
+            #
+            # SPDX-License-Identifier: (Apache-2.0 OR MIT)
+            """
+        ),
+        GOOD_HEADER,
+        False,
+    ),
+    (
+        "wrong_devs.py",
+        r"files not containing expected license:\s*1",
+        textwrap.dedent(
+            """\
+            # Copyright Not The Right Developers. See BROKEN file for details.
+            #
+            # SPDX-License-Identifier: (Apache-2.0 OR MIT)
+            """
+        ),
+        GOOD_HEADER,
+        False,
+    ),
+    (
+        "old_llnl.py",
+        r"files not containing expected license:\s*1",
+        textwrap.dedent(
+            """\
+            # Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+            # Spack Project Developers. See top-level COPYRIGHT file for details.
+            #
+            # SPDX-License-Identifier: (Apache-2.0 OR MIT)
+            """
+        ),
+        GOOD_HEADER,
+        False,
+    ),
+    ("no_header.py", r"files without license in first 7 lines:\s*1", "", GOOD_HEADER, False),
+    (
+        "test-script",
+        "",
+        "#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
+        "#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
+        True,
+    ),
+    (
+        "python-lang-test-script",
+        "",
+        "#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
+        "#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
+        True,
+    ),
+    ("unfixable-test-script", "", "", "", False),  # because script + no shebang
+    (
+        "bad-test-script",
+        r"files not containing expected license:\s*1",
+        textwrap.dedent(
+            """\
+            #!/usr/bin/env python3
+            #
+            # Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+            # Spack Project Developers. See top-level COPYRIGHT file for details.
+            #
+            # SPDX-License-Identifier: (Apache-2.0 OR MIT)
+            """
+        ),
+        "#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
+        False,
+    ),
+    (
+        "bad-python-lang-test-script",
+        r"files not containing expected license:\s*1",
+        textwrap.dedent(
+            """\
+            #!/usr/bin/env python3
+            # -*- python -*-
+            #
+            # Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
+            # Spack Project Developers. See top-level COPYRIGHT file for details.
+            #
+            # SPDX-License-Identifier: (Apache-2.0 OR MIT)
+            """
+        ),
+        "#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
+        False,
+    ),
+    ("good.py", "", GOOD_HEADER, GOOD_HEADER, True),
+]
 
-    assert license.returncode == 1
+
+@pytest.mark.parametrize(
+    "filename,expected_txt,header,fixed_header,good",
+    parameters,
+    ids=[param[0] for param in parameters],
+)
+class TestLicenses:
+
+    def _setup_license_root(self, tmpdir, header, filename):
+        source_dir = tmpdir / "lib" / "spack" / "spack"
+        mkdirp(str(source_dir))
+
+        source_file = source_dir / filename
+        with source_file.open("w") as f:
+            f.write(header)
+
+        return source_file
+
+    def test_license_verify(self, filename, expected_txt, header, fixed_header, good, tmpdir):
+        source_file = self._setup_license_root(tmpdir, header, filename)
+
+        out = license("--root", str(tmpdir), "verify", fail_on_error=False)
+
+        if not good:
+            assert str(source_file) in out
+            assert "1 improperly licensed file" in out
+            assert re.search(expected_txt, out)
+            assert license.returncode == 1
+        else:
+            assert license.returncode == 0
+
+    def test_license_fix(self, filename, expected_txt, header, fixed_header, good, tmpdir):
+        source_file = self._setup_license_root(tmpdir, header, filename)
+
+        out = license("--root", str(tmpdir), "fix", fail_on_error=False)
+
+        if good:
+            assert str(source_file) not in out
+            assert license.returncode == 0
+            return
+
+        if fixed_header:
+            assert f"Fixed {str(source_file)}" in out
+            assert license.returncode == 0
+
+            license("--root", str(tmpdir), "verify", fail_on_error=False)
+            assert license.returncode == 0
+
+        else:
+            assert f"I don't know how to fix {str(source_file)}" in out
+            assert license.returncode == 1
diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash
index f2a3c7be1dc..ef2bd511b4e 100644
--- a/share/spack/spack-completion.bash
+++ b/share/spack/spack-completion.bash
@@ -1346,7 +1346,7 @@ _spack_license() {
     then
         SPACK_COMPREPLY="-h --help --root"
     else
-        SPACK_COMPREPLY="list-files verify"
+        SPACK_COMPREPLY="list-files verify fix"
     fi
 }
 
@@ -1358,6 +1358,10 @@ _spack_license_verify() {
     SPACK_COMPREPLY="-h --help"
 }
 
+_spack_license_fix() {
+    SPACK_COMPREPLY="-h --help"
+}
+
 _spack_list() {
     if $list_options
     then
diff --git a/share/spack/spack-completion.fish b/share/spack/spack-completion.fish
index b4d4f7932b7..159389730e8 100644
--- a/share/spack/spack-completion.fish
+++ b/share/spack/spack-completion.fish
@@ -2105,6 +2105,7 @@ complete -c spack -n '__fish_spack_using_command install' -l deprecated -d 'allo
 set -g __fish_spack_optspecs_spack_license h/help root=
 complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a list-files -d 'list files in spack that should have license headers'
 complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a verify -d 'verify that files in spack have the right license header'
+complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a fix -d 'fix files without proper licenses'
 complete -c spack -n '__fish_spack_using_command license' -s h -l help -f -a help
 complete -c spack -n '__fish_spack_using_command license' -s h -l help -d 'show this help message and exit'
 complete -c spack -n '__fish_spack_using_command license' -l root -r -f -a root
@@ -2120,6 +2121,11 @@ set -g __fish_spack_optspecs_spack_license_verify h/help
 complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -f -a help
 complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -d 'show this help message and exit'
 
+# spack license fix
+set -g __fish_spack_optspecs_spack_license_fix h/help
+complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -f -a help
+complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -d 'show this help message and exit'
+
 # spack list
 set -g __fish_spack_optspecs_spack_list h/help r/repo= d/search-description format= v/virtuals t/tag= count update=
 complete -c spack -n '__fish_spack_using_command_pos_remainder 0 list' -f -a '(__fish_spack_packages)'