Compare commits

...

2 Commits

Author SHA1 Message Date
Todd Gamblin
9ee32b060b
WIP 2025-03-26 01:16:32 -07:00
Todd Gamblin
37c63aa22d
spack license fix command
Add a `spack license fix` command that automatically fixes license headers.

This will:
- [x] convert old headers (i.e., ones from before #48352) to new ones like #48352
- [x] add headers to python files and shell scripts
- [x] handle shebangs, emacs `-*- language -*-` markers, etc.
- [x] address some common problems, e.g. if the header isn't in the first 7 lines

This should make it easier for maintainers to work with contributors to update
their license headers (tell them to run `spack license fix`).

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
2025-03-26 00:59:04 -07:00
4 changed files with 358 additions and 99 deletions

View File

@ -5,8 +5,9 @@
import enum
import os
import re
from collections import defaultdict
from typing import Dict
import shutil
import tempfile
from typing import List, Optional, Tuple
import llnl.util.tty as tty
@ -17,7 +18,7 @@
level = "long"
#: SPDX license id must appear in the first <license_lines> lines of a file
license_lines = 6
license_lines = 7
#: Spack's license identifier
apache2_mit_spdx = "(Apache-2.0 OR MIT)"
@ -36,6 +37,7 @@
r"^lib/spack/spack_installable/main\.py$",
r"^lib/spack/spack/(?!(test/)?util/unparse).*\.py$",
r"^lib/spack/spack/.*\.sh$",
r"^lib/spack/spack/.*-test-script$", # for testing
r"^lib/spack/spack/.*\.lp$",
r"^lib/spack/llnl/.*\.py$",
# special case some test data files that have license headers
@ -102,72 +104,77 @@ class ErrorType(enum.Enum):
r"SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)",
]
#: lines for `spack license fix`
fixed_lines = [
"# Copyright Spack Project Developers. See COPYRIGHT file for details.",
"#",
"# SPDX-License-Identifier: (Apache-2.0 OR MIT)",
]
class LicenseError:
error_counts: Dict[ErrorType, int]
errors: List[Tuple[ErrorType, str]]
def __init__(self):
self.error_counts = defaultdict(int)
self.errors = []
def add_error(self, error):
self.error_counts[error] += 1
def add_error(self, error: ErrorType, path: str) -> None:
self.errors.append((error, path))
def has_errors(self):
return sum(self.error_counts.values()) > 0
def has_errors(self) -> bool:
return bool(self.errors)
def error_messages(self):
total = sum(self.error_counts.values())
missing = self.error_counts[ErrorType.GENERAL_MISMATCH]
lines = self.error_counts[ErrorType.NOT_IN_FIRST_N_LINES]
spdx_mismatch = self.error_counts[ErrorType.SPDX_MISMATCH]
return (
f"{total} improperly licensed files",
def print_and_die(self) -> None:
spdx_mismatch = missing = first_n_lines = 0
for err, path in self.errors:
if err == ErrorType.SPDX_MISMATCH:
print(f"{path}: SPDX license identifier mismatch (expected {apache2_mit_spdx})")
spdx_mismatch += 1
elif err == ErrorType.GENERAL_MISMATCH:
print(f"{path}: license header at top of file does not match expected format")
missing += 1
elif err == ErrorType.NOT_IN_FIRST_N_LINES:
print(f"{path}: License not found in first {license_lines} lines")
first_n_lines += 1
tty.die(
f"{len(self.errors)} improperly licensed files",
f"files with wrong SPDX-License-Identifier: {spdx_mismatch}",
f"files without license in first {license_lines} lines: {lines}",
f"files without license in first {license_lines} lines: {first_n_lines}",
f"files not containing expected license: {missing}",
"",
"Try running `spack license fix` to fix these files.",
)
def _check_license(lines, path):
def sanitize(line):
return re.sub(r"^[\s#\%\.\:]*", "", line).rstrip()
def _check_license(lines: List[str], path: str) -> Optional[ErrorType]:
sanitized = [re.sub(r"^[\s#\%\.\:]*", "", line).rstrip() for line in lines]
for i, line in enumerate(lines):
if all(
re.match(regex, sanitize(lines[i + j])) for j, regex in enumerate(license_line_regexes)
):
return
# if start and end of license are not somewhere in the first n lines, say we didn't
# see a license header at all.
if not (
any(line.startswith("Copyright") for line in sanitized)
and any(line.startswith("SPDX") for line in sanitized)
):
return ErrorType.NOT_IN_FIRST_N_LINES
if i >= (license_lines - len(license_line_regexes)):
print(f"{path}: License not found in first {license_lines} lines")
return ErrorType.NOT_IN_FIRST_N_LINES
# compare sliding window of sanitized lines with license regexes -- correct case
for i in range(len(sanitized) - len(license_line_regexes) + 1):
if all(re.match(regex, sanitized[i + j]) for j, regex in enumerate(license_line_regexes)):
return None
# If the SPDX identifier is present, then there is a mismatch (since it
# did not match the above regex)
def wrong_spdx_identifier(line, path):
# If the SPDX identifier is present, then report that specifically
for line in lines:
m = re.search(r"SPDX-License-Identifier: ([^\n]*)", line)
if m and m.group(1) != apache2_mit_spdx:
print(
f"{path}: SPDX license identifier mismatch "
f"(expecting {apache2_mit_spdx}, found {m.group(1)})"
)
return ErrorType.SPDX_MISMATCH
checks = [wrong_spdx_identifier]
for line in lines:
for check in checks:
error = check(line, path)
if error:
return error
print(f"{path}: the license header at the top of the file does not match the expected format")
# if there's some other format issue, say the license doesn't look familiar.
return ErrorType.GENERAL_MISMATCH
def verify(args):
"""verify that files in spack have the right license header"""
def _find_license_errors(args) -> LicenseError:
"""Find all license errors and return a LicenseError object."""
license_errors = LicenseError()
for relpath in _licensed_files(args):
@ -177,14 +184,123 @@ def verify(args):
error = _check_license(lines, path)
if error:
license_errors.add_error(error)
license_errors.add_error(error, path)
return license_errors
def verify(args):
"""verify that files in spack have the right license header"""
license_errors = _find_license_errors(args)
if license_errors.has_errors():
tty.die(*license_errors.error_messages())
license_errors.print_and_die()
else:
tty.msg("No license issues found.")
def _license_insertion_index(lines: List[str], comment: str) -> int:
    """Return the index of the first line after any leading shebang / emacs mode lines."""
    skipped_prefixes = ("#!", f"{comment} -*-")
    index = 0
    # Bound the scan: a file made up only of such lines must not run off the end.
    while index < len(lines) and lines[index].startswith(skipped_prefixes):
        index += 1
    return index


def _fix_path(path: str) -> List[str]:
    """Compute a corrected version of a single file's license header using simple heuristics.

    1. If there already appears to be a familiar-looking license header (a comment line
       starting with ``Copyright`` followed by one starting with ``SPDX-``), replace it with
       the canonical one. If it sits too far down the file, move it to the top, after any
       shebang or emacs ``-*- language -*-`` line.
    2. If there is no license header at all, add one, taking shebangs into account.

    ``.lp`` files use ``%`` as the comment character; everything else uses ``#``. Files
    without a ``.py``, ``.sh`` or ``.lp`` extension are only handled if they start with a
    shebang. This function does not write anything to disk.

    Args:
        path: path of the file to fix.

    Returns:
        All lines of the fixed file (without line terminators) if a fix was possible,
        otherwise an empty list.
    """
    comment, fixed = "#", fixed_lines
    if path.endswith(".lp"):
        fixed = [line.replace(comment, "%") for line in fixed_lines]
        comment = "%"

    try:
        with open(path, encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        # not a text file we know how to handle
        return []

    # Files without a known extension are only treated as scripts if they have a shebang.
    # `.lp` is a known extension here; otherwise the `%` handling above could never apply.
    if not path.endswith((".py", ".sh", ".lp")) and not text.startswith("#!"):
        return []
    lines = text.split("\n")

    marker = re.escape(comment)
    start = next(
        (i for i, line in enumerate(lines) if re.match(rf"{marker}\s*Copyright", line)), -1
    )
    end = next((i for i, line in enumerate(lines) if re.match(rf"{marker}\s*SPDX-", line)), -1)

    # easy case: license looks mostly familiar, so replace the bad header with the fixed one
    if start >= 0 and end >= 0:
        # filter out weird cases and make sure we mostly know what we're fixing
        if (
            end < start
            or end - start > 6
            or not all(lines[i].startswith(comment) for i in range(start, end))
        ):
            return []

        if start < (license_lines - len(license_line_regexes)):
            # header starts early enough: replace the license where it is
            lines[start : end + 1] = fixed
        else:
            # header is too far down: move the license to the beginning of the file
            del lines[start : end + 1]
            start = _license_insertion_index(lines, comment)
            lines[start:start] = fixed
        return lines

    # no license in the file yet, so we add it
    if start == -1 and end == -1:
        start = _license_insertion_index(lines, comment)

        # add an empty comment line if there is not one at the insertion point already
        if start == len(lines) or not re.match(rf"{marker}\s*$", lines[start]):
            lines.insert(start, comment)
            start += 1

        # use the comment-adjusted header so `.lp` files get `%` rather than `#`
        lines[start:start] = fixed
        return lines

    # only one of the Copyright / SPDX lines was found: too ambiguous to fix automatically
    return []
def fix(args):
    """Fix files without proper licenses."""
    # NOTE: the docstring above is reused verbatim as the subcommand's help text
    # (`help=fix.__doc__`), so extra documentation lives in comments instead.
    #
    # Returns None when there is nothing to fix, 0 when every bad file was fixed, and 1
    # when at least one file could not be fixed automatically.
    license_errors = _find_license_errors(args)
    if not license_errors.has_errors():
        tty.msg("No license issues found.")
        return

    returncode = 0
    for _error_type, path in license_errors.errors:
        lines = _fix_path(path)
        if not lines:
            print(f"I don't know how to fix {path}")
            returncode = 1
            continue

        # Write the new contents next to the original so the final replace stays on one
        # filesystem. The file is read as UTF-8, so it is written back as UTF-8 too.
        parent = os.path.dirname(path)
        with tempfile.NamedTemporaryFile(
            "w", encoding="utf-8", dir=parent, delete=False
        ) as temp:
            temp.write("\n".join(lines))

        try:
            shutil.copymode(path, temp.name)
            # os.replace overwrites the destination on every platform (os.rename does not
            # on Windows); the temp file is closed by now, which Windows also requires.
            os.replace(temp.name, path)
        except OSError:
            # don't leave stray temp files in the source tree when the swap fails
            if os.path.exists(temp.name):
                os.unlink(temp.name)
            raise

        print(f"Fixed {path}")

    return returncode
def setup_parser(subparser):
subparser.add_argument(
"--root",
@ -196,10 +312,11 @@ def setup_parser(subparser):
sp = subparser.add_subparsers(metavar="SUBCOMMAND", dest="license_command")
sp.add_parser("list-files", help=list_files.__doc__)
sp.add_parser("verify", help=verify.__doc__)
sp.add_parser("fix", help=fix.__doc__)
def license(parser, args):
licensed_files[:] = [re.compile(regex) for regex in licensed_files]
commands = {"list-files": list_files, "verify": verify}
commands = {"list-files": list_files, "verify": verify, "fix": fix}
return commands[args.license_command](args)

View File

@ -4,10 +4,11 @@
import os
import re
import textwrap
import pytest
from llnl.util.filesystem import mkdirp, touch
from llnl.util.filesystem import mkdirp
import spack.paths
from spack.main import SpackCommand
@ -24,57 +25,188 @@ def test_list_files():
assert os.path.abspath(__file__) in files
def test_verify(tmpdir):
source_dir = tmpdir.join("lib", "spack", "spack")
mkdirp(str(source_dir))
no_header = source_dir.join("no_header.py")
touch(str(no_header))
lgpl_header = source_dir.join("lgpl_header.py")
with lgpl_header.open("w") as f:
f.write(
"""\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: LGPL-2.1-only
"""
)
not_in_first_n_lines = source_dir.join("not_in_first_n_lines.py")
with not_in_first_n_lines.open("w") as f:
f.write(
"""\
#
#
#
#
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
)
correct_header = source_dir.join("correct_header.py")
with correct_header.open("w") as f:
f.write(
"""\
GOOD_HEADER = """\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
)
out = license("--root", str(tmpdir), "verify", fail_on_error=False)
assert str(no_header) in out
assert str(lgpl_header) in out
assert str(not_in_first_n_lines) in out
assert str(correct_header) not in out
assert "3 improperly licensed files" in out
assert re.search(r"files not containing expected license:\s*1", out)
assert re.search(r"files with wrong SPDX-License-Identifier:\s*1", out)
assert re.search(r"files without license in first 6 lines:\s*1", out)
parameters = [
(
"wrong_spdx.py",
r"files with wrong SPDX-License-Identifier:\s*1",
textwrap.dedent(
"""\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: LGPL-2.1-only
"""
),
GOOD_HEADER,
False,
),
(
"empty_lines.py",
r"files without license in first 7 lines:\s*1",
textwrap.dedent(
"""\
#
#
#
#
#
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"wrong_devs.py",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
# Copyright Not The Right Developers. See BROKEN file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"old_llnl.py",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"old_llnl.lp",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
% Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
% Spack Project Developers. See top-level COPYRIGHT file for details.
%
% SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER.replace("#", "%"),
False,
),
("no_header.py", r"files without license in first 7 lines:\s*1", "", GOOD_HEADER, False),
(
"test-script",
"",
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
True,
),
(
"python-lang-test-script",
"",
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
True,
),
("unfixable-test-script", "", "", "", False), # because script + no shebang
(
"bad-test-script",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
#!/usr/bin/env python3
#
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
False,
),
(
"bad-python-lang-test-script",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
#!/usr/bin/env python3
# -*- python -*-
#
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
False,
),
("good.py", "", GOOD_HEADER, GOOD_HEADER, True),
("good.lp", "", GOOD_HEADER.replace("#", "%"), GOOD_HEADER.replace("#", "%"), True),
]
assert license.returncode == 1
@pytest.mark.parametrize(
    "filename,expected_txt,header,fixed_header,good",
    parameters,
    ids=[param[0] for param in parameters],
)
class TestLicenses:
    """Run `spack license verify` and `spack license fix` on one generated file per case.

    Each case provides the file name, a regex expected in the `verify` output, the initial
    file contents, the contents expected after fixing, and whether the file is already good.
    """

    def _setup_license_root(self, tmpdir, header, filename):
        """Write ``header`` to ``lib/spack/spack/<filename>`` under ``tmpdir``; return the file."""
        source_dir = tmpdir / "lib" / "spack" / "spack"
        mkdirp(str(source_dir))

        source_file = source_dir / filename
        with source_file.open("w") as f:
            f.write(header)

        return source_file

    def test_license_verify(self, filename, expected_txt, header, fixed_header, good, tmpdir):
        """`verify` reports exactly the one bad file, or exits cleanly for a good one."""
        source_file = self._setup_license_root(tmpdir, header, filename)

        out = license("--root", str(tmpdir), "verify", fail_on_error=False)

        if not good:
            assert str(source_file) in out
            assert "1 improperly licensed file" in out
            assert re.search(expected_txt, out)
            assert license.returncode == 1
        else:
            assert license.returncode == 0

    def test_license_fix(self, filename, expected_txt, header, fixed_header, good, tmpdir):
        """`fix` repairs fixable files so that `verify` passes, and reports unfixable ones."""
        source_file = self._setup_license_root(tmpdir, header, filename)

        out = license("--root", str(tmpdir), "fix", fail_on_error=False)

        if good:
            assert str(source_file) not in out
            assert license.returncode == 0
            return

        # NOTE: `fixed_header` is only used for its truthiness here (fixable vs. not); the
        # resulting file contents are validated by re-running `verify`, not compared to it.
        if fixed_header:
            assert f"Fixed {source_file}" in out
            assert license.returncode == 0

            license("--root", str(tmpdir), "verify", fail_on_error=False)
            assert license.returncode == 0
        else:
            assert f"I don't know how to fix {source_file}" in out
            assert license.returncode == 1

View File

@ -1346,7 +1346,7 @@ _spack_license() {
then
SPACK_COMPREPLY="-h --help --root"
else
SPACK_COMPREPLY="list-files verify"
SPACK_COMPREPLY="list-files verify fix"
fi
}
@ -1358,6 +1358,10 @@ _spack_license_verify() {
SPACK_COMPREPLY="-h --help"
}
_spack_license_fix() {
SPACK_COMPREPLY="-h --help"
}
_spack_list() {
if $list_options
then

View File

@ -2105,6 +2105,7 @@ complete -c spack -n '__fish_spack_using_command install' -l deprecated -d 'allo
set -g __fish_spack_optspecs_spack_license h/help root=
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a list-files -d 'list files in spack that should have license headers'
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a verify -d 'verify that files in spack have the right license header'
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a fix -d 'Fix files without proper licenses.'
complete -c spack -n '__fish_spack_using_command license' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license' -s h -l help -d 'show this help message and exit'
complete -c spack -n '__fish_spack_using_command license' -l root -r -f -a root
@ -2120,6 +2121,11 @@ set -g __fish_spack_optspecs_spack_license_verify h/help
complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -d 'show this help message and exit'
# spack license fix
set -g __fish_spack_optspecs_spack_license_fix h/help
complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -d 'show this help message and exit'
# spack list
set -g __fish_spack_optspecs_spack_list h/help r/repo= d/search-description format= v/virtuals t/tag= count update=
complete -c spack -n '__fish_spack_using_command_pos_remainder 0 list' -f -a '(__fish_spack_packages)'