Compare commits

...

2 Commits

Author SHA1 Message Date
Todd Gamblin
9ee32b060b
WIP 2025-03-26 01:16:32 -07:00
Todd Gamblin
37c63aa22d
spack license fix command
Add a `spack license fix` command that automatically fixes license headers.

This will:
- [x] convert old headers (i.e., ones from before #48352) to new ones like #48352
- [x] add headers to python files and shell scripts
- [x] handle shebangs, emacs `-*- language -*-` markers, etc.
- [x] address some common problems, e.g. if the header isn't in the first 7 lines

This should make it easier for maintainers to work with contributors to update
their license headers (tell them to run `spack license fix`).

Signed-off-by: Todd Gamblin <tgamblin@llnl.gov>
2025-03-26 00:59:04 -07:00
4 changed files with 358 additions and 99 deletions

View File

@ -5,8 +5,9 @@
import enum import enum
import os import os
import re import re
from collections import defaultdict import shutil
from typing import Dict import tempfile
from typing import List, Optional, Tuple
import llnl.util.tty as tty import llnl.util.tty as tty
@ -17,7 +18,7 @@
level = "long" level = "long"
#: SPDX license id must appear in the first <license_lines> lines of a file #: SPDX license id must appear in the first <license_lines> lines of a file
license_lines = 6 license_lines = 7
#: Spack's license identifier #: Spack's license identifier
apache2_mit_spdx = "(Apache-2.0 OR MIT)" apache2_mit_spdx = "(Apache-2.0 OR MIT)"
@ -36,6 +37,7 @@
r"^lib/spack/spack_installable/main\.py$", r"^lib/spack/spack_installable/main\.py$",
r"^lib/spack/spack/(?!(test/)?util/unparse).*\.py$", r"^lib/spack/spack/(?!(test/)?util/unparse).*\.py$",
r"^lib/spack/spack/.*\.sh$", r"^lib/spack/spack/.*\.sh$",
r"^lib/spack/spack/.*-test-script$", # for testing
r"^lib/spack/spack/.*\.lp$", r"^lib/spack/spack/.*\.lp$",
r"^lib/spack/llnl/.*\.py$", r"^lib/spack/llnl/.*\.py$",
# special case some test data files that have license headers # special case some test data files that have license headers
@ -102,72 +104,77 @@ class ErrorType(enum.Enum):
r"SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)", r"SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)",
] ]
#: lines for `spack license fix`
fixed_lines = [
"# Copyright Spack Project Developers. See COPYRIGHT file for details.",
"#",
"# SPDX-License-Identifier: (Apache-2.0 OR MIT)",
]
class LicenseError: class LicenseError:
error_counts: Dict[ErrorType, int] errors: List[Tuple[ErrorType, str]]
def __init__(self): def __init__(self):
self.error_counts = defaultdict(int) self.errors = []
def add_error(self, error): def add_error(self, error: ErrorType, path: str) -> None:
self.error_counts[error] += 1 self.errors.append((error, path))
def has_errors(self): def has_errors(self) -> bool:
return sum(self.error_counts.values()) > 0 return bool(self.errors)
def error_messages(self): def print_and_die(self) -> None:
total = sum(self.error_counts.values()) spdx_mismatch = missing = first_n_lines = 0
missing = self.error_counts[ErrorType.GENERAL_MISMATCH] for err, path in self.errors:
lines = self.error_counts[ErrorType.NOT_IN_FIRST_N_LINES] if err == ErrorType.SPDX_MISMATCH:
spdx_mismatch = self.error_counts[ErrorType.SPDX_MISMATCH] print(f"{path}: SPDX license identifier mismatch (expected {apache2_mit_spdx})")
return ( spdx_mismatch += 1
f"{total} improperly licensed files", elif err == ErrorType.GENERAL_MISMATCH:
print(f"{path}: license header at top of file does not match expected format")
missing += 1
elif err == ErrorType.NOT_IN_FIRST_N_LINES:
print(f"{path}: License not found in first {license_lines} lines")
first_n_lines += 1
tty.die(
f"{len(self.errors)} improperly licensed files",
f"files with wrong SPDX-License-Identifier: {spdx_mismatch}", f"files with wrong SPDX-License-Identifier: {spdx_mismatch}",
f"files without license in first {license_lines} lines: {lines}", f"files without license in first {license_lines} lines: {first_n_lines}",
f"files not containing expected license: {missing}", f"files not containing expected license: {missing}",
"",
"Try running `spack license fix` to fix these files.",
) )
def _check_license(lines, path): def _check_license(lines: List[str], path: str) -> Optional[ErrorType]:
def sanitize(line): sanitized = [re.sub(r"^[\s#\%\.\:]*", "", line).rstrip() for line in lines]
return re.sub(r"^[\s#\%\.\:]*", "", line).rstrip()
for i, line in enumerate(lines): # if start and end of license are not somewhere in the first n lines, say we didn't
if all( # see a license header at all.
re.match(regex, sanitize(lines[i + j])) for j, regex in enumerate(license_line_regexes) if not (
any(line.startswith("Copyright") for line in sanitized)
and any(line.startswith("SPDX") for line in sanitized)
): ):
return
if i >= (license_lines - len(license_line_regexes)):
print(f"{path}: License not found in first {license_lines} lines")
return ErrorType.NOT_IN_FIRST_N_LINES return ErrorType.NOT_IN_FIRST_N_LINES
# If the SPDX identifier is present, then there is a mismatch (since it # compare sliding window of sanitized lines with license regexes -- correct case
# did not match the above regex) for i in range(len(sanitized) - len(license_line_regexes) + 1):
def wrong_spdx_identifier(line, path): if all(re.match(regex, sanitized[i + j]) for j, regex in enumerate(license_line_regexes)):
return None
# If the SPDX identifier is present, then report that specifically
for line in lines:
m = re.search(r"SPDX-License-Identifier: ([^\n]*)", line) m = re.search(r"SPDX-License-Identifier: ([^\n]*)", line)
if m and m.group(1) != apache2_mit_spdx: if m and m.group(1) != apache2_mit_spdx:
print(
f"{path}: SPDX license identifier mismatch "
f"(expecting {apache2_mit_spdx}, found {m.group(1)})"
)
return ErrorType.SPDX_MISMATCH return ErrorType.SPDX_MISMATCH
checks = [wrong_spdx_identifier] # if there's some other format issue, say the license doesn't look familiar.
for line in lines:
for check in checks:
error = check(line, path)
if error:
return error
print(f"{path}: the license header at the top of the file does not match the expected format")
return ErrorType.GENERAL_MISMATCH return ErrorType.GENERAL_MISMATCH
def verify(args): def _find_license_errors(args) -> LicenseError:
"""verify that files in spack have the right license header""" """Find all license errors and return a LicenseError object."""
license_errors = LicenseError() license_errors = LicenseError()
for relpath in _licensed_files(args): for relpath in _licensed_files(args):
@ -177,14 +184,123 @@ def verify(args):
error = _check_license(lines, path) error = _check_license(lines, path)
if error: if error:
license_errors.add_error(error) license_errors.add_error(error, path)
return license_errors
def verify(args):
"""verify that files in spack have the right license header"""
license_errors = _find_license_errors(args)
if license_errors.has_errors(): if license_errors.has_errors():
tty.die(*license_errors.error_messages()) license_errors.print_and_die()
else: else:
tty.msg("No license issues found.") tty.msg("No license issues found.")
def _license_insertion_index(lines: List[str], comment: str) -> int:
    """Return the index of the first line that is not a shebang or emacs mode marker."""
    preamble = ("#!", f"{comment} -*-")
    index = 0
    # bounded by len(lines) so a file made up only of preamble lines cannot overrun
    while index < len(lines) and lines[index].startswith(preamble):
        index += 1
    return index


def _fix_path(path: str) -> List[str]:
    """Fix the license header of a single spack file using some simple heuristics.

    1. If there already appears to be a familiar-looking license header (a
       ``Copyright`` comment line followed shortly by an ``SPDX-`` comment line),
       replace that header with the canonical one. A header that starts too far
       down the file is moved up, below any shebang / emacs ``-*-`` marker lines.
    2. If there is no license header in the file, add one, taking into account
       shebangs for scripts.

    Files ending in ``.lp`` use ``%`` as the comment character; everything else
    uses ``#``. Files that do not end in ``.py``, ``.sh`` or ``.lp`` are only
    touched when they start with a ``#!`` shebang.

    Args:
        path: path of the file to fix; it is read as UTF-8.

    Returns:
        List of fixed lines (file content split on newlines) if a fix was
        possible, otherwise an empty list.
    """
    comment, fixed = "#", list(fixed_lines)
    if path.endswith(".lp"):
        fixed = [line.replace(comment, "%") for line in fixed_lines]
        comment = "%"

    try:
        with open(path, encoding="utf-8") as f:
            text = f.read()
    except UnicodeDecodeError:
        # not a text file we can safely rewrite
        return []

    # Known source extensions are always candidates. ``.lp`` must be included
    # here: logic programs never start with ``#!`` and would otherwise be
    # rejected below. Anything else has to look like a script.
    if not path.endswith((".py", ".sh", ".lp")) and not text.startswith("#!"):
        return []

    lines = text.split("\n")
    prefix = re.escape(comment)

    # easy case: license looks mostly familiar
    start = next(
        (i for i, line in enumerate(lines) if re.match(rf"{prefix}\s*Copyright", line)), -1
    )
    end = next((i for i, line in enumerate(lines) if re.match(rf"{prefix}\s*SPDX-", line)), -1)

    # here we just replace a bad license with the fixed one
    if start >= 0 and end >= 0:
        # filter out weird cases and make sure we mostly know what we're fixing:
        # SPDX before Copyright, an implausibly long header, or non-comment
        # lines in between.
        if (
            end < start
            or end - start > 6
            or not all(line.startswith(comment) for line in lines[start:end])
        ):
            return []

        if start < (license_lines - len(license_line_regexes)):
            # replace license where it is
            lines[start : end + 1] = fixed
        else:
            # move license to beginning of file
            del lines[start : end + 1]
            insert_at = _license_insertion_index(lines, comment)
            lines[insert_at:insert_at] = fixed

        return lines

    # no license in the file yet, so we add it
    if start == -1 and end == -1:
        insert_at = _license_insertion_index(lines, comment)

        # add an empty comment line if needed
        if insert_at >= len(lines) or not re.match(rf"{prefix}\s*$", lines[insert_at]):
            lines.insert(insert_at, comment)
            insert_at += 1

        # use the comment-adjusted header, not the raw ``#`` one
        lines[insert_at:insert_at] = fixed
        return lines

    # only one of Copyright / SPDX was found: too ambiguous to fix automatically
    return []
def fix(args):
    """Fix files without proper licenses."""
    # NOTE: the docstring above is also used as the subcommand's help text
    # (``fix.__doc__`` is passed to ``add_parser``), so it is kept as-is.
    #
    # Returns None when there was nothing to fix, 0 when every bad file was
    # fixed, and 1 when at least one file could not be fixed automatically.
    license_errors = _find_license_errors(args)
    if not license_errors.has_errors():
        tty.msg("No license issues found.")
        return

    returncode = 0
    for _, path in license_errors.errors:
        lines = _fix_path(path)
        if not lines:
            print(f"I don't know how to fix {path}")
            returncode = 1
            continue

        # Write to a temp file in the same directory and then swap it into
        # place, so the original is never left half-written.
        parent = os.path.dirname(path) or "."
        temp = tempfile.NamedTemporaryFile("w", encoding="utf-8", dir=parent, delete=False)
        replaced = False
        try:
            with temp:
                # the file was read as UTF-8, so write it back as UTF-8 too
                temp.write("\n".join(lines))
            shutil.copymode(path, temp.name)
            # os.replace overwrites the destination on every platform
            os.replace(temp.name, path)
            replaced = True
        finally:
            # don't leave stray temp files behind if anything above failed
            if not replaced and os.path.exists(temp.name):
                os.unlink(temp.name)

        print(f"Fixed {path}")

    return returncode
def setup_parser(subparser): def setup_parser(subparser):
subparser.add_argument( subparser.add_argument(
"--root", "--root",
@ -196,10 +312,11 @@ def setup_parser(subparser):
sp = subparser.add_subparsers(metavar="SUBCOMMAND", dest="license_command") sp = subparser.add_subparsers(metavar="SUBCOMMAND", dest="license_command")
sp.add_parser("list-files", help=list_files.__doc__) sp.add_parser("list-files", help=list_files.__doc__)
sp.add_parser("verify", help=verify.__doc__) sp.add_parser("verify", help=verify.__doc__)
sp.add_parser("fix", help=fix.__doc__)
def license(parser, args): def license(parser, args):
licensed_files[:] = [re.compile(regex) for regex in licensed_files] licensed_files[:] = [re.compile(regex) for regex in licensed_files]
commands = {"list-files": list_files, "verify": verify} commands = {"list-files": list_files, "verify": verify, "fix": fix}
return commands[args.license_command](args) return commands[args.license_command](args)

View File

@ -4,10 +4,11 @@
import os import os
import re import re
import textwrap
import pytest import pytest
from llnl.util.filesystem import mkdirp, touch from llnl.util.filesystem import mkdirp
import spack.paths import spack.paths
from spack.main import SpackCommand from spack.main import SpackCommand
@ -24,57 +25,188 @@ def test_list_files():
assert os.path.abspath(__file__) in files assert os.path.abspath(__file__) in files
def test_verify(tmpdir): GOOD_HEADER = """\
source_dir = tmpdir.join("lib", "spack", "spack") # Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
# Test cases shared by the verify and fix tests. Each entry is a tuple of
# (filename, expected_txt, header, fixed_header, good):
#   filename     -- name of the file created under lib/spack/spack in the temp root
#   expected_txt -- regex searched for in the `verify` output when the file is not good
#   header       -- initial contents written to the file
#   fixed_header -- header expected after a fix; the fix test only checks whether
#                   it is non-empty (an empty string means "cannot be fixed")
#   good         -- True if the file should already pass verification
parameters = [
(
"wrong_spdx.py",
r"files with wrong SPDX-License-Identifier:\s*1",
textwrap.dedent(
"""\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: LGPL-2.1-only
"""
),
GOOD_HEADER,
False,
),
(
"empty_lines.py",
r"files without license in first 7 lines:\s*1",
textwrap.dedent(
"""\
#
#
#
#
#
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"wrong_devs.py",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
# Copyright Not The Right Developers. See BROKEN file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"old_llnl.py",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER,
False,
),
(
"old_llnl.lp",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
% Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
% Spack Project Developers. See top-level COPYRIGHT file for details.
%
% SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
GOOD_HEADER.replace("#", "%"),
False,
),
("no_header.py", r"files without license in first 7 lines:\s*1", "", GOOD_HEADER, False),
# files without a known extension: names end in "-test-script"
(
"test-script",
"",
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
True,
),
(
"python-lang-test-script",
"",
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
True,
),
("unfixable-test-script", "", "", "", False), # because script + no shebang
(
"bad-test-script",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
#!/usr/bin/env python3
#
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
"#!/usr/bin/env python3\n#\n" + GOOD_HEADER,
False,
),
(
"bad-python-lang-test-script",
r"files not containing expected license:\s*1",
textwrap.dedent(
"""\
#!/usr/bin/env python3
# -*- python -*-
#
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
),
"#!/usr/bin/env python3\n# -*- python -*-\n#\n" + GOOD_HEADER,
False,
),
# already-correct files: neither verify nor fix should report them
("good.py", "", GOOD_HEADER, GOOD_HEADER, True),
("good.lp", "", GOOD_HEADER.replace("#", "%"), GOOD_HEADER.replace("#", "%"), True),
]
@pytest.mark.parametrize(
"filename,expected_txt,header,fixed_header,good",
parameters,
ids=[param[0] for param in parameters],
)
class TestLicenses:
def _setup_license_root(self, tmpdir, header, filename):
source_dir = tmpdir / "lib" / "spack" / "spack"
mkdirp(str(source_dir)) mkdirp(str(source_dir))
no_header = source_dir.join("no_header.py") source_file = source_dir / filename
touch(str(no_header)) with source_file.open("w") as f:
f.write(header)
lgpl_header = source_dir.join("lgpl_header.py") return source_file
with lgpl_header.open("w") as f:
f.write(
"""\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: LGPL-2.1-only
"""
)
not_in_first_n_lines = source_dir.join("not_in_first_n_lines.py") def test_license_verify(self, filename, expected_txt, header, fixed_header, good, tmpdir):
with not_in_first_n_lines.open("w") as f: source_file = self._setup_license_root(tmpdir, header, filename)
f.write(
"""\
#
#
#
#
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
)
correct_header = source_dir.join("correct_header.py")
with correct_header.open("w") as f:
f.write(
"""\
# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
)
out = license("--root", str(tmpdir), "verify", fail_on_error=False) out = license("--root", str(tmpdir), "verify", fail_on_error=False)
assert str(no_header) in out if not good:
assert str(lgpl_header) in out assert str(source_file) in out
assert str(not_in_first_n_lines) in out assert "1 improperly licensed file" in out
assert str(correct_header) not in out assert re.search(expected_txt, out)
assert "3 improperly licensed files" in out assert license.returncode == 1
assert re.search(r"files not containing expected license:\s*1", out) else:
assert re.search(r"files with wrong SPDX-License-Identifier:\s*1", out) assert license.returncode == 0
assert re.search(r"files without license in first 6 lines:\s*1", out)
def test_license_fix(self, filename, expected_txt, header, fixed_header, good, tmpdir):
source_file = self._setup_license_root(tmpdir, header, filename)
out = license("--root", str(tmpdir), "fix", fail_on_error=False)
if good:
assert str(source_file) not in out
assert license.returncode == 0
return
if fixed_header:
print("OUT", out)
assert f"Fixed {str(source_file)}" in out
assert license.returncode == 0
license("--root", str(tmpdir), "verify", fail_on_error=False)
assert license.returncode == 0
else:
assert f"I don't know how to fix {str(source_file)}" in out
assert license.returncode == 1 assert license.returncode == 1

View File

@ -1346,7 +1346,7 @@ _spack_license() {
then then
SPACK_COMPREPLY="-h --help --root" SPACK_COMPREPLY="-h --help --root"
else else
SPACK_COMPREPLY="list-files verify" SPACK_COMPREPLY="list-files verify fix"
fi fi
} }
@ -1358,6 +1358,10 @@ _spack_license_verify() {
SPACK_COMPREPLY="-h --help" SPACK_COMPREPLY="-h --help"
} }
# Completion for `spack license fix`: the only candidates are the help flags.
_spack_license_fix() {
SPACK_COMPREPLY="-h --help"
}
_spack_list() { _spack_list() {
if $list_options if $list_options
then then

View File

@ -2105,6 +2105,7 @@ complete -c spack -n '__fish_spack_using_command install' -l deprecated -d 'allo
set -g __fish_spack_optspecs_spack_license h/help root= set -g __fish_spack_optspecs_spack_license h/help root=
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a list-files -d 'list files in spack that should have license headers' complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a list-files -d 'list files in spack that should have license headers'
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a verify -d 'verify that files in spack have the right license header' complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a verify -d 'verify that files in spack have the right license header'
complete -c spack -n '__fish_spack_using_command_pos 0 license' -f -a fix -d 'Fix files without proper licenses.'
complete -c spack -n '__fish_spack_using_command license' -s h -l help -f -a help complete -c spack -n '__fish_spack_using_command license' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license' -s h -l help -d 'show this help message and exit' complete -c spack -n '__fish_spack_using_command license' -s h -l help -d 'show this help message and exit'
complete -c spack -n '__fish_spack_using_command license' -l root -r -f -a root complete -c spack -n '__fish_spack_using_command license' -l root -r -f -a root
@ -2120,6 +2121,11 @@ set -g __fish_spack_optspecs_spack_license_verify h/help
complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -f -a help complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -d 'show this help message and exit' complete -c spack -n '__fish_spack_using_command license verify' -s h -l help -d 'show this help message and exit'
# spack license fix
set -g __fish_spack_optspecs_spack_license_fix h/help
complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -f -a help
complete -c spack -n '__fish_spack_using_command license fix' -s h -l help -d 'show this help message and exit'
# spack list # spack list
set -g __fish_spack_optspecs_spack_list h/help r/repo= d/search-description format= v/virtuals t/tag= count update= set -g __fish_spack_optspecs_spack_list h/help r/repo= d/search-description format= v/virtuals t/tag= count update=
complete -c spack -n '__fish_spack_using_command_pos_remainder 0 list' -f -a '(__fish_spack_packages)' complete -c spack -n '__fish_spack_using_command_pos_remainder 0 list' -f -a '(__fish_spack_packages)'