py-hail: new package (#44521)
This commit is contained in:
parent
f9fa160a24
commit
fb315c37ba
236
var/spack/repos/builtin/packages/py-hail/package.py
Normal file
236
var/spack/repos/builtin/packages/py-hail/package.py
Normal file
@ -0,0 +1,236 @@
|
||||
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
|
||||
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||
#
|
||||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
from spack.package import *
|
||||
|
||||
|
||||
class PyHail(MakefilePackage):
    """Cloud-native genomic dataframes and batch computing (Python API)"""

    homepage = "https://hail.is"
    git = "https://github.com/hail-is/hail.git"
    # We can't use tarballs because HAIL needs to look up git commit metadata
    # to determine its version. We could patch this, but that is not yet
    # implemented.
    # url = "https://github.com/hail-is/hail/archive/refs/tags/0.2.130.tar.gz"

    maintainers("teaguesterling")
    license("MIT", checked_by="teaguesterling")

    version("0.2.132", commit="678e1f52b9999cb05ebf03fd360e5c4506bd6dad")
    version("0.2.131", commit="11d9b2ff89da9ef6a4f576be89f1f06959580ea4")
    version("0.2.130", commit="bea04d9c79b5ca739364e8c121132845475f617a")
    version("0.2.129", commit="41126be2df04e4ef823cefea40fba4cadbe5db8a")

    # Extra sources fetched up front and staged inside the Hail source tree
    resource(
        name="catch",
        url="https://github.com/catchorg/Catch2/releases/download/v2.6.0/catch.hpp",
        sha256="a86133b34d4721b6e1cf7171981ea469789f83f2475907b4033012577e4975fe",
        destination="hail/src/main/resources/include/catch.hpp",
        expand=False,
    )

    resource(
        name="libsimdpp-2.1",
        extension="tar.gz",
        url="https://storage.googleapis.com/hail-common/libsimdpp-2.1.tar.gz",
        sha256="b0e986b20bef77cd17004dd02db0c1ad9fab9c70d4e99594a9db1ee6a345be93",
        destination="hail/src/main/c",
    )

    resource(
        name="mill-0.11.7",
        url="https://repo1.maven.org/maven2/com/lihaoyi/mill-dist/0.11.7/mill-dist-0.11.7.jar",
        sha256="278b430150af899495d360d1f886e223e78bb4a20e67144a240bfb7e2d4f6085",
        destination="hail/mill",
        expand=False,
    )

    variant("native", default=True, description="Compile C & C++ HAIL optimizations")
    variant(
        "query_backend",
        values=["undefined", "spark", "batch"],
        default="spark",
        description="Configure HAIL query backend at build",
    )

    depends_on("python@3.9:", type=("build", "run"))
    depends_on("py-pip", type="build")
    depends_on("py-wheel", type="build")
    depends_on("py-build@1.1+virtualenv", type="build", when="@0.2.131:")
    depends_on("c", type="build", when="+native")
    depends_on("cxx", type="build", when="+native")

    # HAIL bundle is tied to specific runtime versions
    # HAIL spec, Java spec, Spark spec, Scala spec
    # We're not accurately capturing previous versions
    for hail, java, spark, scala in [
        # 0.2.130 and before (to somewhere around 0.2.64) used Spark 3.3
        # And either Java 8 or Java 11
        (":0.2.130", "8,11", "3.3", "2.12"),
        # 0.2.131 updated to Java 11 and Spark 3.5
        # Undocumented bump was to scala 2.12.13 for scala.annotation.noerror
        ("0.2.131:", "11", "3.5", "2.12.18:2.12"),
    ]:
        with default_args(type=("build", "run"), when=f"@{hail}"):
            depends_on(f"java@{java}")
            depends_on(f"scala@{scala}")
            depends_on(f"spark@{spark}")
            # This should match spark but isn't actually enforced
            # by the PySpark package and they can conflict.
            depends_on(f"py-pyspark@{spark}")

    with default_args(type=("build", "link"), when="+native"):
        # Hail build requirements
        depends_on("blas")
        depends_on("lapack")
        depends_on("lz4")

    with default_args(type=("build", "run")):
        depends_on("py-avro@1.10:1.11")
        depends_on("py-bokeh@3:3.3")
        depends_on("py-decorator@:4")
        depends_on("py-deprecated@1.2.10:1.2")
        depends_on("py-numpy@:1")
        depends_on("py-pandas@2:2")
        depends_on("py-parsimonious@:0")
        depends_on("py-plotly@5.18:5")
        depends_on("py-protobuf@3.20.2")
        depends_on("py-requests@2.31:2")
        depends_on("py-scipy@1.3:1.11")

        # hailtop requirements
        depends_on("py-aiodns@2")
        depends_on("py-aiohttp@3.9")
        depends_on("py-azure-identity@1.6:1")
        depends_on("py-azure-mgmt-storage@20.1.0")
        depends_on("py-azure-storage-blob@12.11:12")
        depends_on("py-boto3@1.17:1")
        depends_on("py-botocore@1.20:1")
        depends_on("py-dill@0.3.6:0.3")
        depends_on("py-frozenlist@1.3.1:1")
        depends_on("py-google-auth@2.14.1:2")
        depends_on("py-google-auth-oauthlib@0.5.2:0")
        depends_on("py-humanize@1.0.0:1")
        depends_on("py-janus@0.6:1.0")
        depends_on("py-nest-asyncio@1.5.8:1")
        depends_on("py-rich@12.6.0:12")
        depends_on("py-orjson@3.9.15:3")
        depends_on("py-typer@0.9.0:0")
        depends_on("py-python-json-logger@2.0.2:2")
        depends_on("py-pyyaml@6.0:7")
        depends_on("py-sortedcontainers@2.4.0:2")
        depends_on("py-tabulate@0.8.9:0")
        depends_on("py-uvloop@0.19.0:0")
        depends_on("py-jproperties@2.1.1:2")
        # Undocumented runtime requirements for hailtop
        # These are also required to use the HAIL API
        # but are not explicitly mentioned anywhere
        depends_on("py-azure-mgmt-core")
        depends_on("py-typing-extensions")

    # The Makefile lives in the "hail" subdirectory of the repository
    build_directory = "hail"

    def patch(self):
        """Inject the known revision and version into Hail's build files.

        Rewrites the three ``$(error ...)`` fallbacks in ``hail/version.mk``
        into plain assignments, and makes ``hail/build.sc`` substitute the
        package's revision when the VCS lookup yields ``"no-vcs"``.
        """
        # Hail will fail to build if it cannot determine a commit hash from git
        # which will not be available in a spack cache. Since we know it from
        # the package, we can inject it in the failure and move forward.
        revision = self.hail_revision
        version = self.hail_pip_version

        filter_file(
            r'\$\(error "git rev-parse HEAD" failed to produce output\)',
            f"REVISION := {revision}",
            "hail/version.mk",
        )
        filter_file(
            r'\$\(error "git rev-parse --short=12 HEAD" failed to produce output\)',
            f"SHORT_REVISION := {revision[:12]}",
            "hail/version.mk",
        )
        filter_file(
            r'\$\(error "git rev-parse --abbrev-ref HEAD" failed to produce output\)',
            f"BRANCH := tags/{version}",
            "hail/version.mk",
        )

        # Also need to make sure that build-info.properties gets the right revision
        # which ends up improperly calculated in scala and will crash at runtime
        filter_file(
            r"val revision = VcsVersion\.vcsState\(\)\.currentRevision",
            "val vcs_revision = VcsVersion.vcsState().currentRevision\n"
            f' val revision = if(vcs_revision == "no-vcs") "{revision}" else vcs_revision\n',
            "hail/build.sc",
        )

    @property
    def hail_revision(self):
        """Return the revision string to bake into the Hail build.

        This is the ``commit`` recorded for the selected version when there
        is one; otherwise the version's joined digits right-padded with
        ``"0"`` to 40 characters.
        """
        version = self.version
        # Use .get() rather than indexing: a version that was not declared
        # with a version() directive would otherwise raise KeyError here
        # instead of reaching the padded fallback below.
        version_info = self.versions.get(version, {})
        # REVISION must look like a hash or Hail crashes at startup
        # Technically, it needs to be at least 12 characters
        fallback = version.joined.string.ljust(40, "0")
        return version_info.get("commit", fallback)

    @property
    def hail_pip_version(self):
        """Return the first three version components as a string."""
        # This is the same behavior as is defined in hail/version.mk
        return f"{self.spec.version.up_to(3)}"

    @property
    def build_wheel_file_path(self):
        """Return the wheel path, relative to the build directory.

        The same path is used as the make target in ``build_targets`` and as
        the file handed to pip in ``install``.
        """
        wheel_file = f"hail-{self.hail_pip_version}-py3-none-any.whl"
        wheel_dir = join_path("build", "deploy", "dist")
        return join_path(wheel_dir, wheel_file)

    def flag_handler(self, name, flags):
        """Append the lz4 include directory to ``cxxflags`` for +native builds.

        All flags are returned in the first slot of the ``(flags, None, None)``
        triple, which Spack treats as flags to inject through its compiler
        wrappers.
        """
        if name == "cxxflags" and self.spec.satisfies("+native"):
            # HAIL build doesn't find lz4:
            # https://discuss.hail.is/t/ld-pruning-repeated-errors/1838/14
            flags.append(f"-I{self.spec['lz4'].prefix.include}")
        return (flags, None, None)

    @property
    def build_targets(self):
        """Return the make arguments: the wheel target, then VAR=value pairs."""
        spec = self.spec

        # Hail likes variables passed in to Make
        variables = [
            f"HAIL_PYTHON3={spec['python'].home.bin.python3}",
            f"PIP={spec['py-pip'].home.bin.pip}",
            f"SCALA_VERSION={spec['scala'].version}",
            f"SPARK_VERSION={spec['spark'].version}",
        ]
        if spec.satisfies("+native"):
            variables += ["HAIL_COMPILE_NATIVES=1"]

        # We're not using the documented target (`install-on-cluster`)
        # because it relies on pip to install and resolve dependencies
        # directly: it does everything in one step and ends up downloading
        # all of the dependencies via pip.
        targets = [
            # This may be too specific but it would detect failures
            # and fail to build instead of taking a long time to build
            # and then failing at install time.
            self.build_wheel_file_path
        ]

        return targets + variables

    def install(self, spec, prefix):
        """Install the built wheel into ``prefix`` and set the query backend.

        The wheel is installed with pip using ``--no-deps``. When the
        ``query_backend`` variant is not ``"undefined"`` and a ``hailctl``
        executable can be found, it is used to set ``query/backend``.
        """
        pip = which("pip")
        wheel = self.build_wheel_file_path

        # This mimics the install-on-cluster target but avoids anything
        # that utilizes pip to resolve dependencies
        with working_dir(join_path(self.stage.source_path, "hail")):
            pip("install", "--use-pep517", "--no-deps", f"--prefix={prefix}", wheel)

        backend = spec.variants["query_backend"].value
        if backend != "undefined":
            # NOTE(review): `which` searches PATH and nothing in this package
            # adds the freshly installed prefix to it, so this lookup may
            # return None and silently skip the configuration - confirm.
            hailctl = which("hailctl")  # Should be installed from above
            if hailctl is not None:  # but it might not be
                hailctl("config", "set", "query/backend", f"{backend}")
|
Loading…
Reference in New Issue
Block a user