From 631bddc52ec1ef292e8afe92fd626546143cc510 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Tue, 4 Mar 2025 13:20:52 +0100 Subject: [PATCH] py-pyarrow: add v19.0.1 (#49149) * py-pyarrow: add v19.0.1 * Environment variables no longer needed either * Remove py-pyarrow variants --- .../repos/builtin/packages/arrow/package.py | 7 +- .../repos/builtin/packages/py-cudf/package.py | 3 +- .../builtin/packages/py-dask-expr/package.py | 3 +- .../builtin/packages/py-datasets/package.py | 9 +- .../builtin/packages/py-pyarrow/package.py | 89 ++++++++++--------- 5 files changed, 62 insertions(+), 49 deletions(-) diff --git a/var/spack/repos/builtin/packages/arrow/package.py b/var/spack/repos/builtin/packages/arrow/package.py index 71742f2b9c3..1caf9441c51 100644 --- a/var/spack/repos/builtin/packages/arrow/package.py +++ b/var/spack/repos/builtin/packages/arrow/package.py @@ -16,6 +16,7 @@ class Arrow(CMakePackage, CudaPackage): license("Apache-2.0") + version("19.0.1", sha256="4c898504958841cc86b6f8710ecb2919f96b5e10fa8989ac10ac4fca8362d86a") version("18.0.0", sha256="9c473f2c9914c59ab571761c9497cf0e5cfd3ea335f7782ccc6121f5cb99ae9b") version("16.1.0", sha256="9762d9ecc13d09de2a03f9c625a74db0d645cb012de1e9a10dfed0b4ddc09524") version("15.0.2", sha256="4735b349845bff1fe95ed11abbfed204eb092cabc37523aa13a80cb830fe5b5e") @@ -41,8 +42,8 @@ class Arrow(CMakePackage, CudaPackage): version("0.9.0", sha256="65f89a3910b6df02ac71e4d4283db9b02c5b3f1e627346c7b6a5982ae994af91") version("0.8.0", sha256="c61a60c298c30546fc0b418a35be66ef330fb81b06c49928acca7f1a34671d54") - depends_on("c", type="build") # generated - depends_on("cxx", type="build") # generated + depends_on("c", type="build") + depends_on("cxx", type="build") depends_on("boost@1.60: +filesystem +system") depends_on("brotli", when="+brotli") @@ -95,6 +96,7 @@ class Arrow(CMakePackage, CudaPackage): variant( "compute", default=False, description="Computational kernel functions and other support" ) + variant("dataset", default=False, description="Build the Arrow Dataset integration") variant("gandiva", default=False, description="Build Gandiva support") variant( "glog", @@ -156,6 +158,7 @@ def cmake_args(self): args.append(self.define_from_variant("ARROW_COMPUTE", "compute")) args.append(self.define_from_variant("ARROW_CUDA", "cuda")) + args.append(self.define_from_variant("ARROW_DATASET", "dataset")) args.append(self.define_from_variant("ARROW_GANDIVA", "gandiva")) args.append(self.define_from_variant("ARROW_GLOG", "glog")) args.append(self.define_from_variant("ARROW_HDFS", "hdfs")) diff --git a/var/spack/repos/builtin/packages/py-cudf/package.py b/var/spack/repos/builtin/packages/py-cudf/package.py index 172df2582ce..735495b3229 100644 --- a/var/spack/repos/builtin/packages/py-cudf/package.py +++ b/var/spack/repos/builtin/packages/py-cudf/package.py @@ -29,7 +29,8 @@ class PyCudf(PythonPackage): depends_on("py-cython", type="build") depends_on("py-numba@0.40.0:", type=("build", "run")) depends_on("py-numpy@1.14.4:", type=("build", "run")) - depends_on("py-pyarrow+cuda+orc+parquet", type=("build", "run")) + depends_on("py-pyarrow", type=("build", "run")) + depends_on("arrow+cuda+orc+parquet") depends_on("py-pandas@0.23.4:", type=("build", "run")) depends_on("py-rmm", type=("build", "run")) depends_on("cuda@10:") diff --git a/var/spack/repos/builtin/packages/py-dask-expr/package.py b/var/spack/repos/builtin/packages/py-dask-expr/package.py index 89ecb222b36..d51cd055fcc 100644 --- a/var/spack/repos/builtin/packages/py-dask-expr/package.py +++ b/var/spack/repos/builtin/packages/py-dask-expr/package.py @@ -21,5 +21,6 @@ class PyDaskExpr(PythonPackage): # Can't do circular run-time dependencies yet? # depends_on("py-dask@2024.7.1", type="run") - depends_on("py-pyarrow@7: +dataset", type="run") + depends_on("py-pyarrow@7:", type="run") + depends_on("arrow+dataset") depends_on("py-pandas@2:", type="run") diff --git a/var/spack/repos/builtin/packages/py-datasets/package.py b/var/spack/repos/builtin/packages/py-datasets/package.py index 3b9c6b80176..1e3fd7fca51 100644 --- a/var/spack/repos/builtin/packages/py-datasets/package.py +++ b/var/spack/repos/builtin/packages/py-datasets/package.py @@ -39,7 +39,8 @@ class PyDatasets(PythonPackage): depends_on("py-fsspec@:0.8.0", when="^python@:3.7") depends_on("py-huggingface-hub@:0.0") depends_on("py-importlib-metadata", when="^python@:3.7") - depends_on("py-pyarrow@1:3+parquet") + depends_on("py-pyarrow@1:3") + depends_on("arrow+parquet") depends_on("py-tqdm@4.27:4.49") with when("@2.8.0"): depends_on("py-responses@:0.18") @@ -49,7 +50,8 @@ class PyDatasets(PythonPackage): depends_on("py-dill@:0.3.6") depends_on("py-fsspec@2021.11.1:+http") depends_on("py-huggingface-hub@0.2:0") - depends_on("py-pyarrow@6:+parquet") + depends_on("py-pyarrow@6:") + depends_on("arrow+parquet") depends_on("py-tqdm@4.62.1:") depends_on("python@3.7:") with when("@2.20.0:"): @@ -57,7 +59,8 @@ class PyDatasets(PythonPackage): depends_on("py-dill@0.3.0:0.3.8") # temporary upper bound depends_on("py-fsspec@2023.1.0:2024.5.0+http") depends_on("py-huggingface-hub@0.21.2:") - depends_on("py-pyarrow@15:+parquet+dataset") + depends_on("py-pyarrow@15:") + depends_on("arrow+parquet+dataset") depends_on("py-requests@2.32.2:") depends_on("py-tqdm@4.66.3:") depends_on("python@3.8:") diff --git a/var/spack/repos/builtin/packages/py-pyarrow/package.py b/var/spack/repos/builtin/packages/py-pyarrow/package.py index eca80609067..cd48541f232 100644 --- a/var/spack/repos/builtin/packages/py-pyarrow/package.py +++ b/var/spack/repos/builtin/packages/py-pyarrow/package.py @@ -5,7 +5,7 @@ from spack.package import * -class PyPyarrow(PythonPackage, CudaPackage): +class PyPyarrow(PythonPackage): """A cross-language development platform for in-memory data. This package contains the Python bindings. @@ -19,6 +19,7 @@ class PyPyarrow(PythonPackage, CudaPackage): license("Apache-2.0") + version("19.0.1", sha256="3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e") version("16.1.0", sha256="15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315") version("15.0.2", sha256="9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9") version("14.0.2", sha256="36cef6ba12b499d864d1def3e990f97949e0b79400d08b7cf74504ffbd3eb025") @@ -36,39 +37,31 @@ class PyPyarrow(PythonPackage, CudaPackage): version("0.11.0", sha256="07a6fd71c5d7440f2c42383dd2c5daa12d7f0a012f1e88288ed08a247032aead") version("0.9.0", sha256="7db8ce2f0eff5a00d6da918ce9f9cfec265e13f8a119b4adb1595e5b19fd6242") - depends_on("cxx", type="build") # generated + depends_on("cxx", type="build") - variant("parquet", default=False, description="Build with Parquet support") - variant("orc", default=False, description="Build with orc support") - variant("dataset", default=False, description="Build with Dataset support") + with default_args(type="build"): + # CMakeLists.txt + depends_on("cmake@3.16:", when="@13:") + depends_on("cmake@3.5:", when="@11:") + depends_on("cmake@3.2:", when="@0.17:") + depends_on("cmake@2.7:") - conflicts("~parquet", when="+dataset") + # cmake_modules and pyarrow/__init__.py + depends_on("pkgconfig") - depends_on("cmake@3.0.0:", type="build") - depends_on("pkgconfig", type="build") - depends_on("python@3.8:", type=("build", "run"), when="@13:") - depends_on("python@3.7:", type=("build", "run"), when="@7:") - depends_on("python@3.6:", type=("build", "run"), when="@3:") - depends_on("python@3.5:", type=("build", "run"), when="@0.17:") - depends_on("py-setuptools", type="build") - depends_on("py-setuptools@40.1.0:", type="build", when="@10.0.1:") - depends_on("py-setuptools@38.6.0:", type="build", when="@7:") - depends_on("py-setuptools-scm@:7", type="build", when="@0.15:") - depends_on("py-cython", type="build") - depends_on("py-cython@0.29.31:", type="build", when="@14:") - depends_on("py-cython@0.29.31:2", type="build", when="@12:13") - depends_on("py-cython@0.29.22:2", type="build", when="@8:11") - depends_on("py-cython@0.29:2", type="build", when="@0.15:7") - depends_on("py-cython@:2", type="build", when="@:0.14") - # in newer pip versions --install-option does not exist - depends_on("py-pip@:23.0", type="build") - - depends_on("py-numpy@1.16.6:", type=("build", "run"), when="@3:") - # Prior to python 3.9 numpy must be >=0.14,<1.25 - depends_on("py-numpy@0.14:1.24", when="^python@:3.8", type=("build", "run")) - depends_on("py-numpy@1.25:", when="^python@3.9:", type=("build", "run")) - # https://github.com/apache/arrow/issues/39532 - depends_on("py-numpy@:1", when="@:15", type=("build", "run")) + # pyproject.toml, setup.py + depends_on("py-cython@0.29.31:", when="@14:") + depends_on("py-cython@0.29.31:2", when="@12:13") + depends_on("py-cython@0.29.22:2", when="@8:11") + depends_on("py-cython@0.29:2", when="@0.15:7") + depends_on("py-cython@:2", when="@:0.14") + depends_on("py-setuptools-scm@8:+toml", when="@17:") + depends_on("py-setuptools-scm", when="@16") + depends_on("py-setuptools-scm@:7", when="@0.15:15") + depends_on("py-setuptools@64:", when="@17:") + depends_on("py-setuptools@40.1:", when="@10.0.1:") + depends_on("py-setuptools@38.6:", when="@7:") + depends_on("py-setuptools") arrow_versions = ( "@0.9.0", @@ -87,29 +80,41 @@ class PyPyarrow(PythonPackage, CudaPackage): "@14.0.2", "@15.0.2", "@16.1.0", + "@19.0.1", ) for v in arrow_versions: depends_on("arrow+python" + v, when=v) - depends_on("arrow+parquet+python" + v, when="+parquet" + v) - depends_on("arrow+cuda" + v, when="+cuda" + v) - depends_on("arrow+orc" + v, when="+orc" + v) + + # Historical dependencies + # In newer pip versions --install-option does not exist + depends_on("py-pip@:23.0", when="@:16", type="build") + + with default_args(type=("build", "run")): + # pyproject.toml, setup.py + depends_on("py-numpy@1.16.6:", when="@3:17") + depends_on("py-numpy@1.14:", when="@0.11:") + depends_on("py-numpy@1.10:") + depends_on("py-numpy@:1", when="@:15") patch("for_aarch64.patch", when="@0 target=aarch64:") + # Starting with pyarrow 17+, backend support is built if arrow was built with it + @when("@:16") def setup_build_environment(self, env): - env.set("PYARROW_WITH_PARQUET", self.spec.satisfies("+parquet")) - env.set("PYARROW_WITH_CUDA", self.spec.satisfies("+cuda")) - env.set("PYARROW_WITH_ORC", self.spec.satisfies("+orc")) - env.set("PYARROW_WITH_DATASET", self.spec.satisfies("+dataset")) + env.set("PYARROW_WITH_PARQUET", self.spec.satisfies("^arrow+parquet")) + env.set("PYARROW_WITH_CUDA", self.spec.satisfies("^arrow+cuda")) + env.set("PYARROW_WITH_ORC", self.spec.satisfies("^arrow+orc")) + env.set("PYARROW_WITH_DATASET", self.spec.satisfies("^arrow+dataset")) + @when("@:16") def install_options(self, spec, prefix): args = [] - if spec.satisfies("+parquet"): + if spec.satisfies("^arrow+parquet"): args.append("--with-parquet") - if spec.satisfies("+cuda"): + if spec.satisfies("^arrow+cuda"): args.append("--with-cuda") - if spec.satisfies("+orc"): + if spec.satisfies("^arrow+orc"): args.append("--with-orc") - if spec.satisfies("+dataset"): + if spec.satisfies("^arrow+dataset"): args.append("--with-dataset") return args