mlc-llm: new package and dependency (#44726)
This commit is contained in:
parent
22e40541c7
commit
eed7a1af24
36
var/spack/repos/builtin/packages/apache-tvm/package.py
Normal file
36
var/spack/repos/builtin/packages/apache-tvm/package.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
|
||||||
|
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
|
|
||||||
|
from spack.package import *
|
||||||
|
|
||||||
|
|
||||||
|
class ApacheTvm(CMakePackage, CudaPackage):
|
||||||
|
"""Apache TVM is an open source machine learning compiler framework for
|
||||||
|
CPUs, GPUs, and machine learning accelerators. It aims to enable machine
|
||||||
|
learning engineers to optimize and run computations efficiently on any
|
||||||
|
hardware backend."""
|
||||||
|
|
||||||
|
homepage = "https://tvm.apache.org/"
|
||||||
|
url = "https://dlcdn.apache.org/tvm/tvm-v0.16.0/apache-tvm-src-v0.16.0.tar.gz"
|
||||||
|
|
||||||
|
license("Apache-2.0", checked_by="alex391")
|
||||||
|
|
||||||
|
version("0.16.0", sha256="55e2629c39248ef3b1ee280e34a960182bd17bea7ae0d0fa132bbdaaf5aba1ac")
|
||||||
|
|
||||||
|
variant("llvm", default=True, description="Build with llvm for CPU codegen")
|
||||||
|
|
||||||
|
depends_on("c", type="build")
|
||||||
|
depends_on("cxx", type="build")
|
||||||
|
depends_on("cmake@3.18:", type="build")
|
||||||
|
depends_on("python@3.7:3.8", type=("build", "run"))
|
||||||
|
depends_on("llvm@4:", type="build", when="+llvm")
|
||||||
|
depends_on("cuda@8:", when="+cuda")
|
||||||
|
|
||||||
|
def cmake_args(self):
|
||||||
|
return [
|
||||||
|
self.define_from_variant("USE_CUDA", "cuda"),
|
||||||
|
self.define_from_variant("USE_LLVM", "llvm"),
|
||||||
|
]
|
89
var/spack/repos/builtin/packages/mlc-llm/package.py
Normal file
89
var/spack/repos/builtin/packages/mlc-llm/package.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
|
||||||
|
# Spack Project Developers. See the top-level COPYRIGHT file for details.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||||
|
|
||||||
|
|
||||||
|
from spack.package import *
|
||||||
|
|
||||||
|
|
||||||
|
class MlcLlm(CMakePackage, CudaPackage):
|
||||||
|
"""MLC LLM is a machine learning compiler and high-performance deployment
|
||||||
|
engine for large language models. The mission of this project is to enable
|
||||||
|
everyone to develop, optimize, and deploy AI models natively on everyone's
|
||||||
|
platforms."""
|
||||||
|
|
||||||
|
homepage = "https://github.com/mlc-ai/mlc-llm"
|
||||||
|
git = "https://github.com/mlc-ai/mlc-llm.git"
|
||||||
|
url = "https://github.com/mlc-ai/mlc-llm/archive/refs/tags/v0.1.dev0.tar.gz"
|
||||||
|
|
||||||
|
license("Apache-2.0", checked_by="alex391")
|
||||||
|
|
||||||
|
version("2024-06-13", commit="ceba9511df3da06a8541916522d57fdc99cb6f54", submodules=True)
|
||||||
|
|
||||||
|
depends_on("cmake@3.24:", type="build")
|
||||||
|
depends_on("rust", type="build")
|
||||||
|
depends_on("cxx", type="build")
|
||||||
|
depends_on("python@3.11", type="build")
|
||||||
|
depends_on("apache-tvm")
|
||||||
|
|
||||||
|
depends_on("cuda@11.8:", when="+cuda")
|
||||||
|
|
||||||
|
variant(
|
||||||
|
"flash-infer",
|
||||||
|
default=False,
|
||||||
|
description="Use FlashInfer? (need CUDA w/ compute capability 80;86;89;90)",
|
||||||
|
when="+cuda",
|
||||||
|
)
|
||||||
|
conflicts("cuda_arch=none", when="+flash-infer")
|
||||||
|
|
||||||
|
unsupported_flash_infer_cuda_archs = filter(
|
||||||
|
lambda arch: arch not in ["80", "86", "89", "90"], CudaPackage.cuda_arch_values
|
||||||
|
)
|
||||||
|
for arch in unsupported_flash_infer_cuda_archs:
|
||||||
|
conflicts(
|
||||||
|
f"cuda_arch={arch}",
|
||||||
|
when="+flash-infer",
|
||||||
|
msg=f"CUDA architecture {arch} is not supported when +flash-infer",
|
||||||
|
)
|
||||||
|
|
||||||
|
def patch(self):
|
||||||
|
with open("cmake/config.cmake", "w") as f:
|
||||||
|
f.write(self._gen_cmake_config())
|
||||||
|
|
||||||
|
def _gen_cmake_config(self) -> str:
|
||||||
|
"""
|
||||||
|
Generate string for cmake/config.cmake (based on cmake/gen_cmake_config.py)
|
||||||
|
"""
|
||||||
|
|
||||||
|
tvm_home = self.spec["apache-tvm"].prefix
|
||||||
|
|
||||||
|
cmake_config_str = f"set(TVM_SOURCE_DIR {tvm_home})\n"
|
||||||
|
cmake_config_str += "set(CMAKE_BUILD_TYPE RelWithDebInfo)\n"
|
||||||
|
|
||||||
|
if self.spec.satisfies("+cuda"):
|
||||||
|
cmake_config_str += "set(USE_CUDA ON)\n"
|
||||||
|
cmake_config_str += "set(USE_THRUST ON)\n"
|
||||||
|
else:
|
||||||
|
cmake_config_str += "set(USE_CUDA OFF)\n"
|
||||||
|
|
||||||
|
# FlashInfer related
|
||||||
|
if self.spec.satisfies("+flash-infer"):
|
||||||
|
cmake_config_str += "set(USE_FLASHINFER ON)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_ENABLE_FP8 OFF)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_ENABLE_BF16 OFF)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_GEN_PAGE_SIZES 16)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_GEN_HEAD_DIMS 128)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_GEN_KV_LAYOUTS 0 1)\n"
|
||||||
|
cmake_config_str += "set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)\n"
|
||||||
|
cmake_config_str += 'set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")\n'
|
||||||
|
cmake_config_str += 'set(FLASHINFER_GEN_CASUALS "false" "true")\n'
|
||||||
|
|
||||||
|
cuda_archs = ";".join(self.spec.variants["cuda_arch"].value)
|
||||||
|
cmake_config_str += f"set(FLASHINFER_CUDA_ARCHITECTURES {cuda_archs})\n"
|
||||||
|
cmake_config_str += f"set(CMAKE_CUDA_ARCHITECTURES {cuda_archs})\n"
|
||||||
|
else:
|
||||||
|
cmake_config_str += "set(USE_FLASHINFER OFF)\n"
|
||||||
|
|
||||||
|
return cmake_config_str
|
Loading…
Reference in New Issue
Block a user