mlc-llm: new package and dependency (#44726)

Alex Leute 2024-08-19 03:33:00 -04:00 committed by GitHub
parent 22e40541c7
commit eed7a1af24
2 changed files with 125 additions and 0 deletions

var/spack/repos/builtin/packages/apache-tvm/package.py
@@ -0,0 +1,36 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

from spack.package import *


class ApacheTvm(CMakePackage, CudaPackage):
    """Apache TVM is an open source machine learning compiler framework for
    CPUs, GPUs, and machine learning accelerators. It aims to enable machine
    learning engineers to optimize and run computations efficiently on any
    hardware backend."""

    homepage = "https://tvm.apache.org/"
    url = "https://dlcdn.apache.org/tvm/tvm-v0.16.0/apache-tvm-src-v0.16.0.tar.gz"

    license("Apache-2.0", checked_by="alex391")

    version("0.16.0", sha256="55e2629c39248ef3b1ee280e34a960182bd17bea7ae0d0fa132bbdaaf5aba1ac")

    variant("llvm", default=True, description="Build with llvm for CPU codegen")

    depends_on("c", type="build")
    depends_on("cxx", type="build")

    depends_on("cmake@3.18:", type="build")
    depends_on("python@3.7:3.8", type=("build", "run"))
    depends_on("llvm@4:", type="build", when="+llvm")
    depends_on("cuda@8:", when="+cuda")

    def cmake_args(self):
        return [
            self.define_from_variant("USE_CUDA", "cuda"),
            self.define_from_variant("USE_LLVM", "llvm"),
        ]
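
A note on cmake_args above: define_from_variant turns each variant into a CMake cache definition, so the recipe effectively passes USE_CUDA and USE_LLVM flags that track the spec. Below is a minimal sketch of that mapping for boolean variants; it is illustrative only, and the helper name and exact formatting are assumptions about Spack's behavior, not part of this commit.

# Illustrative sketch, not part of the package: roughly how a boolean variant
# becomes a CMake cache define, in the spirit of define_from_variant.
def define_bool_from_variant(spec, cmake_var: str, variant: str) -> str:
    value = "ON" if spec.satisfies(f"+{variant}") else "OFF"
    return f"-D{cmake_var}:BOOL={value}"

# For a spec like "apache-tvm +cuda ~llvm" this would yield
#   "-DUSE_CUDA:BOOL=ON" and "-DUSE_LLVM:BOOL=OFF"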

var/spack/repos/builtin/packages/mlc-llm/package.py
@@ -0,0 +1,89 @@
# Copyright 2013-2024 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

from spack.package import *


class MlcLlm(CMakePackage, CudaPackage):
    """MLC LLM is a machine learning compiler and high-performance deployment
    engine for large language models. The mission of this project is to enable
    everyone to develop, optimize, and deploy AI models natively on everyone's
    platforms."""

    homepage = "https://github.com/mlc-ai/mlc-llm"
    git = "https://github.com/mlc-ai/mlc-llm.git"
    url = "https://github.com/mlc-ai/mlc-llm/archive/refs/tags/v0.1.dev0.tar.gz"

    license("Apache-2.0", checked_by="alex391")

    version("2024-06-13", commit="ceba9511df3da06a8541916522d57fdc99cb6f54", submodules=True)

    depends_on("cmake@3.24:", type="build")
    depends_on("rust", type="build")
    depends_on("cxx", type="build")
    depends_on("python@3.11", type="build")
    depends_on("apache-tvm")
    depends_on("cuda@11.8:", when="+cuda")

    variant(
        "flash-infer",
        default=False,
        description="Use FlashInfer? (need CUDA w/ compute capability 80;86;89;90)",
        when="+cuda",
    )

    conflicts("cuda_arch=none", when="+flash-infer")
    unsupported_flash_infer_cuda_archs = filter(
        lambda arch: arch not in ["80", "86", "89", "90"], CudaPackage.cuda_arch_values
    )
    for arch in unsupported_flash_infer_cuda_archs:
        conflicts(
            f"cuda_arch={arch}",
            when="+flash-infer",
            msg=f"CUDA architecture {arch} is not supported when +flash-infer",
        )

    def patch(self):
        with open("cmake/config.cmake", "w") as f:
            f.write(self._gen_cmake_config())

    def _gen_cmake_config(self) -> str:
        """
        Generate string for cmake/config.cmake (based on cmake/gen_cmake_config.py)
        """
        tvm_home = self.spec["apache-tvm"].prefix
        cmake_config_str = f"set(TVM_SOURCE_DIR {tvm_home})\n"
        cmake_config_str += "set(CMAKE_BUILD_TYPE RelWithDebInfo)\n"
        if self.spec.satisfies("+cuda"):
            cmake_config_str += "set(USE_CUDA ON)\n"
            cmake_config_str += "set(USE_THRUST ON)\n"
        else:
            cmake_config_str += "set(USE_CUDA OFF)\n"

        # FlashInfer related
        if self.spec.satisfies("+flash-infer"):
            cmake_config_str += "set(USE_FLASHINFER ON)\n"
            cmake_config_str += "set(FLASHINFER_ENABLE_FP8 OFF)\n"
            cmake_config_str += "set(FLASHINFER_ENABLE_BF16 OFF)\n"
            cmake_config_str += "set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)\n"
            cmake_config_str += "set(FLASHINFER_GEN_PAGE_SIZES 16)\n"
            cmake_config_str += "set(FLASHINFER_GEN_HEAD_DIMS 128)\n"
            cmake_config_str += "set(FLASHINFER_GEN_KV_LAYOUTS 0 1)\n"
            cmake_config_str += "set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)\n"
            cmake_config_str += 'set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")\n'
            cmake_config_str += 'set(FLASHINFER_GEN_CASUALS "false" "true")\n'
            cuda_archs = ";".join(self.spec.variants["cuda_arch"].value)
            cmake_config_str += f"set(FLASHINFER_CUDA_ARCHITECTURES {cuda_archs})\n"
            cmake_config_str += f"set(CMAKE_CUDA_ARCHITECTURES {cuda_archs})\n"
        else:
            cmake_config_str += "set(USE_FLASHINFER OFF)\n"

        return cmake_config_str
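
For a sense of what the recipe produces, the sketch below shows the cmake/config.cmake text that _gen_cmake_config() would generate for a hypothetical spec such as mlc-llm+cuda+flash-infer cuda_arch=80; the TVM prefix path is a made-up placeholder. Upstream's cmake/gen_cmake_config.py normally produces this file, and the patch() step above writes it directly instead.

# Illustrative only: expected cmake/config.cmake contents for an assumed spec
# "mlc-llm +cuda +flash-infer cuda_arch=80". The TVM prefix is a placeholder,
# not a real install path.
expected_config = """\
set(TVM_SOURCE_DIR /path/to/spack/opt/apache-tvm-0.16.0)
set(CMAKE_BUILD_TYPE RelWithDebInfo)
set(USE_CUDA ON)
set(USE_THRUST ON)
set(USE_FLASHINFER ON)
set(FLASHINFER_ENABLE_FP8 OFF)
set(FLASHINFER_ENABLE_BF16 OFF)
set(FLASHINFER_GEN_GROUP_SIZES 1 4 6 8)
set(FLASHINFER_GEN_PAGE_SIZES 16)
set(FLASHINFER_GEN_HEAD_DIMS 128)
set(FLASHINFER_GEN_KV_LAYOUTS 0 1)
set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1)
set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false")
set(FLASHINFER_GEN_CASUALS "false" "true")
set(FLASHINFER_CUDA_ARCHITECTURES 80)
set(CMAKE_CUDA_ARCHITECTURES 80)
"""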