mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
use arch specific targets when possible (#2771)
This commit is contained in:
@@ -279,11 +279,14 @@ void compile(
|
||||
// Compile program.
|
||||
std::vector<const char*> args;
|
||||
bool use_sass = compiler_supports_device_sass(device);
|
||||
auto cc = device.compute_capability_major();
|
||||
std::string arch_tag = (cc == 90 || cc == 100 || cc == 121) ? "a" : "";
|
||||
std::string compute = fmt::format(
|
||||
"--gpu-architecture={}_{}{}",
|
||||
"--gpu-architecture={}_{}{}{}",
|
||||
use_sass ? "sm" : "compute",
|
||||
device.compute_capability_major(),
|
||||
device.compute_capability_minor());
|
||||
cc,
|
||||
device.compute_capability_minor(),
|
||||
arch_tag);
|
||||
args.push_back(compute.c_str());
|
||||
std::string cccl_include = cccl_dir();
|
||||
if (!cccl_include.empty()) {
|
||||
|
||||
Reference in New Issue
Block a user