Custom cuda kernel (#2517)

This commit is contained in:
Angelos Katharopoulos
2025-08-20 17:20:22 -07:00
committed by GitHub
parent f4c8888cbe
commit e397177f6e
19 changed files with 1042 additions and 211 deletions

View File

@@ -267,7 +267,8 @@ void Compiled::eval_gpu(
}
}
-return std::make_pair(std::move(builder.os), std::move(kernel_names));
+return std::make_tuple(
+false, std::move(builder.os), std::move(kernel_names));
});
// Collapse contiguous dims to route to a faster kernel if possible. Also