2024-02-15 06:04:25 +08:00
|
|
|
// Copyright © 2023-2024 Apple Inc.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2024-03-05 13:06:11 +08:00
|
|
|
#include <optional>
|
2025-03-21 02:01:32 +08:00
|
|
|
#include <variant>
|
2024-03-05 13:06:11 +08:00
|
|
|
|
2024-02-17 11:16:39 +08:00
|
|
|
#include "mlx/utils.h"
|
2024-02-15 06:04:25 +08:00
|
|
|
|
|
|
|
namespace mlx::core::fast {
|
|
|
|
|
2024-03-21 22:20:54 +08:00
|
|
|
/**
 * Declaration of the fast `rms_norm` operation.
 *
 * @param x      Input array.
 * @param weight Optional array; pass `std::nullopt` to omit it.
 * @param eps    Floating-point epsilon forwarded to the implementation.
 * @param s      Stream or device to run on; defaults to `{}`.
 * @return The resulting array.
 *
 * NOTE(review): by its name this is presumably root-mean-square
 * normalization with `weight` as a scale and `eps` as a numerical
 * stabilizer -- confirm against the definition.
 */
array rms_norm(
    const array& x,
    const std::optional<array>& weight,
    float eps,
    StreamOrDevice s = {});
|
|
|
|
|
2024-03-22 04:55:51 +08:00
|
|
|
/**
 * Declaration of the fast `layer_norm` operation.
 *
 * @param x      Input array.
 * @param weight Optional array; pass `std::nullopt` to omit it.
 * @param bias   Optional array; pass `std::nullopt` to omit it.
 * @param eps    Floating-point epsilon forwarded to the implementation.
 * @param s      Stream or device to run on; defaults to `{}`.
 * @return The resulting array.
 *
 * NOTE(review): presumably layer normalization with an optional affine
 * transform (`weight`, `bias`) -- confirm against the definition.
 */
array layer_norm(
    const array& x,
    const std::optional<array>& weight,
    const std::optional<array>& bias,
    float eps,
    StreamOrDevice s = {});
|
|
|
|
|
2024-02-15 06:04:25 +08:00
|
|
|
/**
 * Declaration of the fast `rope` operation; overload taking the offset as a
 * plain `int`.
 *
 * @param x           Input array.
 * @param dims        Integer dimension count forwarded to the implementation.
 * @param traditional Boolean switch selecting the variant to apply.
 * @param base        Optional float; may be `std::nullopt`.
 * @param scale       Float scale factor.
 * @param offset      Integer offset.
 * @param freqs       Optional array; defaults to `std::nullopt`.
 * @param s           Stream or device to run on; defaults to `{}`.
 * @return The resulting array.
 *
 * NOTE(review): presumably rotary position embedding, where `base` and
 * `freqs` are alternative ways of specifying frequencies -- confirm against
 * the definition.
 */
array rope(
    const array& x,
    int dims,
    bool traditional,
    std::optional<float> base,
    float scale,
    int offset,
    const std::optional<array>& freqs = std::nullopt,
    StreamOrDevice s = {});
|
2024-02-15 06:04:25 +08:00
|
|
|
|
2024-12-20 07:51:44 +08:00
|
|
|
/**
 * Declaration of the fast `rope` operation; overload taking the offset as an
 * `array` rather than an `int`.
 *
 * @param x           Input array.
 * @param dims        Integer dimension count forwarded to the implementation.
 * @param traditional Boolean switch selecting the variant to apply.
 * @param base        Optional float; may be `std::nullopt`.
 * @param scale       Float scale factor.
 * @param offset      Offset supplied as an array.
 * @param freqs       Optional array; defaults to `std::nullopt`.
 * @param s           Stream or device to run on; defaults to `{}`.
 * @return The resulting array.
 *
 * NOTE(review): the expected shape/dtype of `offset` is not visible from
 * this declaration -- confirm against the definition.
 */
array rope(
    const array& x,
    int dims,
    bool traditional,
    std::optional<float> base,
    float scale,
    const array& offset,
    const std::optional<array>& freqs = std::nullopt,
    StreamOrDevice s = {});
|
|
|
|
|
2024-03-05 13:06:11 +08:00
|
|
|
/** Computes: O = softmax(Q @ K.T) @ V **/
|
|
|
|
array scaled_dot_product_attention(
|
|
|
|
const array& queries,
|
|
|
|
const array& keys,
|
|
|
|
const array& values,
|
|
|
|
const float scale,
|
2025-04-04 02:58:28 +08:00
|
|
|
const std::string& mask_mode = "",
|
|
|
|
const std::vector<array>& mask_arrs = {},
|
2024-03-05 13:06:11 +08:00
|
|
|
StreamOrDevice s = {});
|
|
|
|
|
2024-07-30 06:11:38 +08:00
|
|
|
std::tuple<array, array, array> affine_quantize(
|
|
|
|
const array& w,
|
|
|
|
int group_size = 64,
|
|
|
|
int bits = 4,
|
|
|
|
StreamOrDevice s = {});
|
|
|
|
|
|
|
|
/**
 * Declaration of `affine_dequantize`.
 *
 * @param w          Array to dequantize.
 * @param scales     Scales array.
 * @param biases     Biases array.
 * @param group_size Group size; defaults to 64.
 * @param bits       Bit width; defaults to 4.
 * @param s          Stream or device to run on; defaults to `{}`.
 * @return The resulting array.
 *
 * NOTE(review): presumably `group_size` and `bits` must match the values
 * used when `w` was quantized -- confirm against the definition.
 */
array affine_dequantize(
    const array& w,
    const array& scales,
    const array& biases,
    int group_size = 64,
    int bits = 4,
    StreamOrDevice s = {});
|
|
|
|
|
2024-08-23 04:46:29 +08:00
|
|
|
/**
 * A single template-argument value: holds exactly one of `int`, `bool`, or
 * `Dtype`.
 */
using TemplateArg = std::variant<int, bool, Dtype>;
|
|
|
|
|
2024-09-07 10:16:50 +08:00
|
|
|
typedef std::function<std::vector<array>(
|
|
|
|
const std::vector<array>&,
|
2024-12-20 00:08:20 +08:00
|
|
|
const std::vector<Shape>&,
|
2024-09-07 10:16:50 +08:00
|
|
|
const std::vector<Dtype>&,
|
|
|
|
std::tuple<int, int, int>,
|
|
|
|
std::tuple<int, int, int>,
|
|
|
|
std::vector<std::pair<std::string, TemplateArg>>,
|
|
|
|
std::optional<float>,
|
|
|
|
bool,
|
|
|
|
StreamOrDevice)>
|
|
|
|
MetalKernelFunction;
|
2024-08-23 04:46:29 +08:00
|
|
|
|
2024-09-07 10:16:50 +08:00
|
|
|
/**
 * Declaration of `metal_kernel`, which produces a `MetalKernelFunction`
 * callable from kernel source text.
 *
 * @param name                  Name for the kernel.
 * @param input_names           Names of the kernel inputs.
 * @param output_names          Names of the kernel outputs.
 * @param source                Kernel source text.
 * @param header                Additional header text; defaults to the empty
 *                              string.
 * @param ensure_row_contiguous Defaults to `true`.
 * @param atomic_outputs        Defaults to `false`.
 * @return A `MetalKernelFunction` callable.
 *
 * NOTE(review): presumably `ensure_row_contiguous` forces inputs to be
 * row-contiguous before launch and `atomic_outputs` makes the outputs atomic
 * -- confirm against the definition.
 */
MetalKernelFunction metal_kernel(
    const std::string& name,
    const std::vector<std::string>& input_names,
    const std::vector<std::string>& output_names,
    const std::string& source,
    const std::string& header = "",
    bool ensure_row_contiguous = true,
    bool atomic_outputs = false);
|
2024-08-23 04:46:29 +08:00
|
|
|
|
2024-02-15 06:04:25 +08:00
|
|
|
} // namespace mlx::core::fast
|