WIP (io)
@@ -238,7 +238,7 @@ std::unordered_map<std::string, array> load_arrays(gguf_ctx* ctx) {
   return array_map;
 }
 
-GGUFLoad load_gguf(const std::string& file, StreamOrDevice s) {
+GGUFLoad load_gguf(const std::string& file, StreamOrDevice /* s */) {
   bool exists;
   {
     std::ifstream f(file.c_str());
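The only change in this hunk comments out the name of the unused StreamOrDevice parameter, a standard way to silence unused-parameter warnings without touching the signature. The surrounding context is the start of a file-existence check; below is a minimal sketch of that idiom, with a hypothetical helper name, since the diff elides the rest of the block.

#include <fstream>
#include <string>

// Sketch of the existence-check idiom the context lines begin (not the mlx
// body, which the diff truncates): construct a std::ifstream and test
// whether the stream opened successfully.
bool file_exists_hint(const std::string& file) {
  std::ifstream f(file.c_str());
  return f.good();
}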
@@ -440,7 +440,7 @@ void save_gguf(
     }
     const char* tensorname = key.c_str();
     const uint64_t namelen = key.length();
-    const uint32_t num_dim = arr.ndim();
+    const int num_dim = arr.ndim();
     std::vector<uint64_t> dim(num_dim);
     for (int i = 0; i < num_dim; i++) {
       dim[i] = arr.shape()[num_dim - 1 - i];
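Retyping num_dim from uint32_t to int matches the signed loop index below it, presumably to avoid a signed/unsigned comparison in the loop condition. The loop itself copies the shape in reverse, since GGUF (following ggml) lists dimensions starting from the fastest-varying axis while the array's shape is given slowest-varying first. A standalone sketch of that reversal with a made-up shape, outside the mlx array API:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Row-major shape as an mlx array would report it, e.g. (batch, rows, cols).
  std::vector<int> shape = {2, 3, 4};
  const int num_dim = static_cast<int>(shape.size());

  // Copy in reverse, mirroring the loop in the hunk above.
  std::vector<uint64_t> dim(num_dim);
  for (int i = 0; i < num_dim; i++) {
    dim[i] = shape[num_dim - 1 - i];
  }

  for (auto d : dim) {
    std::printf("%llu ", static_cast<unsigned long long>(d)); // prints: 4 3 2
  }
  std::printf("\n");
  return 0;
}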
@@ -77,8 +77,8 @@ void extract_q8_0_data(
     array& weights_arr,
     array& scales_arr,
     array& biases_arr) {
-  const uint64_t weights_per_block = 32;
-  const uint64_t bytes_per_block = 34; // 2 bytes scale, 32x1 byte weights
+  const int64_t weights_per_block = 32;
+  const int64_t bytes_per_block = 34; // 2 bytes scale, 32x1 byte weights
   auto data = static_cast<uint8_t*>(tensor.weights_data);
   auto weights = weights_arr.data<int8_t>();
   auto scales = scales_arr.data<float16_t>();
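The comment carried through this change documents the Q8_0 block layout: 34 bytes per block, a 2-byte half-precision scale followed by 32 one-byte weights; the edit only switches the block constants from uint64_t to int64_t. A minimal standalone sketch of walking that layout follows; the function and buffer names are invented, and the real extract_q8_0_data also takes a biases array and writes directly into mlx array storage.

#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative only, not the mlx implementation: split raw Q8_0 data into
// per-block scales and weights. Each 34-byte block holds a 2-byte
// half-precision scale followed by 32 int8 weights. The scale is kept as
// its raw uint16_t bit pattern so the sketch stays self-contained.
void split_q8_0(
    const uint8_t* data,
    int64_t num_blocks,
    std::vector<uint16_t>& scale_bits,
    std::vector<int8_t>& weights) {
  const int64_t weights_per_block = 32;
  const int64_t bytes_per_block = 34;
  scale_bits.resize(num_blocks);
  weights.resize(num_blocks * weights_per_block);
  for (int64_t b = 0; b < num_blocks; b++) {
    const uint8_t* block = data + b * bytes_per_block;
    std::memcpy(&scale_bits[b], block, sizeof(uint16_t)); // leading fp16 scale
    std::memcpy(&weights[b * weights_per_block], block + 2,
                static_cast<size_t>(weights_per_block)); // 32 int8 weights
  }
}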