mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 11:45:16 +08:00
comment
This commit is contained in:
parent
5e8f88d079
commit
26e2d97e27
@ -10,8 +10,6 @@ std::function<mx::Args(mx::Args)> load_model(const std::string &path);
|
|||||||
|
|
||||||
BPETokenizer load_tokenizer(const std::string &path);
|
BPETokenizer load_tokenizer(const std::string &path);
|
||||||
|
|
||||||
struct GenerationResponse {};
|
|
||||||
|
|
||||||
void generate(const std::function<mx::Args(mx::Args)> &model,
|
void generate(const std::function<mx::Args(mx::Args)> &model,
|
||||||
const BPETokenizer &tokenizer, const std::string &prompt,
|
const BPETokenizer &tokenizer, const std::string &prompt,
|
||||||
int max_tokens = 256);
|
int max_tokens = 256);
|
||||||
|
10
llms/export/third_party/CMakeLists.txt
vendored
10
llms/export/third_party/CMakeLists.txt
vendored
@ -7,12 +7,10 @@ FetchContent_MakeAvailable(json)
|
|||||||
target_include_directories(
|
target_include_directories(
|
||||||
mlxlm PRIVATE $<BUILD_INTERFACE:${json_SOURCE_DIR}/single_include/nlohmann>)
|
mlxlm PRIVATE $<BUILD_INTERFACE:${json_SOURCE_DIR}/single_include/nlohmann>)
|
||||||
|
|
||||||
add_custom_target(unicode)
|
execute_process(
|
||||||
add_custom_command(
|
COMMAND zsh "${CMAKE_CURRENT_SOURCE_DIR}/download_unicode.sh" "${CMAKE_CURRENT_BINARY_DIR}"
|
||||||
TARGET unicode
|
COMMAND_ERROR_IS_FATAL ANY
|
||||||
PRE_BUILD
|
)
|
||||||
COMMAND /bin/bash ${CMAKE_CURRENT_SOURCE_DIR}/download_unicode.sh)
|
|
||||||
add_dependencies(mlxlm unicode)
|
|
||||||
|
|
||||||
target_sources(mlxlm
|
target_sources(mlxlm
|
||||||
PRIVATE
|
PRIVATE
|
||||||
|
4
llms/export/third_party/download_unicode.sh
vendored
4
llms/export/third_party/download_unicode.sh
vendored
@ -5,7 +5,5 @@ url=https://raw.githubusercontent.com/ggerganov/llama.cpp/${commit}/src/
|
|||||||
|
|
||||||
for file in 'unicode.cpp' 'unicode.h' 'unicode-data.cpp' 'unicode-data.h'
|
for file in 'unicode.cpp' 'unicode.h' 'unicode-data.cpp' 'unicode-data.h'
|
||||||
do
|
do
|
||||||
curl -OL ${url}/${file}
|
curl -OL ${url}/${file} --output-dir $1 2>/dev/null
|
||||||
done
|
done
|
||||||
|
|
||||||
touch unicode_downloaded
|
|
||||||
|
@ -98,13 +98,17 @@ std::vector<int> BPETokenizer::encode(std::string text) const {
|
|||||||
auto one_step_merge = [this](std::string segment, std::vector<int> &splits) {
|
auto one_step_merge = [this](std::string segment, std::vector<int> &splits) {
|
||||||
int merge_idx;
|
int merge_idx;
|
||||||
int rank = INT32_MAX;
|
int rank = INT32_MAX;
|
||||||
|
std::string candidate;
|
||||||
for (int i = 0; i < splits.size() - 2; ++i) {
|
for (int i = 0; i < splits.size() - 2; ++i) {
|
||||||
auto start = splits[i];
|
auto start = splits[i];
|
||||||
auto mid = splits[i + 1];
|
auto mid = splits[i + 1];
|
||||||
auto end = splits[i + 2];
|
auto end = splits[i + 2];
|
||||||
std::string candidate = segment.substr(start, mid - start);
|
candidate.clear();
|
||||||
|
candidate.insert(candidate.end(), segment.begin() + start,
|
||||||
|
segment.begin() + mid);
|
||||||
candidate += " ";
|
candidate += " ";
|
||||||
candidate += segment.substr(mid, end - mid);
|
candidate.insert(candidate.end(), segment.begin() + mid,
|
||||||
|
segment.begin() + end);
|
||||||
if (auto it = merges_.find(candidate); it != merges_.end()) {
|
if (auto it = merges_.find(candidate); it != merges_.end()) {
|
||||||
if (it->second < rank) {
|
if (it->second < rank) {
|
||||||
merge_idx = i;
|
merge_idx = i;
|
||||||
|
Loading…
Reference in New Issue
Block a user