mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-28 23:49:43 +08:00
comment
This commit is contained in:
parent
5e8f88d079
commit
26e2d97e27
@ -10,8 +10,6 @@ std::function<mx::Args(mx::Args)> load_model(const std::string &path);
|
||||
|
||||
BPETokenizer load_tokenizer(const std::string &path);
|
||||
|
||||
struct GenerationResponse {};
|
||||
|
||||
void generate(const std::function<mx::Args(mx::Args)> &model,
|
||||
const BPETokenizer &tokenizer, const std::string &prompt,
|
||||
int max_tokens = 256);
|
||||
|
10
llms/export/third_party/CMakeLists.txt
vendored
10
llms/export/third_party/CMakeLists.txt
vendored
@ -7,12 +7,10 @@ FetchContent_MakeAvailable(json)
|
||||
target_include_directories(
|
||||
mlxlm PRIVATE $<BUILD_INTERFACE:${json_SOURCE_DIR}/single_include/nlohmann>)
|
||||
|
||||
add_custom_target(unicode)
|
||||
add_custom_command(
|
||||
TARGET unicode
|
||||
PRE_BUILD
|
||||
COMMAND /bin/bash ${CMAKE_CURRENT_SOURCE_DIR}/download_unicode.sh)
|
||||
add_dependencies(mlxlm unicode)
|
||||
execute_process(
|
||||
COMMAND zsh "${CMAKE_CURRENT_SOURCE_DIR}/download_unicode.sh" "${CMAKE_CURRENT_BINARY_DIR}"
|
||||
COMMAND_ERROR_IS_FATAL ANY
|
||||
)
|
||||
|
||||
target_sources(mlxlm
|
||||
PRIVATE
|
||||
|
4
llms/export/third_party/download_unicode.sh
vendored
4
llms/export/third_party/download_unicode.sh
vendored
@ -5,7 +5,5 @@ url=https://raw.githubusercontent.com/ggerganov/llama.cpp/${commit}/src/
|
||||
|
||||
for file in 'unicode.cpp' 'unicode.h' 'unicode-data.cpp' 'unicode-data.h'
|
||||
do
|
||||
curl -OL ${url}/${file}
|
||||
curl -OL ${url}/${file} --output-dir $1 2>/dev/null
|
||||
done
|
||||
|
||||
touch unicode_downloaded
|
||||
|
@ -98,13 +98,17 @@ std::vector<int> BPETokenizer::encode(std::string text) const {
|
||||
auto one_step_merge = [this](std::string segment, std::vector<int> &splits) {
|
||||
int merge_idx;
|
||||
int rank = INT32_MAX;
|
||||
std::string candidate;
|
||||
for (int i = 0; i < splits.size() - 2; ++i) {
|
||||
auto start = splits[i];
|
||||
auto mid = splits[i + 1];
|
||||
auto end = splits[i + 2];
|
||||
std::string candidate = segment.substr(start, mid - start);
|
||||
candidate.clear();
|
||||
candidate.insert(candidate.end(), segment.begin() + start,
|
||||
segment.begin() + mid);
|
||||
candidate += " ";
|
||||
candidate += segment.substr(mid, end - mid);
|
||||
candidate.insert(candidate.end(), segment.begin() + mid,
|
||||
segment.begin() + end);
|
||||
if (auto it = merges_.find(candidate); it != merges_.end()) {
|
||||
if (it->second < rank) {
|
||||
merge_idx = i;
|
||||
|
Loading…
Reference in New Issue
Block a user