mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-10-28 17:08:07 +08:00
Commit Graph
Select branches
Hide Pull Requests
awq
awq-tq
dist-eval
distributed-layers
flux-dist-improv
flux-qlora
load-gguf
main
openlm
packed-quants
#1
#10
#1001
#1003
#1004
#1006
#1009
#1012
#1013
#1014
#1015
#1016
#102
#1020
#1023
#1024
#1026
#1027
#1028
#103
#1035
#1036
#1037
#1038
#1040
#1045
#1047
#1048
#1048
#1049
#105
#1050
#1054
#1055
#106
#1061
#1062
#1063
#1065
#1068
#1069
#107
#1070
#1072
#1074
#1075
#1078
#1079
#108
#1080
#1081
#1082
#1085
#1089
#1090
#1092
#1093
#1094
#1096
#1099
#110
#1100
#1103
#1105
#1113
#1114
#1115
#1117
#1118
#1119
#112
#1121
#1122
#1125
#1128
#1129
#113
#1131
#1132
#1133
#1135
#1137
#1140
#1141
#1143
#1144
#1145
#1146
#1148
#1149
#115
#1152
#1153
#1154
#1155
#1156
#1157
#1158
#1159
#116
#1163
#1164
#1166
#1173
#1174
#1176
#1178
#1180
#1189
#1191
#1192
#1193
#1194
#1196
#1197
#1199
#12
#1200
#1202
#1204
#1205
#1206
#1208
#1209
#121
#1210
#1211
#1212
#1213
#1214
#1215
#1216
#1217
#1220
#1222
#1225
#1228
#1229
#123
#1230
#1230
#1231
#1231
#1233
#1234
#1235
#1240
#1241
#1242
#1246
#1249
#1250
#1251
#1253
#1257
#1259
#1260
#1263
#1265
#1267
#1270
#1271
#1272
#1273
#1276
#1277
#1278
#1279
#1280
#1283
#1287
#129
#1291
#1294
#1295
#1297
#1298
#1299
#1300
#1301
#1302
#1304
#1305
#1308
#1311
#1312
#1314
#1315
#1316
#1319
#1321
#1322
#1323
#1324
#1325
#1326
#133
#1330
#1331
#1332
#1336
#1338
#1339
#134
#1340
#1342
#1346
#1348
#1349
#1350
#1353
#1358
#1359
#136
#1364
#1365
#1367
#1370
#1371
#1375
#1375
#1377
#1377
#1383
#1383
#1385
#1386
#1387
#1388
#1389
#1390
#14
#140
#141
#144
#145
#147
#149
#151
#152
#153
#154
#157
#158
#159
#160
#161
#162
#163
#164
#165
#166
#167
#169
#171
#172
#173
#174
#176
#177
#178
#180
#183
#184
#186
#187
#189
#19
#190
#191
#192
#193
#195
#197
#198
#2
#200
#201
#202
#203
#205
#208
#211
#213
#214
#215
#219
#22
#221
#222
#227
#229
#23
#231
#234
#235
#237
#238
#24
#240
#241
#242
#243
#245
#248
#250
#251
#252
#253
#254
#255
#257
#260
#263
#264
#265
#266
#269
#27
#270
#271
#272
#274
#275
#276
#278
#282
#284
#285
#287
#290
#291
#292
#293
#294
#295
#30
#300
#301
#302
#303
#306
#307
#308
#309
#310
#311
#312
#315
#318
#319
#32
#320
#321
#325
#326
#33
#331
#333
#335
#337
#338
#340
#342
#343
#347
#350
#351
#352
#353
#354
#358
#36
#360
#361
#363
#364
#365
#366
#369
#37
#373
#375
#377
#378
#379
#380
#386
#387
#388
#389
#391
#392
#393
#396
#397
#398
#399
#40
#401
#405
#408
#409
#41
#41
#411
#413
#414
#415
#417
#418
#419
#42
#420
#421
#424
#426
#427
#429
#43
#431
#432
#433
#439
#441
#443
#445
#446
#449
#45
#450
#451
#453
#455
#457
#458
#461
#462
#466
#467
#468
#469
#47
#470
#471
#472
#474
#475
#479
#48
#482
#483
#483
#486
#489
#491
#494
#495
#496
#497
#498
#5
#50
#501
#502
#503
#505
#506
#509
#51
#510
#514
#515
#516
#518
#519
#52
#520
#521
#522
#523
#528
#53
#530
#531
#534
#536
#539
#541
#544
#545
#546
#547
#548
#549
#55
#552
#555
#558
#562
#563
#565
#566
#569
#570
#571
#572
#573
#574
#577
#578
#580
#581
#585
#589
#590
#591
#592
#595
#596
#599
#602
#603
#604
#608
#609
#610
#611
#613
#614
#62
#621
#623
#628
#632
#633
#634
#636
#639
#64
#640
#643
#644
#645
#648
#650
#651
#654
#657
#66
#661
#665
#666
#667
#668
#670
#673
#674
#675
#676
#679
#680
#681
#682
#683
#684
#685
#687
#688
#69
#690
#691
#693
#694
#697
#698
#7
#701
#702
#703
#705
#707
#708
#711
#712
#715
#716
#717
#719
#720
#721
#729
#73
#731
#735
#736
#739
#74
#740
#743
#744
#746
#749
#75
#752
#753
#758
#76
#760
#763
#766
#77
#770
#773
#775
#778
#779
#78
#782
#789
#79
#790
#792
#793
#794
#797
#798
#8
#80
#800
#802
#803
#806
#807
#810
#813
#817
#818
#82
#821
#822
#824
#825
#827
#828
#830
#831
#833
#835
#837
#838
#839
#84
#840
#85
#851
#852
#853
#855
#856
#857
#86
#863
#867
#87
#871
#877
#879
#88
#882
#885
#886
#888
#889
#89
#890
#891
#895
#898
#899
#90
#902
#903
#904
#905
#906
#907
#911
#913
#914
#915
#915
#920
#923
#926
#93
#931
#932
#935
#936
#937
#94
#940
#942
#945
#946
#948
#949
#954
#955
#956
#957
#96
#960
#961
#962
#963
#965
#969
#97
#971
#971
#973
#979
#98
#981
#983
#984
#989
#99
#990
#991
#993
#995
#996
#998
Select branches
Hide Pull Requests
awq
awq-tq
dist-eval
distributed-layers
flux-dist-improv
flux-qlora
load-gguf
main
openlm
packed-quants
#1
#10
#1001
#1003
#1004
#1006
#1009
#1012
#1013
#1014
#1015
#1016
#102
#1020
#1023
#1024
#1026
#1027
#1028
#103
#1035
#1036
#1037
#1038
#1040
#1045
#1047
#1048
#1048
#1049
#105
#1050
#1054
#1055
#106
#1061
#1062
#1063
#1065
#1068
#1069
#107
#1070
#1072
#1074
#1075
#1078
#1079
#108
#1080
#1081
#1082
#1085
#1089
#1090
#1092
#1093
#1094
#1096
#1099
#110
#1100
#1103
#1105
#1113
#1114
#1115
#1117
#1118
#1119
#112
#1121
#1122
#1125
#1128
#1129
#113
#1131
#1132
#1133
#1135
#1137
#1140
#1141
#1143
#1144
#1145
#1146
#1148
#1149
#115
#1152
#1153
#1154
#1155
#1156
#1157
#1158
#1159
#116
#1163
#1164
#1166
#1173
#1174
#1176
#1178
#1180
#1189
#1191
#1192
#1193
#1194
#1196
#1197
#1199
#12
#1200
#1202
#1204
#1205
#1206
#1208
#1209
#121
#1210
#1211
#1212
#1213
#1214
#1215
#1216
#1217
#1220
#1222
#1225
#1228
#1229
#123
#1230
#1230
#1231
#1231
#1233
#1234
#1235
#1240
#1241
#1242
#1246
#1249
#1250
#1251
#1253
#1257
#1259
#1260
#1263
#1265
#1267
#1270
#1271
#1272
#1273
#1276
#1277
#1278
#1279
#1280
#1283
#1287
#129
#1291
#1294
#1295
#1297
#1298
#1299
#1300
#1301
#1302
#1304
#1305
#1308
#1311
#1312
#1314
#1315
#1316
#1319
#1321
#1322
#1323
#1324
#1325
#1326
#133
#1330
#1331
#1332
#1336
#1338
#1339
#134
#1340
#1342
#1346
#1348
#1349
#1350
#1353
#1358
#1359
#136
#1364
#1365
#1367
#1370
#1371
#1375
#1375
#1377
#1377
#1383
#1383
#1385
#1386
#1387
#1388
#1389
#1390
#14
#140
#141
#144
#145
#147
#149
#151
#152
#153
#154
#157
#158
#159
#160
#161
#162
#163
#164
#165
#166
#167
#169
#171
#172
#173
#174
#176
#177
#178
#180
#183
#184
#186
#187
#189
#19
#190
#191
#192
#193
#195
#197
#198
#2
#200
#201
#202
#203
#205
#208
#211
#213
#214
#215
#219
#22
#221
#222
#227
#229
#23
#231
#234
#235
#237
#238
#24
#240
#241
#242
#243
#245
#248
#250
#251
#252
#253
#254
#255
#257
#260
#263
#264
#265
#266
#269
#27
#270
#271
#272
#274
#275
#276
#278
#282
#284
#285
#287
#290
#291
#292
#293
#294
#295
#30
#300
#301
#302
#303
#306
#307
#308
#309
#310
#311
#312
#315
#318
#319
#32
#320
#321
#325
#326
#33
#331
#333
#335
#337
#338
#340
#342
#343
#347
#350
#351
#352
#353
#354
#358
#36
#360
#361
#363
#364
#365
#366
#369
#37
#373
#375
#377
#378
#379
#380
#386
#387
#388
#389
#391
#392
#393
#396
#397
#398
#399
#40
#401
#405
#408
#409
#41
#41
#411
#413
#414
#415
#417
#418
#419
#42
#420
#421
#424
#426
#427
#429
#43
#431
#432
#433
#439
#441
#443
#445
#446
#449
#45
#450
#451
#453
#455
#457
#458
#461
#462
#466
#467
#468
#469
#47
#470
#471
#472
#474
#475
#479
#48
#482
#483
#483
#486
#489
#491
#494
#495
#496
#497
#498
#5
#50
#501
#502
#503
#505
#506
#509
#51
#510
#514
#515
#516
#518
#519
#52
#520
#521
#522
#523
#528
#53
#530
#531
#534
#536
#539
#541
#544
#545
#546
#547
#548
#549
#55
#552
#555
#558
#562
#563
#565
#566
#569
#570
#571
#572
#573
#574
#577
#578
#580
#581
#585
#589
#590
#591
#592
#595
#596
#599
#602
#603
#604
#608
#609
#610
#611
#613
#614
#62
#621
#623
#628
#632
#633
#634
#636
#639
#64
#640
#643
#644
#645
#648
#650
#651
#654
#657
#66
#661
#665
#666
#667
#668
#670
#673
#674
#675
#676
#679
#680
#681
#682
#683
#684
#685
#687
#688
#69
#690
#691
#693
#694
#697
#698
#7
#701
#702
#703
#705
#707
#708
#711
#712
#715
#716
#717
#719
#720
#721
#729
#73
#731
#735
#736
#739
#74
#740
#743
#744
#746
#749
#75
#752
#753
#758
#76
#760
#763
#766
#77
#770
#773
#775
#778
#779
#78
#782
#789
#79
#790
#792
#793
#794
#797
#798
#8
#80
#800
#802
#803
#806
#807
#810
#813
#817
#818
#82
#821
#822
#824
#825
#827
#828
#830
#831
#833
#835
#837
#838
#839
#84
#840
#85
#851
#852
#853
#855
#856
#857
#86
#863
#867
#87
#871
#877
#879
#88
#882
#885
#886
#888
#889
#89
#890
#891
#895
#898
#899
#90
#902
#903
#904
#905
#906
#907
#911
#913
#914
#915
#915
#920
#923
#926
#93
#931
#932
#935
#936
#937
#94
#940
#942
#945
#946
#948
#949
#954
#955
#956
#957
#96
#960
#961
#962
#963
#965
#969
#97
#971
#971
#973
#979
#98
#981
#983
#984
#989
#99
#990
#991
#993
#995
#996
#998
-
21a4d4cdab
Update whisper command line help mentioning --word-timestamps (#1390)
main
Armin Stross-Radschinski
2025-10-07 20:19:46 +02:00 -
8e4391ca21
whisper nits (#1388)
Awni Hannun
2025-09-03 13:18:50 -07:00 -
c1af8c46bd
version (#1387)
Awni Hannun
2025-08-29 08:03:52 -07:00 -
f143957a06
switch quantized and non-quantized to be correct (#1385)
Awni Hannun
2025-08-29 07:53:44 -07:00 -
cfc5d25acd
fix temperature based sampling (#1386)
Awni Hannun
2025-08-29 07:53:37 -07:00 -
4b2a0df237
adding wwdc25 samples (#1370)
Shashank
2025-06-10 10:23:25 -07:00 -
977cd30242
Update lora README.md (#1365)
Denrei Keith
2025-05-01 21:00:14 +08:00 -
4c9f9f9be7
Made llama and mistral files mypy compatible (#1359)
Param Thakkar
2025-04-24 02:53:46 +05:30 -
c52cc748f8
Distributed FLUX (#1325)
Angelos Katharopoulos
2025-03-24 22:16:48 -07:00 -
c243370044
remove mlx lm (#1353)
Awni Hannun
2025-03-18 18:47:55 -07:00 -
7ca05d2e51
LoRa/README.md should be --hf-path instead of --hf-repo (#1350)
Tingzhen
2025-03-16 23:02:52 -04:00 -
d9e1d9c0ef
mlx-lm move notice (#1346)
Awni Hannun
2025-03-16 15:14:28 -07:00 -
2fce02acd8
Add support for Gemma3 (#1336)
Prince Canuma
2025-03-13 16:14:25 +01:00 -
3e5baf583b
Make sure to use UTF-8 when loading tokenizer.json (#1340)
Mirko Nasato
2025-03-13 02:17:14 +00:00 -
4c3df00162
make_sampler creates sampler chain with all sampling parameters (#1330)
Neil Mehta
2025-03-11 16:37:35 -04:00 -
d2e02b3aae
fix mixed quant option (#1326)
Awni Hannun
2025-03-07 08:35:48 -08:00 -
595f5da146
remove lm head if unused (#1324)
Awni Hannun
2025-03-06 15:35:47 -08:00 -
877d2a345b
Change DEFAULT_SEED to None for stochastic generation by default (#1323)
cavit99
2025-03-06 14:49:35 +00:00 -
32d10036de
fix flaky test (#1322)
Awni Hannun
2025-03-05 14:00:09 -08:00 -
e150621095
Adding multiple optimizers to mlx lm (#1315)
Gökdeniz Gülmez
2025-03-05 22:54:54 +01:00 -
56d2db23e1
adding OLMoE architecture (#1321)
Gökdeniz Gülmez
2025-03-05 22:46:06 +01:00 -
e7267d30f8
Distributed support cifar (#1301)
Angelos Katharopoulos
2025-03-05 13:33:15 -08:00 -
f621218ff5
Tool use example (#1316)
Awni Hannun
2025-03-04 13:53:20 -08:00 -
65aa2ec849
use a bool mask for attention (#1319)
Awni Hannun
2025-03-04 12:47:32 -08:00 -
1bc3476a46
chore(lora): Add real-time log buffering fix for nohup execution (#1311)
Pierre-Louis
2025-03-03 09:12:33 -05:00 -
269faa5fa4
Fix plamo2 model to use rms_norm (#1308)
Shunta Saito
2025-03-03 23:12:02 +09:00 -
845cd8c01e
support kimi + more options in chat mode (#1312)
Awni Hannun
2025-02-28 11:33:18 -08:00 -
b2108a0de6
Allow mask prompt in config (#1314)
Awni Hannun
2025-02-28 11:33:04 -08:00 -
eb73549631
Generate: Support Prefill Response (#1299)
madroid
2025-02-27 23:44:00 +08:00 -
00a7379070
Fixes for phi4 mini (#1305)
Awni Hannun
2025-02-26 16:21:54 -08:00 -
0f240a4c7e
Use max tokens from options in mlx_lm evaluate (#1302)
Awni Hannun
2025-02-26 15:46:16 -08:00 -
56e60ad5a6
fix manage for new transformers (#1304)
Awni Hannun
2025-02-26 15:44:57 -08:00 -
b7f742ef56
Mixed quant recipes (#1300)
Pedro Cuenca
2025-02-26 20:32:36 +01:00 -
c37e26a1a3
Add plamo-2-1b model (#1283)
Shunta Saito
2025-02-25 12:24:43 +09:00 -
09b641aaa7
Fix FutureWarning in torch.load by setting weights_only=True (#1295)
Usama Ahmed
2025-02-22 17:08:54 +03:00 -
3d793ecf68
Fix logits processor bugs with spec dec (#1291)
Awni Hannun
2025-02-20 15:55:55 -08:00 -
85669451d0
Fix num layers in fine tune (#1294)
Awni Hannun
2025-02-20 13:32:01 -08:00 -
1cbf5cdac7
use more standard window strategy (#1287)
Awni Hannun
2025-02-19 06:22:51 -08:00 -
96bf37008e
Update README.md to include how to set temperature (#1280)
Matthias Neumayer
2025-02-14 04:32:56 +01:00 -
7b07b14e67
add logits processor to spec gen (#1260)
Awni Hannun
2025-02-13 19:19:53 -08:00 -
ec30dc3538
hunyuan finetune (#1270)
Awni Hannun
2025-02-11 16:49:35 -08:00 -
42413c5d85
fix lora timings after validation (#1278)
Awni Hannun
2025-02-11 16:48:55 -08:00 -
f8cbf159e0
fix sharding for more even number of layers (#1276)
Awni Hannun
2025-02-11 16:26:59 -08:00 -
e879ea70e1
fix generation evaluations (#1277)
Awni Hannun
2025-02-11 16:10:30 -08:00 -
3d677f0870
Add "from_draft" to GenerationResponse (#1272)
Matt Clayton
2025-02-11 18:41:02 -05:00 -
bded1a8fcd
fix looping in whisper (#1273)
Awni Hannun
2025-02-10 13:04:35 -08:00 -
5865899c81
Completion only fine-tuning of instruction models with collections of HF datasets (#1103)
Chime Ogbuji
2025-02-09 23:12:34 -05:00 -
1ced1b00ca
rm temp argument (#1267)
Sri Harsha Pamu
2025-02-09 11:39:11 -08:00 -
f58c7de901
Some improvements to speedup alignment computation in MLX Whisper (#1259)
Awni Hannun
2025-02-08 15:47:00 -08:00 -
1503bd4f55
support hunyuan 7b (#1263)
Awni Hannun
2025-02-08 15:46:47 -08:00 -
31611b62d7
Add IBM granite model (#1265)
Awni Hannun
2025-02-08 15:46:15 -08:00 -
6120a5f376
Faster DSv2/3 expert score computation (#1257)
Awni Hannun
2025-02-07 10:24:57 -08:00 -
52c41b5b5a
Fix prompt cache for models without chat template (#1250)
Awni Hannun
2025-02-06 11:10:58 -08:00 -
747c08e202
Chore: pre-commit bump (#1253)
Nripesh Niketan
2025-02-06 17:06:31 +00:00 -
65b792d7c0
fix lazy load
distributed-layers
Awni Hannun
2025-01-14 13:14:48 -08:00 -
617f9289b9
Make the chat distributed
Angelos Katharopoulos
2024-11-05 13:09:34 -08:00 -
026362e0f8
Remove async eval and add sequential load
Angelos Katharopoulos
2024-11-05 13:04:07 -08:00 -
a0ce0594f6
Temporarily remove async_eval
Angelos Katharopoulos
2024-08-29 10:33:43 -07:00 -
d77840207c
Start distributed inference for llama models
Angelos Katharopoulos
2024-07-15 13:24:50 -07:00 -
e2e5478da5
READMEs: fix typo in link, minor update. (#1246)
Pedro Cuenca
2025-02-04 20:52:32 +01:00 -
21d0ab6e8a
fix deepseek sharding (#1242)
Awni Hannun
2025-02-03 16:59:50 -08:00 -
0989c073b0
Optimizations for mamba1 (#1213)
Gökdeniz Gülmez
2025-02-03 22:36:08 +01:00 -
d9924d08d1
Fix no validation in lora (#1241)
Awni Hannun
2025-02-03 09:55:24 -08:00 -
9c2ef38d4d
only download local shard (#1240)
Awni Hannun
2025-02-02 13:58:44 -08:00 -
e8afb59de4
better overflow correction (#1229)
Awni Hannun
2025-01-28 14:37:30 -08:00 -
7a83077cd7
chore(mlx-lm): support text type content in messages (#1225)
Anchen
2025-01-28 12:13:50 +11:00 -
f44a52e2dc
batched min p and fix spec gen sampling (#1222)
Awni Hannun
2025-01-27 15:40:31 -08:00 -
77faa14ba4
adding support for kyutai's helium (#1208)
Gökdeniz Gülmez
2025-01-26 16:19:07 +01:00 -
f787c08585
comments
dist-eval
Alex Barron
2025-01-23 06:36:31 -08:00 -
d5f49d65b9
ordering
Alex Barron
2024-12-19 00:08:28 -08:00 -
4385363c0f
distributed evaluate
Alex Barron
2024-12-18 22:12:08 -08:00 -
9a3ddc3e65
some fixes for pipeline parallel deep seek r1 (#1216)
Awni Hannun
2025-01-21 19:40:29 -08:00 -
df1406735b
Fix dataset variable name, in datasets.py (#1212)
Victor Nogueira
2025-01-21 23:12:43 +01:00 -
07f88f8057
fix(lora): add back store_true default args (#1205)
Jarrett
2025-01-16 12:15:42 -07:00 -
50f0a7f6d9
add internlm3 (#1206)
Awni Hannun
2025-01-15 14:55:41 -08:00 -
6ae6c72c2e
reduction moved to CPU in case of distributed training (#1200)
Ivan Fioravanti
2025-01-15 02:20:42 +01:00 -
b9eff0d744
Improve printing for FLUX distributed training
flux-dist-improv
Angelos Katharopoulos
2025-01-13 22:32:35 -08:00 -
c117af83b8
fix gpt bigcode (#1204)
Awni Hannun
2025-01-13 10:22:32 -08:00 -
0228c46434
Custom local dataset features (#1085)
Chime Ogbuji
2025-01-13 13:01:18 -05:00 -
bf2da36fc6
Fix Cohere2: mask shape error (long context) (#1202)
Prince Canuma
2025-01-12 21:58:08 +01:00 -
514502da22
Support snapshot_download for ModelScope (#1194)
Xingjun.Wang
2025-01-11 07:29:34 +08:00 -
93c5cfd781
Add a speculative decoding generator (#1155)
Awni Hannun
2025-01-10 15:27:08 -08:00 -
5cae0a60e6
deepseek v3 model with pipeline parallelism (#1191)
Awni Hannun
2025-01-09 15:55:53 -08:00 -
40b88eff48
fix(lora): config yaml & arg default merge bug (#1196)
Jarrett
2025-01-09 12:33:54 -07:00 -
b8f0cacfa8
Use upload_large_folder (#1193)
Pedro Cuenca
2025-01-07 18:18:31 +01:00 -
9183fe8b6d
fix (#1192)
Awni Hannun
2025-01-06 10:12:07 -08:00 -
f2619f507c
Add support for fewshot and apply chat template lm_eval functionality (#1180)
Chime Ogbuji
2025-01-06 10:58:43 -05:00 -
25ec2d8c44
Change the eos-token argument for mlx_lm.generate (#1176)
Angelos Katharopoulos
2025-01-05 22:26:05 -08:00 -
c4833a2f55
fix encoding with special tokens + chat template (#1189)
Awni Hannun
2025-01-03 10:50:59 -08:00 -
3a58c36109
Improvements to mlx_lm.manage (#1178)
Ivan Fioravanti
2025-01-01 16:25:57 +01:00 -
ae53ed9090
add TesseraQ rounding
awq-tq
Alex Barron
2024-12-19 19:35:26 -08:00 -
fc81342afe
remove comment
awq
Alex Barron
2024-12-19 19:14:02 -08:00 -
77d75f3ccc
Add learned AWQ quantization
Alex Barron
2024-12-19 19:13:22 -08:00 -
d4ef909d4a
Length masking for batch inputs (#1173)
Alex Barron
2024-12-18 19:43:52 -08:00 -
db109184b7
Fix no template prompt + top_k sampling (#1166)
Awni Hannun
2024-12-18 18:46:50 -08:00 -
bc08025f41
Add optional quantization types
packed-quants
Angelos Katharopoulos
2024-12-17 22:24:41 -08:00 -
845efddc8c
Fix decoding manually added tokens (#1164)
Billel Mokeddem
2024-12-17 21:54:29 +04:00 -
dfa4dd6c93
Add support for cohere2 (#1157)
Prince Canuma
2024-12-16 17:01:03 +01:00 -
fc0674d2d8
chore: update evaluate.py (#1159)
Ikko Eltociear Ashimine
2024-12-15 23:06:29 +09:00 -
9f2ea5892e
Bpe stream without space (#1154)
Awni Hannun
2024-12-12 13:13:50 -08:00