mlx-examples/.circleci/config.yml
Chime Ogbuji f2619f507c
Add support for fewshot and apply chat template lm_eval functionality (#1180)
* Add support for multiturn fewshot examples and chat templates

Added two new arguments to the evaluation script: `--fewshot-as-multiturn` and `--apply-chat-template` which correspond to lm_eval options of similar names and are very often used to ensure apples-to-apples comparisons of lm_evaluation results

* Add HF overrides for methods needed by added options

* don't add duplicate bos

---------

Co-authored-by: Awni Hannun <awni@apple.com>
2025-01-06 07:58:43 -08:00

68 lines
1.6 KiB
YAML

version: 2.1
orbs:
apple: ml-explore/pr-approval@0.1.0
jobs:
linux_build_and_test:
docker:
- image: cimg/python:3.9
steps:
- checkout
- run:
name: Run style checks
command: |
pip install pre-commit
pre-commit run --all
if ! git diff --quiet; then echo 'Style checks failed, please install pre-commit and run pre-commit run --all and push the change'; exit 1; fi
mlx_lm_build_and_test:
macos:
xcode: "15.2.0"
resource_class: macos.m1.large.gen1
steps:
- checkout
- run:
name: Install dependencies
command: |
brew install python@3.9
python3.9 -m venv env
source env/bin/activate
pip install --upgrade pip
pip install unittest-xml-reporting
cd llms/
pip install -e ".[test]"
- run:
name: Run Python tests
command: |
source env/bin/activate
python -m xmlrunner discover -v llms/tests -o test-results/
- store_test_results:
path: test-results
workflows:
build_and_test:
when:
matches:
pattern: "^(?!pull/)[-\\w]+$"
value: << pipeline.git.branch >>
jobs:
- mlx_lm_build_and_test
- linux_build_and_test
prb:
when:
matches:
pattern: "^pull/\\d+(/head)?$"
value: << pipeline.git.branch >>
jobs:
- hold:
type: approval
- apple/authenticate:
context: pr-approval
- mlx_lm_build_and_test:
requires: [ hold ]
- linux_build_and_test:
requires: [ hold ]