Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-11-04 13:38:09 +08:00)
	Example of response generation with optional arguments (#853)
* Generate response with optional arguments
* Reference response generation example
* Include transformers and sentencepiece
* Update example to run Mistral-7B-Instruct-v0.3
* Link to generation example
* Style changes from pre-commit
committed by GitHub
parent 68e88d42fb
commit 63800c8feb
llms/mlx_lm/examples/generate_response.py (normal file, 40 lines added)
							@@ -0,0 +1,40 @@
from mlx_lm import generate, load

# Specify the checkpoint
checkpoint = "mistralai/Mistral-7B-Instruct-v0.3"

# Load the corresponding model and tokenizer
model, tokenizer = load(path_or_hf_repo=checkpoint)
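# (load fetches the checkpoint from the Hugging Face Hub on first use
# and returns the MLX model together with its tokenizer.)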

# Specify the prompt and conversation history
prompt = "Why is the sky blue?"
conversation = [{"role": "user", "content": prompt}]

# Transform the prompt into the chat template
prompt = tokenizer.apply_chat_template(
    conversation=conversation, tokenize=False, add_generation_prompt=True
)
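# (For Mistral-Instruct checkpoints the rendered prompt wraps the user
# message in [INST] ... [/INST] markers; the exact template comes from
# the tokenizer shipped with the checkpoint.)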

# Specify the maximum number of tokens
max_tokens = 1_000

# Specify if tokens and timing information will be printed
verbose = True

# Some optional arguments for causal language model generation
generation_args = {
    "temp": 0.7,
    "repetition_penalty": 1.2,
    "repetition_context_size": 20,
    "top_p": 0.95,
}
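# (temp scales the logits before sampling, top_p keeps only the smallest
# nucleus of tokens whose probabilities sum to 0.95, and
# repetition_penalty discourages tokens seen within the last
# repetition_context_size positions.)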

# Generate a response with the specified settings
response = generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    max_tokens=max_tokens,
    verbose=verbose,
    **generation_args,
)
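Because verbose=True, the example prints tokens and timing information while generating, and generate also returns the full response string. If the tokens are needed incrementally in code rather than on stdout, mlx_lm also ships a streaming generator; the following is a minimal sketch assuming the stream_generate API as it existed around this commit (it yielded decoded text segments; later releases changed the yield type, so treat the loop body as an assumption):

from mlx_lm import load, stream_generate

model, tokenizer = load(path_or_hf_repo="mistralai/Mistral-7B-Instruct-v0.3")

prompt = tokenizer.apply_chat_template(
    conversation=[{"role": "user", "content": "Why is the sky blue?"}],
    tokenize=False,
    add_generation_prompt=True,
)

# Print each decoded segment as soon as it is produced.
for segment in stream_generate(model, tokenizer, prompt, max_tokens=1_000):
    print(segment, end="", flush=True)
print()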