From 9fd017e1d0fd58020023636cd83c8072972aa786 Mon Sep 17 00:00:00 2001
From: Brian DeRenzi
Date: Wed, 7 Feb 2024 12:37:06 +0000
Subject: [PATCH] Use messages API and system prompt

Thanks to @9j7axvsLuF for some sample code in the original issue:
https://github.com/tomviner/llm-claude/issues/6
---
 llm_claude/__init__.py | 80 ++++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/llm_claude/__init__.py b/llm_claude/__init__.py
index 00a74c6..ffd4ab9 100644
--- a/llm_claude/__init__.py
+++ b/llm_claude/__init__.py
@@ -2,19 +2,14 @@
 import click
 import llm
-from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
+from anthropic import Anthropic
 from pydantic import Field, field_validator
 
-
 @llm.hookimpl
 def register_models(register):
-    # https://docs.anthropic.com/claude/reference/selecting-a-model
-    # Family          Latest major version  Latest full version
-    # Claude Instant  claude-instant-1      claude-instant-1.1
-    # Claude          claude-2              claude-2.0
-    register(Claude("claude-instant-1"), aliases=("claude-instant",))
-    register(Claude("claude-2"), aliases=("claude",))
-
+    # Register the models under the current naming conventions
+    register(Claude("claude-instant-1.2"), aliases=("claude-instant",))
+    register(Claude("claude-2.1"), aliases=("claude",))
 
 class Claude(llm.Model):
     needs_key = "claude"
@@ -22,46 +17,61 @@ class Claude(llm.Model):
     can_stream = True
 
     class Options(llm.Options):
-        max_tokens_to_sample: Optional[int] = Field(
-            description="The maximum number of tokens to generate before stopping",
-            default=10_000,
+        max_tokens: Optional[int] = Field(
+            description="The maximum number of tokens for the model to generate",
+            default=4096,  # the maximum allowed for claude-2.1
         )
 
-        @field_validator("max_tokens_to_sample")
-        def validate_length(cls, max_tokens_to_sample):
-            if not (0 < max_tokens_to_sample <= 1_000_000):
-                raise ValueError("max_tokens_to_sample must be in range 1-1,000,000")
-            return max_tokens_to_sample
+        @field_validator("max_tokens")
+        def validate_max_tokens(cls, max_tokens):
+            if not (0 < max_tokens <= 4096):  # updated maximum limit
+                raise ValueError("max_tokens must be in range 1-4096 for claude-2.1")
+            return max_tokens
 
     def __init__(self, model_id):
         self.model_id = model_id
 
     def generate_prompt_messages(self, prompt, conversation):
-        if conversation:
-            for response in conversation.responses:
-                yield self.build_prompt(response.prompt.prompt, response.text())
-
-        yield self.build_prompt(prompt)
-
-    def build_prompt(self, human, ai=""):
-        return f"{HUMAN_PROMPT} {human}{AI_PROMPT}{ai}"
+        # Build the list of message dicts from the conversation history
+        messages = []
+        current_system = None
+        if conversation is not None:
+            for prev_response in conversation.responses:
+                if (
+                    prev_response.prompt.system
+                    and prev_response.prompt.system != current_system
+                ):
+                    current_system = prev_response.prompt.system
+                messages.append({"role": "user", "content": prev_response.prompt.prompt})
+                messages.append({"role": "assistant", "content": prev_response.text()})
+        if prompt.system and prompt.system != current_system:
+            current_system = prompt.system
+        messages.append({"role": "user", "content": prompt.prompt})
+        return messages, current_system
 
     def execute(self, prompt, stream, response, conversation):
         anthropic = Anthropic(api_key=self.get_key())
 
-        prompt_str = "".join(self.generate_prompt_messages(prompt.prompt, conversation))
+        messages, system_prompt = self.generate_prompt_messages(prompt, conversation)
 
-        completion = anthropic.completions.create(
-            model=self.model_id,
-            max_tokens_to_sample=prompt.options.max_tokens_to_sample,
-            prompt=prompt_str,
-            stream=stream,
-        )
         if stream:
-            for comp in completion:
-                yield comp.completion
+            # Streaming response via the Messages API
+            with anthropic.beta.messages.stream(
+                max_tokens=prompt.options.max_tokens,
+                messages=messages,
+                system=system_prompt,
+                model=self.model_id,
+            ) as stream_response:
+                for text in stream_response.text_stream:
+                    yield text
         else:
-            yield completion.completion
+            # Non-streaming response
+            message_response = anthropic.beta.messages.create(
+                model=self.model_id,
+                max_tokens=prompt.options.max_tokens,
+                messages=messages,
+                system=system_prompt,
+            )
+            # message_response.content is a list of content blocks, each of
+            # which exposes its generated text as a .text attribute
+            yield "".join(content_block.text for content_block in message_response.content)
 
     def __str__(self):
         return "Anthropic: {}".format(self.model_id)
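
For context, a minimal standalone sketch of the Messages API call shape this
patch adopts, outside the plugin. The model name, token limit, prompts, and
reliance on the ANTHROPIC_API_KEY environment variable are illustrative
assumptions, not part of the patch; current SDK releases also expose the API
at client.messages rather than the beta namespace used above:

    from anthropic import Anthropic

    client = Anthropic()  # reads ANTHROPIC_API_KEY from the environment

    message = client.messages.create(
        model="claude-2.1",  # illustrative; mirrors the model registered above
        max_tokens=256,
        # Unlike the old HUMAN_PROMPT/AI_PROMPT string format, the system
        # prompt is a top-level parameter, separate from the messages list
        system="You are a terse assistant.",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    # message.content is a list of content blocks; each exposes .text
    print("".join(block.text for block in message.content))

Because the system prompt travels outside the messages list, the patched
generate_prompt_messages returns it separately rather than interleaving it
into the user/assistant turns.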