From 1a88e0012507862f02f873416d03c0def7bab9bf Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 10:38:08 -0500 Subject: [PATCH 1/9] update model used in tests --- .../anthropic/tests/integration_tests/test_chat_models.py | 2 +- .../partners/anthropic/tests/integration_tests/test_standard.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 8a6e3c42a9a42..e0f4fd67dd6f1 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -24,7 +24,7 @@ from langchain_anthropic import ChatAnthropic, ChatAnthropicMessages from tests.unit_tests._utils import FakeCallbackHandler -MODEL_NAME = "claude-3-5-sonnet-20240620" +MODEL_NAME = "claude-3-5-haiku-latest" def test_stream() -> None: diff --git a/libs/partners/anthropic/tests/integration_tests/test_standard.py b/libs/partners/anthropic/tests/integration_tests/test_standard.py index dc69c1dda4034..7079951f45d66 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_standard.py +++ b/libs/partners/anthropic/tests/integration_tests/test_standard.py @@ -19,7 +19,7 @@ def chat_model_class(self) -> Type[BaseChatModel]: @property def chat_model_params(self) -> dict: - return {"model": "claude-3-haiku-20240307"} + return {"model": "claude-3-5-haiku-latest"} @property def supports_image_inputs(self) -> bool: From 157da4529fb61af4c0003d65d2137eb2e8aca6fb Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 10:52:33 -0500 Subject: [PATCH 2/9] update --- .../anthropic/tests/integration_tests/test_chat_models.py | 5 +++-- .../anthropic/tests/integration_tests/test_standard.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index e0f4fd67dd6f1..3fa6d5dd85795 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -25,6 +25,7 @@ from tests.unit_tests._utils import FakeCallbackHandler MODEL_NAME = "claude-3-5-haiku-latest" +IMAGE_MODEL_NAME = "claude-3-5-sonnet-latest" def test_stream() -> None: @@ -318,7 +319,7 @@ async def test_anthropic_async_streaming_callback() -> None: def test_anthropic_multimodal() -> None: """Test that multimodal inputs are handled correctly.""" - chat = ChatAnthropic(model=MODEL_NAME) + chat = ChatAnthropic(model=IMAGE_MODEL_NAME) messages: list[BaseMessage] = [ HumanMessage( content=[ @@ -602,7 +603,7 @@ def test_pdf_document_input() -> None: url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" data = b64encode(requests.get(url).content).decode() - result = ChatAnthropic(model=MODEL_NAME).invoke( + result = ChatAnthropic(model=IMAGE_MODEL_NAME).invoke( [ HumanMessage( [ diff --git a/libs/partners/anthropic/tests/integration_tests/test_standard.py b/libs/partners/anthropic/tests/integration_tests/test_standard.py index 7079951f45d66..c4f3805ae97ed 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_standard.py +++ b/libs/partners/anthropic/tests/integration_tests/test_standard.py @@ -19,7 +19,7 @@ def chat_model_class(self) -> Type[BaseChatModel]: @property def chat_model_params(self) -> dict: - return {"model": "claude-3-5-haiku-latest"} + return {"model": "claude-3-5-sonnet-latest"} @property def supports_image_inputs(self) -> bool: From 3bb132ca3fd76d8e67f92029cb50b004fda97d93 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 13:51:20 -0500 Subject: [PATCH 3/9] update --- .../anthropic/tests/integration_tests/test_chat_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 3fa6d5dd85795..d07ebe887acf6 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -114,7 +114,7 @@ async def test_astream() -> None: # Check expected raw API output async_client = model._async_client params: dict = { - "model": "claude-3-haiku-20240307", + "model": MODEL_NAME, "max_tokens": 1024, "messages": [{"role": "user", "content": "hi"}], "temperature": 0.0, From d33ae183fb885a8d46dc52accd8dcee82948316b Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 13:58:27 -0500 Subject: [PATCH 4/9] update --- libs/partners/anthropic/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/partners/anthropic/Makefile b/libs/partners/anthropic/Makefile index b7b3ca1eb2dfb..57d217f3f2145 100644 --- a/libs/partners/anthropic/Makefile +++ b/libs/partners/anthropic/Makefile @@ -11,10 +11,10 @@ TEST_FILE ?= tests/unit_tests/ integration_test integration_tests: TEST_FILE=tests/integration_tests/ test tests: - uv run --group test pytest -vvv --timeout 30 --disable-socket --allow-unix-socket $(TEST_FILE) + uv run --group test pytest -vvv --disable-socket --allow-unix-socket $(TEST_FILE) integration_test integration_tests: - uv run --group test --group test_integration pytest -vvv --timeout 30 $(TEST_FILE) + uv run --group test --group test_integration pytest -vvv $(TEST_FILE) test_watch: uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE) From 6b30a11ee58b3d41579d903008fab76c6e771fe2 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 14:03:22 -0500 Subject: [PATCH 5/9] revert --- libs/partners/anthropic/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/partners/anthropic/Makefile b/libs/partners/anthropic/Makefile index 57d217f3f2145..136c4debc853c 100644 --- a/libs/partners/anthropic/Makefile +++ b/libs/partners/anthropic/Makefile @@ -14,7 +14,7 @@ test tests: uv run --group test pytest -vvv --disable-socket --allow-unix-socket $(TEST_FILE) integration_test integration_tests: - uv run --group test --group test_integration pytest -vvv $(TEST_FILE) + uv run --group test --group test_integration pytest -vvv --timeout 30 $(TEST_FILE) test_watch: uv run --group test ptw --snapshot-update --now . -- -vv $(TEST_FILE) From a697d6aed764b44a35d2ef7fd13a167628196b93 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 14:03:31 -0500 Subject: [PATCH 6/9] update --- .../integration_tests/test_chat_models.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index d07ebe887acf6..da9b89456b820 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -112,25 +112,25 @@ async def test_astream() -> None: assert token.usage_metadata is None # Check expected raw API output - async_client = model._async_client - params: dict = { - "model": MODEL_NAME, - "max_tokens": 1024, - "messages": [{"role": "user", "content": "hi"}], - "temperature": 0.0, - } - stream = await async_client.messages.create(**params, stream=True) - async for event in stream: - if event.type == "message_start": - assert event.message.usage.input_tokens > 1 - # Note: this single output token included in message start event - # does not appear to contribute to overall output token counts. It - # is excluded from the total token count. - assert event.message.usage.output_tokens == 1 - elif event.type == "message_delta": - assert event.usage.output_tokens > 1 - else: - pass + # async_client = model._async_client + # params: dict = { + # "model": MODEL_NAME, + # "max_tokens": 1024, + # "messages": [{"role": "user", "content": "hi"}], + # "temperature": 0.0, + # } + # stream = await async_client.messages.create(**params, stream=True) + # async for event in stream: + # if event.type == "message_start": + # assert event.message.usage.input_tokens > 1 + # # Note: this single output token included in message start event + # # does not appear to contribute to overall output token counts. It + # # is excluded from the total token count. + # assert event.message.usage.output_tokens == 1 + # elif event.type == "message_delta": + # assert event.usage.output_tokens > 1 + # else: + # pass async def test_abatch() -> None: From 07214e804ee7eba12de2e8d15b4ab1ac0b1edda2 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 14:06:41 -0500 Subject: [PATCH 7/9] x --- .../integration_tests/test_chat_models.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index da9b89456b820..a4c3eaf5f5a99 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -100,16 +100,16 @@ async def test_astream() -> None: assert "stop_sequence" in full.response_metadata # test usage metadata can be excluded - model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg] - async for token in model.astream("hi"): - assert isinstance(token, AIMessageChunk) - assert token.usage_metadata is None - # check we override with kwarg - model = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg] - assert model.stream_usage - async for token in model.astream("hi", stream_usage=False): - assert isinstance(token, AIMessageChunk) - assert token.usage_metadata is None + # model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg] + # async for token in model.astream("hi"): + # assert isinstance(token, AIMessageChunk) + # assert token.usage_metadata is None + # # check we override with kwarg + # model = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg] + # assert model.stream_usage + # async for token in model.astream("hi", stream_usage=False): + # assert isinstance(token, AIMessageChunk) + # assert token.usage_metadata is None # Check expected raw API output # async_client = model._async_client From b879d135cfc6d904c502ae64861467588e58e52e Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 14:09:05 -0500 Subject: [PATCH 8/9] x --- .../integration_tests/test_chat_models.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index a4c3eaf5f5a99..e5d033288342e 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -112,25 +112,25 @@ async def test_astream() -> None: # assert token.usage_metadata is None # Check expected raw API output - # async_client = model._async_client - # params: dict = { - # "model": MODEL_NAME, - # "max_tokens": 1024, - # "messages": [{"role": "user", "content": "hi"}], - # "temperature": 0.0, - # } - # stream = await async_client.messages.create(**params, stream=True) - # async for event in stream: - # if event.type == "message_start": - # assert event.message.usage.input_tokens > 1 - # # Note: this single output token included in message start event - # # does not appear to contribute to overall output token counts. It - # # is excluded from the total token count. - # assert event.message.usage.output_tokens == 1 - # elif event.type == "message_delta": - # assert event.usage.output_tokens > 1 - # else: - # pass + async_client = llm._async_client + params: dict = { + "model": MODEL_NAME, + "max_tokens": 1024, + "messages": [{"role": "user", "content": "hi"}], + "temperature": 0.0, + } + stream = await async_client.messages.create(**params, stream=True) + async for event in stream: + if event.type == "message_start": + assert event.message.usage.input_tokens > 1 + # Note: this single output token included in message start event + # does not appear to contribute to overall output token counts. It + # is excluded from the total token count. + assert event.message.usage.output_tokens == 1 + elif event.type == "message_delta": + assert event.usage.output_tokens > 1 + else: + pass async def test_abatch() -> None: From 23f6d39666716810cae5b5e49176596d20ac4b38 Mon Sep 17 00:00:00 2001 From: Chester Curme Date: Sun, 23 Feb 2025 14:12:13 -0500 Subject: [PATCH 9/9] x --- .../integration_tests/test_chat_models.py | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index e5d033288342e..309969275651d 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -99,18 +99,6 @@ async def test_astream() -> None: assert "stop_reason" in full.response_metadata assert "stop_sequence" in full.response_metadata - # test usage metadata can be excluded - # model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg] - # async for token in model.astream("hi"): - # assert isinstance(token, AIMessageChunk) - # assert token.usage_metadata is None - # # check we override with kwarg - # model = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg] - # assert model.stream_usage - # async for token in model.astream("hi", stream_usage=False): - # assert isinstance(token, AIMessageChunk) - # assert token.usage_metadata is None - # Check expected raw API output async_client = llm._async_client params: dict = { @@ -133,6 +121,20 @@ async def test_astream() -> None: pass +async def test_stream_usage() -> None: + """Test usage metadata can be excluded.""" + model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg] + async for token in model.astream("hi"): + assert isinstance(token, AIMessageChunk) + assert token.usage_metadata is None + # check we override with kwarg + model = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg] + assert model.stream_usage + async for token in model.astream("hi", stream_usage=False): + assert isinstance(token, AIMessageChunk) + assert token.usage_metadata is None + + async def test_abatch() -> None: """Test streaming tokens from ChatAnthropicMessages.""" llm = ChatAnthropicMessages(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]