1
1
import os
2
- from typing import Any , AsyncIterator , Dict , Iterator , List , Optional , Tuple
2
+ import re
3
+ from typing import Any , AsyncIterator , Dict , Iterator , List , Optional , Tuple , Union
3
4
4
5
import anthropic
5
6
from langchain_core ._api .deprecation import deprecated
24
25
# Maps a message's ``type`` string to the ``role`` value placed in the
# formatted message dicts built by ``_format_messages``.
_message_type_lookups = {"human" : "user" , "ai" : "assistant" }
25
26
26
27
28
+ def _format_image (image_url : str ) -> Dict :
29
+ """
30
+ Formats an image of format data:image/jpeg;base64,{b64_string}
31
+ to a dict for anthropic api
32
+
33
+ {
34
+ "type": "base64",
35
+ "media_type": "image/jpeg",
36
+ "data": "/9j/4AAQSkZJRg...",
37
+ }
38
+
39
+ And throws an error if it's not a b64 image
40
+ """
41
+ regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
42
+ match = re .match (regex , image_url )
43
+ if match is None :
44
+ raise ValueError (
45
+ "Anthropic only supports base64-encoded images currently."
46
+ " Example: data:image/png;base64,'/9j/4AAQSk'..."
47
+ )
48
+ return {
49
+ "type" : "base64" ,
50
+ "media_type" : match .group ("media_type" ),
51
+ "data" : match .group ("data" ),
52
+ }
53
+
54
+
27
55
def _format_messages(messages: List[BaseMessage]) -> Tuple[Optional[str], List[Dict]]:
    """Format messages for anthropic.

    A leading system message is split off and returned separately; every
    other message becomes a ``{"role": ..., "content": ...}`` dict.

    String content is passed through unchanged. List content is converted
    item by item: plain strings become ``{"type": "text", "text": ...}``,
    dicts of type ``"image_url"`` become ``{"type": "image", "source": ...}``
    via ``_format_image``, and any other dict carrying a ``"type"`` key is
    forwarded as-is.

    Args:
        messages: The messages to convert. A system message is only accepted
            at index 0 and must have string content.

    Returns:
        A ``(system, formatted_messages)`` tuple, where ``system`` is the
        system message content (or ``None`` when there is none).

    Raises:
        ValueError: If a system message is not first or is not a string, if
            non-string content is not a list, if a content item is neither
            ``str`` nor ``dict``, or if a dict item has no ``"type"`` key.
        KeyError: If a message type has no entry in ``_message_type_lookups``.
    """
    system: Optional[str] = None
    formatted_messages: List[Dict] = []
    for i, message in enumerate(messages):
        if message.type == "system":
            if i != 0:
                raise ValueError("System message must be at beginning of message list.")
            if not isinstance(message.content, str):
                raise ValueError(
                    "System message must be a string, "
                    f"instead was: {type(message.content)}"
                )
            system = message.content
            continue

        role = _message_type_lookups[message.type]
        content: Union[str, List[Dict]]

        if isinstance(message.content, str):
            content = message.content
        else:
            # Raise instead of assert: assertions are stripped under
            # ``python -O``, which would let invalid content through.
            if not isinstance(message.content, list):
                raise ValueError(
                    "Anthropic message content must be str or list of dicts"
                )

            content = []
            for item in message.content:
                if isinstance(item, str):
                    content.append({"type": "text", "text": item})
                elif isinstance(item, dict):
                    if "type" not in item:
                        raise ValueError("Dict content item must have a type key")
                    if item["type"] == "image_url":
                        # Convert the data-URL form into an image source dict.
                        source = _format_image(item["image_url"]["url"])
                        content.append({"type": "image", "source": source})
                    else:
                        content.append(item)
                else:
                    raise ValueError(
                        f"Content items must be str or dict, instead was: {type(item)}"
                    )

        formatted_messages.append({"role": role, "content": content})
    return system, formatted_messages
56
128
57
129
0 commit comments