refactor: image model get_num_tokens override #1994

Merged · 1 commit · Jan 8, 2025

QwenVLChatModel:

```diff
@@ -2,12 +2,11 @@

 from typing import Dict

-from langchain_community.chat_models import ChatOpenAI
-
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-class QwenVLChatModel(MaxKBBaseModel, ChatOpenAI):
+class QwenVLChatModel(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -18,6 +17,7 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
         return chat_tong_yi
```
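
Across all of these files, the new `stream_usage=True` supersedes the commented-out `stream_options={"include_usage": True}` line above it: in langchain-openai, `stream_usage=True` requests a trailing usage chunk from the API and surfaces it as `usage_metadata` on the final streamed message chunk. A minimal sketch of that behavior against a plain `ChatOpenAI` (model name and key are placeholders; the exact semantics are assumed from recent langchain-openai versions):

```python
from langchain_openai import ChatOpenAI

# Placeholder model/key; with stream_usage=True the last chunk of the stream
# carries usage_metadata instead of usage being dropped during streaming.
llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key="sk-placeholder",
                 streaming=True, stream_usage=True)

usage = None
for chunk in llm.stream("Summarize token accounting in one sentence."):
    if chunk.usage_metadata is not None:  # only the final chunk has it
        usage = chunk.usage_metadata
print(usage)  # e.g. {'input_tokens': ..., 'output_tokens': ..., 'total_tokens': ...}
```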

Token-count fallback override:

```diff
@@ -1,7 +1,7 @@
-from typing import Dict
+from typing import Dict, List

 from langchain_core.messages import BaseMessage, get_buffer_string
 from langchain_openai import AzureChatOpenAI
 from langchain_openai.chat_models import ChatOpenAI

 from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
@@ -26,3 +26,17 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             streaming=True,
             **optional_params,
         )
+
+    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
+        try:
+            return super().get_num_tokens_from_messages(messages)
+        except Exception as e:
+            tokenizer = TokenizerManage.get_tokenizer()
+            return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages])
+
+    def get_num_tokens(self, text: str) -> int:
+        try:
+            return super().get_num_tokens(text)
+        except Exception as e:
+            tokenizer = TokenizerManage.get_tokenizer()
+            return len(tokenizer.encode(text))
```
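
The `try`/`except` pair above is the core of this PR: langchain-openai's `get_num_tokens_from_messages` raises `NotImplementedError` for model names outside the GPT families it knows how to count, so for vision models such as `qwen-vl-max` token accounting would abort without a fallback. The override catches that and counts with MaxKB's local tokenizer instead. A quick repro of the failure mode (no request is sent; behavior assumed from recent langchain-openai versions):

```python
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

# Placeholder key; token counting is local, so nothing is sent to the API.
llm = ChatOpenAI(model="qwen-vl-max", openai_api_key="sk-placeholder")
try:
    llm.get_num_tokens_from_messages([HumanMessage(content="hello")])
except NotImplementedError as err:
    print(err)  # this is what the new override falls back from
```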

OllamaImage:

```diff
@@ -1,15 +1,8 @@
 from typing import Dict
 from urllib.parse import urlparse, ParseResult

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
-
-
-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


 def get_base_url(url: str):
@@ -20,7 +13,7 @@ def get_base_url(url: str):
     return result_url[:-1] if result_url.endswith("/") else result_url


-class OllamaImage(MaxKBBaseModel, ChatOpenAI):
+class OllamaImage(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -34,5 +27,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_key=model_credential.get('api_key'),
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```
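
`get_base_url` itself is collapsed in the diff view; only its final line is visible as context. A hedged reconstruction from the visible imports (`urlparse`, `ParseResult`) and that return line, for orientation only:

```python
from urllib.parse import ParseResult, urlparse

def get_base_url(url: str) -> str:
    # Rebuild the URL without params/query/fragment, then drop a trailing slash.
    parse = urlparse(url)
    result_url = ParseResult(scheme=parse.scheme, netloc=parse.netloc, path=parse.path,
                             params='', query='', fragment='').geturl()
    return result_url[:-1] if result_url.endswith("/") else result_url

print(get_base_url("http://localhost:11434/"))  # -> http://localhost:11434
```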

OpenAIImage:

```diff
@@ -1,17 +1,10 @@
 from typing import Dict

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
-
-
-class OpenAIImage(MaxKBBaseModel, ChatOpenAI):
+class OpenAIImage(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -22,5 +15,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_key=model_credential.get('api_key'),
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```

QwenVLChatModel (a second file with the identical change):

```diff
@@ -2,12 +2,11 @@

 from typing import Dict

-from langchain_community.chat_models import ChatOpenAI
-
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-class QwenVLChatModel(MaxKBBaseModel, ChatOpenAI):
+class QwenVLChatModel(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -18,6 +17,7 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_base='https://dashscope.aliyuncs.com/compatible-mode/v1',
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
         return chat_tong_yi
```

TencentVision:

```diff
@@ -1,17 +1,10 @@
 from typing import Dict

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
-
-
-class TencentVision(MaxKBBaseModel, ChatOpenAI):
+class TencentVision(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -22,5 +15,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_key=model_credential.get('api_key'),
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```

VolcanicEngineImage:

```diff
@@ -1,17 +1,10 @@
 from typing import Dict

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
-
-
-class VolcanicEngineImage(MaxKBBaseModel, ChatOpenAI):
+class VolcanicEngineImage(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -22,5 +15,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_base=model_credential.get('api_base'),
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```

XinferenceImage:

```diff
@@ -1,17 +1,10 @@
 from typing import Dict

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
-
-
-class XinferenceImage(MaxKBBaseModel, ChatOpenAI):
+class XinferenceImage(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -22,5 +15,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_key=model_credential.get('api_key'),
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```

ZhiPuImage:

```diff
@@ -1,17 +1,10 @@
 from typing import Dict

-from langchain_openai.chat_models import ChatOpenAI
-
-from common.config.tokenizer_manage_config import TokenizerManage
 from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_chat_open_ai import BaseChatOpenAI


-def custom_get_token_ids(text: str):
-    tokenizer = TokenizerManage.get_tokenizer()
-    return tokenizer.encode(text)
-
-
-class ZhiPuImage(MaxKBBaseModel, ChatOpenAI):
+class ZhiPuImage(MaxKBBaseModel, BaseChatOpenAI):

     @staticmethod
     def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
@@ -22,5 +15,6 @@ def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
             openai_api_base='https://open.bigmodel.cn/api/paas/v4',
             # stream_options={"include_usage": True},
             streaming=True,
+            stream_usage=True,
             **optional_params,
         )
```
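
Condensing the whole refactor into one self-contained sketch: every image model now inherits a `ChatOpenAI` subclass that streams usage metadata and falls back to a local tokenizer whenever upstream counting fails. `SimpleTokenizer` is a stand-in for MaxKB's `TokenizerManage`, and the class name here is illustrative:

```python
from typing import List

from langchain_core.messages import BaseMessage, HumanMessage, get_buffer_string
from langchain_openai.chat_models import ChatOpenAI


class SimpleTokenizer:
    """Stand-in for TokenizerManage.get_tokenizer(); whitespace tokens only."""

    def encode(self, text: str) -> List[str]:
        return text.split()


class BaseChatOpenAISketch(ChatOpenAI):
    """What BaseChatOpenAI now gives every image model in this PR."""

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
        try:
            return super().get_num_tokens_from_messages(messages)
        except Exception:
            tokenizer = SimpleTokenizer()
            return sum(len(tokenizer.encode(get_buffer_string([m]))) for m in messages)

    def get_num_tokens(self, text: str) -> int:
        try:
            return super().get_num_tokens(text)
        except Exception:
            tokenizer = SimpleTokenizer()
            return len(tokenizer.encode(text))


model = BaseChatOpenAISketch(model="qwen-vl-max", openai_api_key="sk-placeholder",
                             streaming=True, stream_usage=True)
# Upstream counting raises for non-GPT model names, so the fallback engages:
print(model.get_num_tokens_from_messages([HumanMessage(content="hello world")]))  # 3
```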