
fix multimodal readme and config (#195)
Ceceliachenen authored Sep 4, 2024
1 parent 4ef5451 commit 08c15e0
Showing 8 changed files with 68 additions and 18 deletions.
22 changes: 22 additions & 0 deletions README.md
@@ -119,13 +119,35 @@ poetry install
export DASHSCOPE_API_KEY=""
```

To use Object Storage Service (OSS) for file storage (required when running in multimodal mode), first add the following snippet to both src/pai_rag/config/settings.toml and src/pai_rag/config/settings_multi_modal.toml:

```toml
[rag.oss_store]
bucket = ""
endpoint = ""
prefix = ""
```

You also need to set the following environment variables:

```bash
export OSS_ACCESS_KEY_ID=""
export OSS_ACCESS_KEY_SECRET=""
```

Start the RAG service:

```bash
# Supports custom host (default 0.0.0.0), port (default 8001), config (default src/pai_rag/config/settings.yaml), enable-example (default True), and skip-download-models (default False).
# [bge-small-zh-v1.5, easyocr] are downloaded by default; skip this by passing --skip-download-models.
# You can use the "load_model" command to download other models, including [bge-small-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese].
pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--enable-example False] [--skip-download-models]
```

The default configuration file is src/pai_rag/config/settings.yaml. If you need the multimodal LLM module, switch to src/pai_rag/config/settings_multi_modal.yaml instead:

```bash
pai_rag serve -c src/pai_rag/config/settings_multi_modal.yaml
```

5. Download provided models to local directory

```bash
24 changes: 24 additions & 0 deletions README_zh.md
@@ -119,13 +119,37 @@ poetry install
export DASHSCOPE_API_KEY=""
```

To use OSS for file storage (this must be configured in advance when using multimodal mode), add the following to src/pai_rag/config/settings.toml and src/pai_rag/config/settings_multi_modal.toml:

```toml
[rag.oss_store]
bucket = ""
endpoint = ""
prefix = ""
```

You also need to set the following environment variables on the command line:

```bash
export OSS_ACCESS_KEY_ID=""
export OSS_ACCESS_KEY_SECRET=""
```

Start the RAG service:

```bash
# Supports custom host (default 0.0.0.0), port (default 8001), config (default src/pai_rag/config/settings.yaml), enable-example (default True), and skip-download-models (False unless set).
# [bge-small-zh-v1.5, easyocr] are downloaded on startup by default; set skip-download-models to avoid downloading models at startup.
# You can use the "load_model" command to download other models, including [bge-small-zh-v1.5, easyocr, SGPT-125M-weightedmean-nli-bitfit, bge-large-zh-v1.5, bge-m3, bge-reranker-base, bge-reranker-large, paraphrase-multilingual-MiniLM-L12-v2, qwen_1.8b, text2vec-large-chinese].
pai_rag serve [--host HOST] [--port PORT] [--config CONFIG_FILE] [--enable-example False] [--skip-download-models]
```

The default configuration file is src/pai_rag/config/settings.yaml. If you need multimodal mode, switch to src/pai_rag/config/settings_multi_modal.yaml:

```bash
pai_rag serve -c src/pai_rag/config/settings_multi_modal.yaml
```

5. Download other models to a local directory

```bash
10 changes: 1 addition & 9 deletions src/pai_rag/config/settings.toml
@@ -30,7 +30,6 @@ type = "local"

[rag.data_reader]
type = "SimpleDirectoryReader"
enable_multimodal = true

# embedding configurations, source support API: OpenAI,DashScope; and local model:HuggingFace
# if use API, need set OPENAI_API_KEY or DASHSCOPE_API_KEY in ENV, If HuggingFace, need set model_name
@@ -42,9 +41,6 @@ enable_multimodal = true
source = "DashScope"
embed_batch_size = 10

[rag.embedding.multi_modal]
source = "cnclip"

[rag.evaluation]
retrieval = ["mrr", "hit_rate"]
response_label = true
Expand All @@ -60,6 +56,7 @@ vector_store.type = "FAISS"
# llm configurations, source support API: OpenAI,DashScope or PAI-EAS's deployment
# eg.
# source = "PaiEas"
# name = ""
# endpoint = ""
# token = ""
[rag.llm]
@@ -69,11 +66,6 @@ name = "qwen-turbo"
[rag.llm.function_calling_llm]
source = ""

[rag.llm.multi_modal]
source = "PaiEas"
endpoint = ""
token = ""

[rag.llm_chat_engine]
type = "SimpleChatEngine"

4 changes: 3 additions & 1 deletion src/pai_rag/config/settings_multi_modal.toml
@@ -57,6 +57,7 @@ vector_store.type = "FAISS"
# llm configurations, source support API: OpenAI,DashScope or PAI-EAS's deployment
# eg.
# source = "PaiEas"
# name = ""
# endpoint = ""
# token = ""
[rag.llm]
@@ -68,6 +69,7 @@ source = ""

[rag.llm.multi_modal]
source = "DashScope"
name = "qwen-vl-max"

[rag.llm_chat_engine]
type = "SimpleChatEngine"
@@ -103,7 +105,7 @@ similarity_top_k = 3
image_similarity_top_k = 2
retrieval_mode = "hybrid" # [hybrid, embedding, keyword, router]
query_rewrite_n = 1 # set to 1 to disable query generation
need_image = false
need_image = true

[rag.synthesizer]
type = "SimpleSummarize"
16 changes: 9 additions & 7 deletions src/pai_rag/core/rag_configuration.py
@@ -51,15 +51,17 @@ def get_value(self, key=None):
def update(self, new_value: Dynaconf):
if self.config.get("rag", None):
if not new_value.get("llm").get("multi_modal"):
new_value["llm"]["multi_modal"] = self.config["rag.llm.multi_modal"]
new_value["llm"]["multi_modal"] = self.config.get(
"rag.llm.multi_modal", None
)
if not new_value.get("llm").get("function_calling_llm"):
new_value["llm"]["function_calling_llm"] = self.config[
"rag.llm.function_calling_llm"
]
new_value["llm"]["function_calling_llm"] = self.config.get(
"rag.llm.function_calling_llm", None
)
if not new_value.get("embedding").get("multi_modal"):
new_value["embedding"]["multi_modal"] = self.config[
"rag.embedding.multi_modal"
]
new_value["embedding"]["multi_modal"] = self.config.get(
"rag.embedding.multi_modal", None
)

self.config.rag.update(new_value, tomlfy=True, merge=True)
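The change above swaps bracket indexing for `dict.get(..., None)`. A minimal stand-alone sketch of the behavior this commit fixes (the `config` dict here is a stand-in, not pai_rag's real Dynaconf object):

```python
# Bracket indexing on a missing key raises KeyError, while .get() falls
# back to None so the multimodal modules can be skipped cleanly.
config = {"rag.llm": {"source": "DashScope"}}  # no "rag.llm.multi_modal" entry

try:
    value = config["rag.llm.multi_modal"]  # old code path: raises
except KeyError:
    value = "raised"

fallback = config.get("rag.llm.multi_modal", None)  # new code path: None
```

With the old indexing, any configuration that omitted the `multi_modal` or `function_calling_llm` sections crashed during the update; with `.get()` those sections simply propagate as `None`.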

3 changes: 3 additions & 0 deletions src/pai_rag/modules/embedding/multi_modal_embedding.py
@@ -16,6 +16,9 @@ def get_dependencies() -> List[str]:

def _create_new_instance(self, new_params: Dict[str, Any]):
config = new_params[MODULE_PARAM_CONFIG]
if config is None:
logger.info("Don't use Multi-Modal.")
return None
source = config["source"].lower()
embed_batch_size = config.get("embed_batch_size", DEFAULT_EMBED_BATCH_SIZE)

2 changes: 2 additions & 0 deletions src/pai_rag/modules/index/index.py
@@ -27,6 +27,8 @@ def __init__(self, config, embed_model, multi_modal_embed_model, postprocessor):
self.multi_modal_embed_dims = self._get_embed_vec_dim(
self.multi_modal_embed_model
)
else:
self.multi_modal_embed_dims = 0
persist_path = config.get("persist_path", DEFAULT_PERSIST_DIR)
folder_name = get_store_persist_directory_name(config, self.embed_dims)
self.persist_path = os.path.join(persist_path, folder_name)
5 changes: 4 additions & 1 deletion src/pai_rag/modules/llm/multi_modal_llm.py
@@ -19,6 +19,9 @@ def get_dependencies() -> List[str]:

def _create_new_instance(self, new_params: Dict[str, Any]):
llm_config = new_params[MODULE_PARAM_CONFIG]
if llm_config is None:
logger.info("Don't use Multi-Modal-LLM.")
return None
if llm_config.source.lower() == "dashscope":
model_name = llm_config.get("name", "qwen-vl-max")
logger.info(
@@ -36,7 +39,7 @@ def _create_new_instance(self, new_params: Dict[str, Any]):
logger.info("Using PAI-EAS Multi-Modal-LLM.")
return OpenAIAlikeMultiModal(
model=llm_config.get(
"model_name", "/model_repository/MiniCPM-V-2_6"
"name", "/model_repository/MiniCPM-V-2_6"
), # TODO: change model path
api_base=llm_config.endpoint,
api_key=llm_config.token,
