modelscope · woshixiaobai2019 · Mar 14, 2025 · Mar 15, 2025 · Mar 17, 2025 · Mar 17, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -39,8 +39,6 @@ repos:
         exclude: thirdparty/|tests/run.py
       - id: requirements-txt-fixer
         exclude: thirdparty/|tests/run.py
-      - id: double-quote-string-fixer
-        exclude: thirdparty/|tests/run.py
       - id: check-merge-conflict
         exclude: thirdparty/|tests/run.py
       - id: fix-encoding-pragma

diff --git a/.pre-commit-config_local.yaml b/.pre-commit-config_local.yaml
@@ -37,8 +37,6 @@ repos:
         exclude: thirdparty/|tests/run.py
       - id: end-of-file-fixer
         exclude: thirdparty/
-      - id: requirements-txt-fixer
-        exclude: thirdparty/|tests/run.py
       - id: double-quote-string-fixer
         exclude: thirdparty/|tests/run.py
       - id: check-merge-conflict

diff --git a/swift/plugin/orm.py b/swift/plugin/orm.py
@@ -383,5 +383,5 @@ def __call__(self, completions, **kwargs) -> List[float]:
     'format': Format,
     'react_format': ReActFormat,
     'cosine': CosineReward,
-    'repetition': RepetitionPenalty,
+    'repetition': RepetitionPenalty
 }
diff --git a/swift/plugin/tool_call.py b/swift/plugin/tool_call.py
@@ -0,0 +1,10 @@
+from typing import Tuple, Any, Optional
+
+
+class TOOL_CALL:  # Callable base for tool-call plugins; the base __call__ only raises.

+    def __call__(self, completion: str) -> Tuple[Any, bool, Optional[float]]:
+        raise NotImplementedError  # NOTE(review): meaning of the returned (Any, bool, Optional[float]) is not defined here; confirm with the caller.
+
+
+tools = {}  # Module-level registry, empty by default; presumably maps a name to a TOOL_CALL implementation (verify against the trainer).
diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py
@@ -2,7 +2,7 @@
 import os
 from dataclasses import dataclass
 from functools import wraps
-from typing import Any, Dict, Literal, Optional, Union
+from typing import Any, Dict, Literal, Optional, Union, Callable
 
 import torch
 import torch.utils.checkpoint
@@ -104,6 +104,9 @@ class GRPOArgumentsMixin:
     offload_optimizer: bool = False
     offload_model: bool = False
     gc_collect_after_offload: bool = False
+    is_reward_tool_call: bool = True  # Whether to additionally score the format of each tool call separately
+    tool_call_weight: float = 1.0
+    tool_call: str = None
 
 
 @dataclass

diff --git a/swift/trainers/rlhf_arguments.py b/swift/trainers/rlhf_arguments.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import List, Optional, Callable
 
 from trl import CPOConfig as HfCPOConfig
 from trl import DPOConfig as HfDPOConfig

diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py