Skip to content

Commit 322002f

Browse files
divakar-amd authored and ShangmingCai committed
[Misc][MoE] add Deepseek-V3 moe tuning support (vllm-project#12558)
Signed-off-by: Divakar Verma <divakar.verma@amd.com>
1 parent c9f2237 commit 322002f

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

benchmarks/kernels/benchmark_moe.py

+8 −1
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,8 @@ def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
450450
def main(args: argparse.Namespace):
451451
print(args)
452452

453-
config = AutoConfig.from_pretrained(args.model)
453+
config = AutoConfig.from_pretrained(
454+
args.model, trust_remote_code=args.trust_remote_code)
454455
if config.architectures[0] == "DbrxForCausalLM":
455456
E = config.ffn_config.moe_num_experts
456457
topk = config.ffn_config.moe_top_k
@@ -461,6 +462,11 @@ def main(args: argparse.Namespace):
461462
topk = config.num_experts_per_tok
462463
intermediate_size = config.intermediate_size
463464
shard_intermediate_size = 2 * intermediate_size // args.tp_size
465+
elif config.architectures[0] == "DeepseekV3ForCausalLM":
466+
E = config.n_routed_experts
467+
topk = config.num_experts_per_tok
468+
intermediate_size = config.moe_intermediate_size
469+
shard_intermediate_size = 2 * intermediate_size // args.tp_size
464470
else:
465471
# Default: Mixtral.
466472
E = config.num_local_experts
@@ -538,6 +544,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
538544
parser.add_argument("--seed", type=int, default=0)
539545
parser.add_argument("--batch-size", type=int, required=False)
540546
parser.add_argument("--tune", action="store_true")
547+
parser.add_argument("--trust-remote-code", action="store_true")
541548
args = parser.parse_args()
542549

543550
main(args)

0 commit comments

Comments (0)