@@ -450,7 +450,8 @@ def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
450
450
def main (args : argparse .Namespace ):
451
451
print (args )
452
452
453
- config = AutoConfig .from_pretrained (args .model )
453
+ config = AutoConfig .from_pretrained (
454
+ args .model , trust_remote_code = args .trust_remote_code )
454
455
if config .architectures [0 ] == "DbrxForCausalLM" :
455
456
E = config .ffn_config .moe_num_experts
456
457
topk = config .ffn_config .moe_top_k
@@ -461,6 +462,11 @@ def main(args: argparse.Namespace):
461
462
topk = config .num_experts_per_tok
462
463
intermediate_size = config .intermediate_size
463
464
shard_intermediate_size = 2 * intermediate_size // args .tp_size
465
+ elif config .architectures [0 ] == "DeepseekV3ForCausalLM" :
466
+ E = config .n_routed_experts
467
+ topk = config .num_experts_per_tok
468
+ intermediate_size = config .moe_intermediate_size
469
+ shard_intermediate_size = 2 * intermediate_size // args .tp_size
464
470
else :
465
471
# Default: Mixtral.
466
472
E = config .num_local_experts
@@ -538,6 +544,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
538
544
parser .add_argument ("--seed" , type = int , default = 0 )
539
545
parser .add_argument ("--batch-size" , type = int , required = False )
540
546
parser .add_argument ("--tune" , action = "store_true" )
547
+ parser .add_argument ("--trust-remote-code" , action = "store_true" )
541
548
args = parser .parse_args ()
542
549
543
550
main (args )
0 commit comments