|
57 | 57 | "ArthurZ/Ilama-3.2-1B",
|
58 | 58 | "llava-hf/llava-1.5-7b-hf",
|
59 | 59 | "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
| 60 | + "JackFram/llama-160m", |
| 61 | + "ai21labs/Jamba-tiny-random", |
| 62 | + "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV", |
| 63 | + "nm-testing/Phi-3-mini-128k-instruct-FP8", |
| 64 | + "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV", |
| 66 | + "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V", |
| 67 | + "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue", |
| 68 | + "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse", |
| 69 | + "AMead10/Llama-3.2-1B-Instruct-AWQ", |
| 70 | + "shuyuej/Llama-3.2-1B-Instruct-GPTQ", |
| 71 | + "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head", |
| 72 | + "ModelCloud/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit-10-25-2024", |
| 73 | + "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", |
| 74 | + "neuralmagic/Meta-Llama-3-8B-Instruct-FP8", |
| 75 | + "amd/Llama-3.1-8B-Instruct-FP8-KV-Quark-test", |
| 76 | + "nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change", |
| 77 | + "nm-testing/tinyllama-oneshot-w8-channel-a8-tensor", |
| 78 | + "nm-testing/asym-w8w8-int8-static-per-tensor-tiny-llama", |
| 79 | + "neuralmagic/Llama-3.2-1B-quantized.w8a8", |
| 80 | + "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dynamic-Asym", |
| 81 | + "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Sym", |
| 82 | + "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym", |
| 84 | + "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2", |
| 85 | + "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2-asym", |
| 86 | + "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2", |
| 87 | + "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2-asym", |
| 88 | + "nm-testing/tinyllama-oneshot-w4a16-channel-v2", |
| 89 | + "nm-testing/tinyllama-oneshot-w4a16-group128-v2", |
| 90 | + "nm-testing/tinyllama-oneshot-w8a16-per-channel", |
| 91 | + "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", |
| 92 | + "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test", |
| 93 | + "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme", |
| 94 | + "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-2of4-testing", |
| 95 | + "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-Per-Tensor-testing", |
| 96 | + "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-testing", |
| 97 | + "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-IA-Per-Tensor-Weight-testing", |
| 98 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM", |
| 99 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_fp8-BitM", |
| 100 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_fp8-BitM", |
| 101 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_fp8-BitM", |
| 102 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_int8-BitM", |
| 103 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_int8-BitM", |
| 104 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_int8-BitM", |
| 105 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_int8-BitM", |
| 106 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Channel-Weight-testing", |
| 107 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Static-testing", |
| 108 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Tensor-Weight-testing", |
| 109 | + "nm-testing/TinyLlama-1.1B-Chat-v1.0-2of4-Sparse-Dense-Compressor", |
| 110 | + "nm-testing/llama2.c-stories42M-pruned2.4-compressed", |
60 | 111 | ]
|
61 | 112 |
|
62 | 113 | MODEL_WEIGHTS_S3_BUCKET = "s3://vllm-ci-model-weights"
|
|
0 commit comments