@@ -13,26 +13,26 @@ numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test -f Dockerfile.
13
13
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-avx2 -f Dockerfile.cpu .
14
14
15
15
# Setup cleanup
16
- remove_docker_container() { docker rm -f cpu-test cpu-test-avx2 || true; }
16
+ remove_docker_container() { docker rm -f cpu-test-"$NUMA_NODE" cpu-test-avx2-"$NUMA_NODE" || true; }
17
17
trap remove_docker_container EXIT
18
18
remove_docker_container
19
19
20
20
# Run the image, setting --shm-size=4g for tensor parallel.
21
21
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
22
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
22
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test
23
23
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
24
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2 cpu-test-avx2
24
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2-"$NUMA_NODE" cpu-test-avx2
25
25
26
26
function cpu_tests() {
27
27
set -e
28
28
29
29
# offline inference
30
- docker exec cpu-test-avx2 bash -c "
30
+ docker exec cpu-test-avx2-"$NUMA_NODE" bash -c "
31
31
set -e
32
32
python3 examples/offline_inference.py"
33
33
34
34
# Run basic model test
35
- docker exec cpu-test bash -c "
35
+ docker exec cpu-test-"$NUMA_NODE" bash -c "
36
36
set -e
37
37
pip install pytest pytest-asyncio \
38
38
decord einops librosa peft Pillow sentence-transformers soundfile \
@@ -45,20 +45,20 @@ function cpu_tests() {
45
45
pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
46
46
47
47
# Run compressed-tensor test
48
- docker exec cpu-test bash -c "
48
+ docker exec cpu-test-"$NUMA_NODE" bash -c "
49
49
set -e
50
50
pytest -s -v \
51
51
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
52
52
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
53
53
54
54
# Run AWQ test
55
- docker exec cpu-test bash -c "
55
+ docker exec cpu-test-"$NUMA_NODE" bash -c "
56
56
set -e
57
57
pytest -s -v \
58
58
tests/quantization/test_ipex_quant.py"
59
59
60
60
# online inference
61
- docker exec cpu-test bash -c "
61
+ docker exec cpu-test-"$NUMA_NODE" bash -c "
62
62
set -e
63
63
export VLLM_CPU_KVCACHE_SPACE=10
64
64
export VLLM_CPU_OMP_THREADS_BIND=$1
0 commit comments