[BugFix] Don't scan entire cache dir when loading model
Recent PR vllm-project#12926 added logging of the time taken to load model weights, but to determine whether any new files were downloaded to the cache, it scans the entire local HF cache dir before and after the download and compares the total size.

This can be very expensive when the cache is large and/or lives on a remote filesystem mount, which is common.
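For context, scan_cache_dir() walks every repo, revision, and file under the local HF cache to compute its size. A minimal sketch (illustrative only, not vllm code) of why that is costly on a large or remote cache:

import time

from huggingface_hub import scan_cache_dir

start = time.perf_counter()
info = scan_cache_dir()  # walks every repo/revision/file under the cache dir
elapsed = time.perf_counter() - start
# Slow when the cache is large or on a network filesystem.
print(f"{len(info.repos)} cached repos, {info.size_on_disk} bytes, "
      f"scanned in {elapsed:.2f}s")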

With this fix, the time is still logged even if no files were downloaded, unless HF_HUB_OFFLINE is set. I think this is fine since the measured time still includes connecting to the hub to check for the existence of new files.
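As a hedged illustration of the offline case: huggingface_hub reads HF_HUB_OFFLINE into a module constant at import time, so it must be set in the environment before the import for the local-only path (and the suppressed log line) to take effect:

import os

os.environ["HF_HUB_OFFLINE"] = "1"  # must be set before importing huggingface_hub

import huggingface_hub.constants

# True -> snapshot_download() resolves from the local cache only, and with
# this fix no download-time log line is emitted.
print(huggingface_hub.constants.HF_HUB_OFFLINE)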

Signed-off-by: Nick Hill <nhill@redhat.com>
njhill committed Feb 14, 2025
1 parent c9e2d64 commit 552b3a5
Showing 1 changed file with 6 additions and 8 deletions.

vllm/model_executor/model_loader/weight_utils.py
@@ -15,8 +15,7 @@
 import huggingface_hub.constants
 import numpy as np
 import torch
-from huggingface_hub import (HfFileSystem, hf_hub_download, scan_cache_dir,
-                             snapshot_download)
+from huggingface_hub import HfFileSystem, hf_hub_download, snapshot_download
 from safetensors.torch import load_file, safe_open, save_file
 from tqdm.auto import tqdm

@@ -239,7 +238,8 @@ def download_weights_from_hf(
     Returns:
         str: The path to the downloaded model weights.
     """
-    if not huggingface_hub.constants.HF_HUB_OFFLINE:
+    local_only = huggingface_hub.constants.HF_HUB_OFFLINE
+    if not local_only:
         # Before we download we look at what is available:
         fs = HfFileSystem()
         file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
@@ -255,7 +255,6 @@
     # Use file lock to prevent multiple processes from
     # downloading the same model weights at the same time.
     with get_lock(model_name_or_path, cache_dir):
-        start_size = scan_cache_dir().size_on_disk
         start_time = time.perf_counter()
         hf_folder = snapshot_download(
             model_name_or_path,
@@ -264,12 +263,11 @@
             cache_dir=cache_dir,
             tqdm_class=DisabledTqdm,
             revision=revision,
-            local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
+            local_files_only=local_only,
         )
         end_time = time.perf_counter()
-        end_size = scan_cache_dir().size_on_disk
-        if end_size != start_size:
-            logger.info("Time took to download weights for %s: %.6f seconds",
+        if not local_only:
+            logger.info("Time spent downloading weights for %s: %.6f seconds",
                         model_name_or_path, end_time - start_time)
     return hf_folder
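Putting it together, a self-contained sketch of the new flow (simplified from download_weights_from_hf: vllm's file locking and logger are omitted, and the model id and weight pattern below are illustrative placeholders):

import time

import huggingface_hub.constants
from huggingface_hub import snapshot_download

local_only = huggingface_hub.constants.HF_HUB_OFFLINE
start_time = time.perf_counter()
hf_folder = snapshot_download(
    "facebook/opt-125m",               # placeholder model id
    allow_patterns=["*.safetensors"],  # placeholder weight pattern
    local_files_only=local_only,
)
end_time = time.perf_counter()
if not local_only:
    # When online, the elapsed time is logged unconditionally: even with a
    # warm cache it includes the hub round-trip that checks for new files.
    print(f"Time spent downloading weights: {end_time - start_time:.6f} seconds")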
