finetune.py
import subprocess
import os
from pathlib import Path
import modal
from .common import (
    MODEL_NAME,
    MODEL_PATH,
    WANDB_PROJECT,
    app,
    output_vol,
    get_user_data_path,
    get_user_model_path,
    VOL_MOUNT_PATH,
)

MINUTES = 60  # seconds
HOURS = 60 * MINUTES

REMOTE_CONFIG_PATH = Path("/llama3_1_8B_lora.yaml")

image = (
    modal.Image.debian_slim()
    .pip_install("wandb", "torch", "torchao", "torchvision")
    .apt_install("git")
    .pip_install(
        "git+https://github.com/pytorch/torchtune.git@06a837953a89cdb805c7538ff5e0cc86c7ab44d9"
    )
    .add_local_file(
        Path(__file__).parent / "llama3_1_8B_lora.yaml", REMOTE_CONFIG_PATH.as_posix()
    )
)
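# Note: the torchtune install above is pinned to a specific commit; the assumption is
# that this keeps the recipe name and config fields referenced below stable across rebuilds.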


def download_model():
    # Download the base model weights with torchtune's CLI, skipping the large
    # consolidated checkpoint that the recipe does not need.
    subprocess.run(
        [
            "tune",
            "download",
            MODEL_NAME,
            "--output-dir",
            MODEL_PATH.as_posix(),
            "--ignore-patterns",
            "original/consolidated.00.pth",
        ],
        check=True,  # fail loudly if the download does not succeed
    )


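# Assumption: the "huggingface-secret" attached to the function below exposes a
# Hugging Face token (e.g. HF_TOKEN) so `tune download` can fetch gated Llama weights.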
secrets = [modal.Secret.from_name("huggingface-secret")]

if WANDB_PROJECT:
    # Log training metrics to Weights & Biases when a project is configured.
    secrets.append(modal.Secret.from_name("my-wandb-secret"))
    wandb_args = [
        "metric_logger._component_=torchtune.training.metric_logging.WandBLogger",
        f"metric_logger.project={WANDB_PROJECT}",
    ]
else:
    wandb_args = []


@app.function(
    image=image,
    gpu="H100",
    volumes={VOL_MOUNT_PATH: output_vol},
    timeout=2 * HOURS,
    secrets=secrets,
)
def finetune(
    user: str, team_id: str = None, recipe_args: str = None, cleanup: bool = True
):
    """Fine-tune a model on a user's data from the provided team with torchtune.

    Args:
        user: The real or display username of a Slack user.
        team_id: Identifier for a Slack workspace.
        recipe_args: Additional arguments to pass to the fine-tuning recipe.
        cleanup: Remove the user's data after fine-tuning. On by default.
    """
    import shlex

    if not MODEL_PATH.exists():
        print("Downloading model...")
        download_model()
        output_vol.commit()  # persist the downloaded weights to the shared volume

    data_path = get_user_data_path(user, team_id)
    output_dir = get_user_model_path(user, team_id)
    output_dir.mkdir(parents=True, exist_ok=True)

    if recipe_args is not None:
        recipe_args = shlex.split(recipe_args)
    else:
        recipe_args = []

    # Run torchtune's single-device LoRA recipe, overriding paths in the bundled
    # config with this user's dataset and output locations.
    subprocess.run(
        [
            "tune",
            "run",
            "lora_finetune_single_device",
            "--config",
            REMOTE_CONFIG_PATH.as_posix(),
            f"output_dir={output_dir.as_posix()}",
            f"dataset_path={data_path.as_posix()}",
            f"model_path={MODEL_PATH.as_posix()}",
            *wandb_args,
        ]
        + recipe_args,
        check=True,  # surface recipe failures instead of continuing silently
    )

    if cleanup and user != "test":
        # Delete scraped data after fine-tuning
        os.remove(data_path)
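

# A minimal local entrypoint sketch, not part of the original file: it assumes you
# launch this module with `modal run` (the exact module path depends on the package
# layout) and simply forwards the CLI flags to the remote fine-tuning function.
@app.local_entrypoint()
def main(user: str, team_id: str = None, recipe_args: str = None):
    finetune.remote(user, team_id=team_id, recipe_args=recipe_args)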