Skip to content

Commit

Permalink
Update requirements, drop nan examples, fix nans in logging (#14)
Browse files (browse the repository at this point in the history)
  • Loading branch information
chiragjn authored Jun 27, 2024
1 parent 2071597 commit 6fcfafa
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# https://hub.docker.com/layers/winglian/axolotl/main-20240612-py3.11-cu121-2.3.0/images/sha256-798eed818fb11d24a640c0efbf27f65fbaebc1d9a5db210d585aa2a4328e93e1?context=explore
FROM --platform=linux/amd64 winglian/axolotl@sha256:aac52c92ab245793932a635e6dedf14a3a9fb009e40cdf16c10b715f1466afa8
# https://hub.docker.com/layers/winglian/axolotl/main-20240626-py3.11-cu121-2.3.0/images/sha256-d157d1b80bfbbea689e9a4ea233d04bbc37f684f82e01d9dd6730dd0251e61fe?context=explore
FROM --platform=linux/amd64 winglian/axolotl@sha256:7945505e1651a474aa11ed4d70188ff5c5052e17f61bb5f60b956ad8f082328f
USER root
COPY requirements.txt /tmp/
RUN pip install -U pip wheel setuptools && \
Expand All @@ -9,7 +9,7 @@ RUN mkdir -p /packages && \
cd /packages && \
git clone https://github.com/truefoundry/axolotl && \
cd axolotl/ && \
git checkout 5ba183d302ed1c91912555b76e423786acaccae8
git checkout 99da242b9aee961acebaae99da8d615781f399e3
RUN cd /packages/axolotl/ && \
MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation -e .[flash-attn,mamba-ssm,fused-dense-lib] && \
pip install --no-cache-dir -U -r /tmp/requirements.txt && \
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile-notebook
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ USER jovyan
RUN cd /packages && \
git clone https://github.com/truefoundry/axolotl && \
cd axolotl/ && \
git checkout 5ba183d302ed1c91912555b76e423786acaccae8
git checkout 99da242b9aee961acebaae99da8d615781f399e3
RUN cd /packages/axolotl/ && \
MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation -e .[flash-attn,mamba-ssm,fused-dense-lib] && \
pip install --no-cache-dir -U -r /tmp/llm-finetune/notebook-requirements.txt
Expand Down
10 changes: 9 additions & 1 deletion mlfoundry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def log_model_to_mlfoundry(
logger.warning("Python file in hf model cache in unknown path:", file_path)

metadata.update({"huggingface_model_url": f"https://huggingface.co/{hf_hub_model_id}"})

metadata = {
k: v for k, v in metadata.items() if isinstance(v, (int, float, np.integer, np.floating)) and math.isfinite(v)
}
run.log_model(
name=model_name,
model_file_or_folder=model_dir,
Expand Down Expand Up @@ -171,6 +173,12 @@ def on_save(self, args, state, control, **kwargs):
for log in state.log_history:
if isinstance(log, dict) and log.get("step") == state.global_step:
metadata = log.copy()

metadata = {
k: v
for k, v in metadata.items()
if isinstance(v, (int, float, np.integer, np.floating)) and math.isfinite(v)
}
self._run.log_artifact(
name=self._checkpoint_artifact_name,
artifact_paths=[(artifact_path,)],
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cu121
cloud-files==4.15.2
deepspeed @ git+https://github.com/truefoundry/DeepSpeed@1372f3d1937030f20283b6bfdb7209c55eb8a7bf
deepspeed @ git+https://github.com/microsoft/DeepSpeed@88b2ef71b3f2dfc42932cd2c097397f637ad77f4
pyarrow==15.0.0
rich>=13.0.0,<14
snowflake-connector-python[pandas]==3.7.0
torch==2.3.0+cu121
truefoundry[ml]==0.2.4
unsloth @ git+https://github.com/unslothai/unsloth@27fa021a7bb959a53667dd4e7cdb9598c207aa0d
truefoundry[ml]==0.2.8
unsloth @ git+https://github.com/unslothai/unsloth@a558f22992813209ef9a369da8ef5163e9782258

0 comments on commit 6fcfafa

Please sign in to comment.