Skip to content

Commit

Permalink
Update base image (#48)
Browse files (browse the repository at this point in the history)
* use cuda base image

* fix

* update base image for cpu machine

* lint
  • Loading branch information
suzhoum authored Aug 8, 2023
1 parent b004580 commit 450eb6b
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 18 deletions.
32 changes: 22 additions & 10 deletions src/autogluon/bench/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
ARG AG_BENCH_BASE_IMAGE
FROM $AG_BENCH_BASE_IMAGE

ENV DEBIAN_FRONTEND=noninteractive

# Install essential packages and Python 3.9 (from the deadsnakes PPA), then
# register python3.9 as an alternative for /usr/bin/python3.
# The package index is refreshed and removed inside this same layer so the
# layer is self-contained and does not carry the apt lists in the image.
RUN apt-get update && \
    apt-get install -y software-properties-common build-essential && \
    # -y keeps the PPA registration non-interactive regardless of how stdin is attached.
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y python3.9 python3.9-dev python3.9-distutils python3.9-venv && \
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install utilities and AWS CLI v2.
# `apt-get update` is repeated here so this layer never installs from a stale
# (or already deleted) package index when the previous layer comes from cache.
RUN apt-get update && \
    apt-get install -y python3-pip unzip curl git pciutils && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -q awscliv2.zip && \
    ./aws/install && \
    # Remove both the archive and the unpacked installer directory; only the
    # installed CLI needs to stay in the image.
    rm -rf awscliv2.zip aws && \
    python3 -m pip install --upgrade pip && \
    apt-get clean && \
    # NOTE(review): /usr/local/aws is kept from the original cleanup; the AWS CLI v2
    # installer defaults to /usr/local/aws-cli, so this path is probably a no-op - confirm.
    rm -rf /var/lib/apt/lists/* /usr/local/aws

# Application-specific steps
ARG AG_BENCH_DEV_URL
ARG AG_BENCH_VERSION
ARG CDK_DEPLOY_REGION
Expand All @@ -12,16 +34,6 @@ ARG AMLB_USER_DIR

WORKDIR /app/

RUN apt-get update && apt-get install -y --no-install-recommends unzip curl && rm -rf /var/lib/apt/lists/* \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /usr/local/aws \
&& pip freeze | cut -d "@" -f1 | xargs pip uninstall -y \
&& curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
&& unzip -q awscliv2.zip \
&& ./aws/install \
&& rm awscliv2.zip \
&& python3 -m pip install --upgrade pip

RUN if [ -n "$AG_BENCH_DEV_URL" ]; then \
echo "Cloning: $AG_BENCH_DEV_URL" \
&& AG_BENCH_DEV_REPO=$(echo "$AG_BENCH_DEV_URL" | cut -d "#" -f 1) \
Expand Down
8 changes: 2 additions & 6 deletions src/autogluon/bench/cloud/aws/stack_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,10 @@ def deploy_stack(custom_configs: dict) -> dict:
custom_infra_configs = custom_configs.get("cdk_context", {})
infra_configs = construct_context(custom_configs=custom_infra_configs)
instance_type: str = infra_configs["INSTANCE_TYPES"][0]
os.environ[
"AG_BENCH_BASE_IMAGE"
] = "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.13.1-gpu-py39-cu117-ubuntu20.04-ec2"
os.environ["AG_BENCH_BASE_IMAGE"] = "nvidia/cuda:12.2.0-runtime-ubuntu20.04"
if not instance_type.startswith(("p", "g")):
# CPU instances
os.environ[
"AG_BENCH_BASE_IMAGE"
] = "763104351884.dkr.ecr.us-east-1.amazonaws.com/pytorch-training:1.13.1-cpu-py39-ubuntu20.04-ec2"
os.environ["AG_BENCH_BASE_IMAGE"] = "ubuntu:20.04"
command = [
os.path.join(module_base_dir, "deploy.sh"),
infra_configs["STACK_NAME_PREFIX"],
Expand Down
4 changes: 2 additions & 2 deletions src/autogluon/bench/eval/benchmark_context/output_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Set
from typing import Optional, Set, Union

import boto3
import numpy as np
Expand Down Expand Up @@ -216,7 +216,7 @@ def get_single_leaderboard(self, columns_to_keep, with_infer_speed, i, num_conte
print(f"SUCCESS: {print_msg}")
return combined_full

def get_model_failures(self) -> pd.DataFrame | None:
def get_model_failures(self) -> Union[pd.DataFrame, None]:
"""
Load and return the model failures CSV as a pandas DataFrame if it exists, else return None.
Expand Down

0 comments on commit 450eb6b

Please sign in to comment.