-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathDockerfile.cpu
46 lines (35 loc) · 1.68 KB
/
Dockerfile.cpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# ===----------------------------------------------------------------------=== #
# Copyright (c) 2025, Modular Inc. All rights reserved.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions:
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===----------------------------------------------------------------------=== #
# Example image that runs `max-pipelines serve` in a Docker container.

# Tag of the base image. Defaults to `latest` so existing builds behave exactly
# as before; pass `--build-arg MAGIC_TAG=<tag>` to pin a specific release and
# get reproducible builds.
ARG MAGIC_TAG=latest

# Single stage: dependencies are installed into the same image that serves.
FROM ghcr.io/modular/magic:${MAGIC_TAG} AS production

# OS packages are installed before the project environment is resolved.
# The apt package lists are removed in the same layer so they do not end up
# in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy the build context into the image and resolve the project environment.
# NOTE(review): the whole context is copied before `magic install`, so any
# source change re-runs the install; make sure a .dockerignore keeps secrets
# and build output out of the context.
COPY . /app
WORKDIR /app
RUN magic install

# Port the server is expected to listen on (documentation only; publish it
# with `docker run -p`).
EXPOSE 8000

# TODO: Add HEALTHCHECK instruction. The candidate below is left disabled:
# it needs `curl` to be present in the image and a `/health` endpoint on the
# server, neither of which is confirmed here.
# HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD curl --fail http://localhost:8000/health || exit 1

LABEL maintainer="Modular Inc <hello@modular.com>"

# NOTE(review): no USER instruction is set, so the container runs as the base
# image's default user (presumably root) - confirm whether serving works as a
# non-root user before adding one.

ENV PYTHONUNBUFFERED=1

# Suppress transformers logging.
ENV TRANSFORMERS_VERBOSITY="critical"
ENV TRANSFORMERS_NO_ADVISORY_WARNINGS=1

ENV MODULAR_STRUCTURED_LOGGING=1
ENV PROMETHEUS_MULTIPROC_DIR=/tmp

# OTEL configuration.
ENV OTEL_SERVICE_NAME="max-serve"
# OTEL's default histogram buckets have poor resolution. Prefer exponential histograms.
ENV OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION="base2_exponential_bucket_histogram"

# ENTRYPOINT is the fixed serve command; CMD supplies the default model
# argument and can be overridden at `docker run` time.
ENTRYPOINT ["magic", "run", "serve"]
CMD ["--huggingface-repo-id=modularai/Llama-3.1-8B-Instruct-GGUF"]