Skip to content

Commit e072e98

Browse files
committed
Add Dockerfiles for inference engines
Added so that contributors can enhance them further; for example, they may want to implement/enable Intel's oneAPI, MUSA, etc. Signed-off-by: Eric Curtin <eric.curtin@docker.com>
1 parent b8802e9 commit e072e98

13 files changed

Lines changed: 380 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# syntax=docker/dockerfile:1

# Thin packaging image: contains nothing but the prebuilt llama-server
# artifact, copied in from the "release-artifacts" stage/context.
FROM scratch

# Populated automatically by buildx; ACCEL selects the accelerator flavor
# (e.g. cpu, cuda) embedded in the artifact name.
ARG TARGETOS
ARG TARGETARCH
ARG ACCEL

COPY --from=release-artifacts /com.docker.llama-server.native.${TARGETOS}.${ACCEL}.${TARGETARCH} /com.docker.llama-server.native.${TARGETOS}.${ACCEL}.${TARGETARCH}

inference-engine/llamacpp/Makefile

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Build driver for the DD llama.cpp inference engine.
# Only the macOS path is implemented; Linux and Windows are stubs that
# fail loudly rather than silently doing nothing.

# --- Host OS detection --------------------------------------------------
# Windows sets the OS environment variable; elsewhere fall back to uname.
ifeq ($(OS),Windows_NT)
DETECTED_OS := Windows
else
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
DETECTED_OS := Linux
endif
ifeq ($(UNAME_S),Darwin)
DETECTED_OS := macOS
endif
endif

BUILD_DIR := build
INSTALL_DIR := install
NATIVE_DIR := native

# Every target here is a command, not a file; declare them all phony so a
# stray file with the same name cannot shadow a target.
# Fix: "help" was missing from the original .PHONY list.
.PHONY: build clean install-deps install-dir help

# Configure, build, and install llama-server into $(INSTALL_DIR).
build: install-deps
ifeq ($(DETECTED_OS),macOS)
	@echo "Building for macOS..."
	@echo "Configuring CMake..."
	cmake -B $(BUILD_DIR) \
		-DCMAKE_CXX_COMPILER=clang++ \
		-DCMAKE_C_COMPILER=clang \
		-DCMAKE_BUILD_TYPE=Release \
		-DCMAKE_OSX_DEPLOYMENT_TARGET=13.3 \
		-DCMAKE_MACOSX_RPATH=ON \
		-DCMAKE_INSTALL_RPATH='@executable_path/../lib' \
		-DGGML_NATIVE=OFF \
		-DGGML_OPENMP=OFF \
		-DLLAMA_CURL=OFF \
		-GNinja \
		-S $(NATIVE_DIR)
	@echo "Building..."
	cmake --build $(BUILD_DIR) --config Release
	@echo "Installing..."
	cmake --install $(BUILD_DIR) \
		--config Release \
		--prefix $(INSTALL_DIR)
	@echo "Cleaning install directory..."
	# Strip development files: only runtime binaries/libraries ship.
	rm -rf $(INSTALL_DIR)/lib/cmake
	rm -rf $(INSTALL_DIR)/lib/pkgconfig
	rm -rf $(INSTALL_DIR)/include
	@echo "Build complete! Binaries are in $(INSTALL_DIR)"
else ifeq ($(DETECTED_OS),Linux)
	@echo "Linux build not implemented yet"
	@exit 1
else ifeq ($(DETECTED_OS),Windows)
	@echo "Windows build not implemented yet"
	@exit 1
else
	@echo "Unsupported OS: $(DETECTED_OS)"
	@exit 1
endif

# Install host tooling needed by the build (currently just Ninja on macOS).
install-deps:
ifeq ($(DETECTED_OS),macOS)
	@echo "Installing build dependencies for macOS..."
	@if ! command -v ninja >/dev/null 2>&1; then \
		echo "Installing Ninja..."; \
		brew install ninja; \
	else \
		echo "Ninja already installed"; \
	fi
else ifeq ($(DETECTED_OS),Linux)
	@echo "Linux dependency installation not implemented yet"
	@exit 1
else ifeq ($(DETECTED_OS),Windows)
	@echo "Windows dependency installation not implemented yet"
	@exit 1
else
	@echo "Unsupported OS: $(DETECTED_OS)"
	@exit 1
endif

# Remove all build products.
clean:
	rm -rf $(BUILD_DIR)
	rm -rf $(INSTALL_DIR)

# Print the install directory path (for use by calling scripts).
install-dir:
	@echo "$(INSTALL_DIR)"

help:
	@echo "Available targets:"
	@echo "  build        - Build llama.cpp (macOS only for now)"
	@echo "  install-deps - Install build dependencies"
	@echo "  install-dir  - Print install directory path"
	@echo "  clean        - Clean build artifacts"
	@echo "  help         - Show this help"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# llama.cpp inference runtime
2+
3+
This repo contains implementations of the llama.cpp inference runtime.
4+
5+
* native/ - contains an implementation based on `llama.cpp`'s native server
6+
implementation
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
build/
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
cmake_minimum_required(VERSION 3.13)

# DD inference server, wrapping llama.cpp's native server implementation.
project(
  com.docker.llama-server.native
  DESCRIPTION "DD inference server, based on llama.cpp native server"
  LANGUAGES C CXX
)

option(DDLLAMA_BUILD_SERVER "Build the DD llama.cpp server executable" ON)
option(DDLLAMA_BUILD_UTILS "Build utilities, e.g. nv-gpu-info" OFF)
set(DDLLAMA_PATCH_COMMAND "patch" CACHE STRING "patch command")

# Collect all runtime artifacts (executables and shared libraries) under a
# single bin/ directory to simplify downstream packaging.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (DDLLAMA_BUILD_SERVER)
  # The server target needs llama.cpp's "common" helper library.
  set(LLAMA_BUILD_COMMON ON)
  add_subdirectory(vendor/llama.cpp)
  add_subdirectory(vendor/llama.cpp/tools/mtmd)
  add_subdirectory(src/server)
endif()

# nv-gpu-info is only built for Windows, and only on request.
if (WIN32 AND DDLLAMA_BUILD_UTILS)
  add_subdirectory(src/nv-gpu-info)
endif()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Native llama-server for DD
2+
3+
## Building
4+
5+
cmake -B build
6+
cmake --build build --parallel 8 --config Release
7+
8+
## Running
9+
10+
DD_INF_UDS=<socket path> ./build/bin/com.docker.llama-server --model <path to model>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# syntax=docker/dockerfile:1

ARG CUDA_VERSION=12.9.0
ARG CUDA_IMAGE_VARIANT=ubuntu24.04

# --- Build stage: compile llama-server against the CUDA toolkit ---------
FROM nvidia/cuda:${CUDA_VERSION}-devel-${CUDA_IMAGE_VARIANT} AS builder

# Re-declare so the global ARG values are visible inside this stage.
ARG TARGETARCH
ARG CUDA_IMAGE_VARIANT

COPY native/install-clang.sh .
RUN ./install-clang.sh "${CUDA_IMAGE_VARIANT}"

WORKDIR /llama-server

COPY .git .git
COPY native/CMakeLists.txt .
COPY native/src src
COPY native/vendor vendor

# The llama.cpp submodule's .git file records a worktree path that only
# exists in the original checkout; rewrite it for the container layout.
RUN echo "gitdir: ../../.git/modules/native/vendor/llama.cpp" > vendor/llama.cpp/.git && \
    sed -i 's|worktree = ../../../../../native/vendor/llama.cpp|worktree = /llama-server/vendor/llama.cpp|' .git/modules/native/vendor/llama.cpp/config

ENV CC=/usr/bin/clang-20
ENV CXX=/usr/bin/clang++-20

# Write the configure flags to a file so the (long) flag list is easy to
# inspect inside the image and is reused verbatim by the next step.
RUN echo "-B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=ON \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON \
    -DGGML_NATIVE=OFF \
    -DGGML_OPENMP=OFF \
    -DGGML_CUDA=ON \
    -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
    -DLLAMA_CURL=OFF \
    -GNinja \
    -S ." > cmake-flags
RUN cmake $(cat cmake-flags)
RUN cmake --build build --config Release
RUN cmake --install build --config Release --prefix install

# Strip development files from the install tree; only runtime
# binaries and libraries ship in the final artifact.
RUN rm install/bin/*.py && \
    rm -r install/lib/cmake install/lib/pkgconfig install/include

# --- Final stage: bare artifact image -----------------------------------
FROM scratch AS final

ARG TARGETARCH
ARG CUDA_VERSION

COPY --from=builder /llama-server/install /com.docker.llama-server.native.linux.cuda${CUDA_VERSION}.${TARGETARCH}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# syntax=docker/dockerfile:1

ARG BASE_IMAGE=ubuntu:25.10

# --- Build stage ---------------------------------------------------------
FROM ${BASE_IMAGE} AS builder

ARG TARGETARCH

RUN apt-get update && apt-get install -y cmake ninja-build git build-essential curl

COPY native/install-vulkan.sh .
RUN ./install-vulkan.sh

# NOTE(review): the final artifact is named ".cpu." yet the build enables
# -DGGML_VULKAN=ON and installs the Vulkan SDK — confirm this is intended
# (e.g. Vulkan shipped as a dynamically-loadable backend of the CPU image).
ENV VULKAN_SDK=/opt/vulkan
ENV PATH=$VULKAN_SDK/bin:$PATH
ENV LD_LIBRARY_PATH=$VULKAN_SDK/lib
ENV CMAKE_PREFIX_PATH=$VULKAN_SDK
ENV PKG_CONFIG_PATH=$VULKAN_SDK/lib/pkgconfig

WORKDIR /llama-server

COPY .git .git
COPY native/CMakeLists.txt .
COPY native/src src
COPY native/vendor vendor

# The llama.cpp submodule's .git file records a worktree path that only
# exists in the original checkout; rewrite it for the container layout.
RUN echo "gitdir: ../../.git/modules/native/vendor/llama.cpp" > vendor/llama.cpp/.git && \
    sed -i 's|worktree = ../../../../../native/vendor/llama.cpp|worktree = /llama-server/vendor/llama.cpp|' .git/modules/native/vendor/llama.cpp/config

# Common configure flags, persisted to a file; per-arch flags are
# appended below before the configure step consumes the file.
RUN echo "-B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_NATIVE=OFF \
    -DGGML_OPENMP=OFF \
    -DLLAMA_CURL=OFF \
    -DGGML_VULKAN=ON \
    -GNinja \
    -S ." > cmake-flags
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
        echo " -DBUILD_SHARED_LIBS=ON \
    -DGGML_BACKEND_DL=ON \
    -DGGML_CPU_ALL_VARIANTS=ON" >> cmake-flags; \
    elif [ "${TARGETARCH}" = "arm64" ]; then \
        echo " -DBUILD_SHARED_LIBS=OFF" >> cmake-flags; \
    else \
        echo "${TARGETARCH} is not supported"; \
        exit 1; \
    fi
RUN cmake $(cat cmake-flags)
RUN cmake --build build --config Release -j 4
RUN cmake --install build --config Release --prefix install

# Strip development files from the install tree; only runtime
# binaries and libraries ship in the final artifact.
RUN rm install/bin/*.py && \
    rm -r install/lib/cmake install/lib/pkgconfig install/include

# --- Final stage: bare artifact image ------------------------------------
FROM scratch AS final

ARG TARGETARCH

COPY --from=builder /llama-server/install /com.docker.llama-server.native.linux.cpu.${TARGETARCH}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/bash

# Install clang-20 (plus lldb-20 and lld-20) from apt.llvm.org inside an
# Ubuntu container.
#
# Usage: install-clang.sh <ubuntu22.04|ubuntu24.04>

main() {
    set -eux -o pipefail

    # Fix: with `set -u`, a missing $1 previously died with an opaque
    # "unbound variable" error; fail with an explicit usage message.
    if [ -z "${1:-}" ]; then
        echo "usage: $0 <ubuntu22.04|ubuntu24.04>" >&2
        exit 1
    fi

    apt-get update && apt-get install -y cmake ninja-build git wget gnupg2
    # Trust the LLVM apt repository's signing key.
    wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc

    # Select the LLVM repo matching the distro variant.
    if [ "$1" = "ubuntu22.04" ]; then
        echo "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" >> /etc/apt/sources.list
        echo "deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" >> /etc/apt/sources.list
    elif [ "$1" = "ubuntu24.04" ]; then
        echo "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main" >> /etc/apt/sources.list
        echo "deb-src http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main" >> /etc/apt/sources.list
    else
        echo "distro variant not supported yet"
        exit 1
    fi

    apt-get update && apt-get install -y clang-20 lldb-20 lld-20
}

main "$@"
24+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash

# Install the Vulkan build dependencies: the glslc shader compiler and the
# Vulkan loader development headers.

main() {
    set -eux -o pipefail

    # Fix: refresh the package lists first so the script also works when
    # run standalone on a fresh image (the original relied on a prior
    # `apt-get update` having been executed by the caller).
    apt-get update
    apt-get install -y glslc libvulkan-dev
}

main "$@"
10+

0 commit comments

Comments
 (0)