Merge pull request #546 from doringeman/fix-llamacpp-makefile

doringeman · web-flow · commit fe10622b7c06 · 2026-01-08T16:33:32.000+02:00
Fix unforked llamacpp
diff --git a/llamacpp/Makefile b/llamacpp/Makefile
@@ -30,6 +30,9 @@ ifeq ($(DETECTED_OS),macOS)
 		-DGGML_NATIVE=OFF \
 		-DGGML_OPENMP=OFF \
 		-DLLAMA_CURL=OFF \
+		-DLLAMA_BUILD_COMMON=ON \
+		-DLLAMA_BUILD_SERVER=ON \
+		-DLLAMA_BUILD_TOOLS=ON \
 		-GNinja \
 		-S $(NATIVE_DIR)
 	@echo "Building..."
@@ -43,6 +46,9 @@ ifeq ($(DETECTED_OS),macOS)
 	rm -rf $(INSTALL_DIR)/lib/cmake
 	rm -rf $(INSTALL_DIR)/lib/pkgconfig
 	rm -rf $(INSTALL_DIR)/include
+	@echo "Fixing rpath..."
+	install_name_tool -delete_rpath "$(CURDIR)/$(BUILD_DIR)/bin" $(INSTALL_DIR)/bin/com.docker.llama-server
+	install_name_tool -add_rpath "@executable_path/../lib" $(INSTALL_DIR)/bin/com.docker.llama-server
 	@echo "Build complete! Binaries are in $(INSTALL_DIR)"
 else ifeq ($(DETECTED_OS),Linux)
 	@echo "Linux build not implemented yet"
@@ -80,16 +86,16 @@ clean:
 	rm -rf $(INSTALL_DIR)
 
 build-dir:
-	@echo "$(shell pwd)/$(BUILD_DIR)"
+	@echo "$(CURDIR)/$(BUILD_DIR)"
 
 install-dir:
-	@echo "$(shell pwd)/$(INSTALL_DIR)"
+	@echo "$(CURDIR)/$(INSTALL_DIR)"
 
 help:
 	@echo "Available targets:"
-	@echo "  build      	- Build llama.cpp (macOS only for now)"
-	@echo "  install-deps	- Install build dependencies"
-	@echo "  build-dir		- Print build directory path"
-	@echo "  install-dir	- Print install directory path"
-	@echo "  clean       	- Clean build artifacts"
-	@echo "  help        	- Show this help"
+	@echo "  build        - Build llama.cpp (macOS only for now)"
+	@echo "  install-deps - Install build dependencies"
+	@echo "  build-dir    - Print build directory path"
+	@echo "  install-dir  - Print install directory path"
+	@echo "  clean        - Clean build artifacts"
+	@echo "  help         - Show this help"
diff --git a/llamacpp/native/CMakeLists.txt b/llamacpp/native/CMakeLists.txt
@@ -41,10 +41,8 @@ if (DDLLAMA_BUILD_SERVER)
 
     add_custom_target(com.docker.llama-server ALL DEPENDS "${LLAMA_SERVER_DST}")
 
-    # Install the renamed binary using TARGETS instead of PROGRAMS for better cross-platform support
-    install(TARGETS llama-server
-        RUNTIME DESTINATION bin
-        RENAME "com.docker.llama-server${CMAKE_EXECUTABLE_SUFFIX}")
+    # Install the renamed binary
+    install(PROGRAMS "${LLAMA_SERVER_DST}" DESTINATION bin)
 endif()
 
 if (WIN32 AND DDLLAMA_BUILD_UTILS)
diff --git a/pkg/inference/scheduling/runner.go b/pkg/inference/scheduling/runner.go
@@ -222,8 +222,9 @@ func (r *runner) wait(ctx context.Context) error {
 			return r.err
 		default:
 		}
-		// Create and execute a request targeting a known-valid endpoint.
-		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/v1/models", http.NoBody)
+		// Create and execute a request targeting the health endpoint.
+		// Note: /health returns 503 during model loading, 200 when ready.
+		readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)
 		if err != nil {
 			return fmt.Errorf("readiness request creation failed: %w", err)
 		}

Original file line number	Diff line number	Diff line change
`@@ -222,8 +222,9 @@ func (r *runner) wait(ctx context.Context) error {`
`222`	`222`	`return r.err`
`223`	`223`	`default:`
`224`	`224`	`}`
`225`		`- // Create and execute a request targeting a known-valid endpoint.`
`226`		`- readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/v1/models", http.NoBody)`
	`225`	`+ // Create and execute a request targeting the health endpoint.`
	`226`	`+ // Note: /health returns 503 during model loading, 200 when ready.`
	`227`	`+ readyRequest, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/health", http.NoBody)`
`227`	`228`	`if err != nil {`
`228`	`229`	`return fmt.Errorf("readiness request creation failed: %w", err)`
`229`	`230`	`}`