Commit a9abd5f

LLM: plugin module - initial commit

Author: Chris Warren-Smith
1 parent: 53939fd

5 files changed: 303 additions & 0 deletions

.gitmodules

Lines changed: 3 additions & 0 deletions

@@ -31,3 +31,6 @@
 [submodule "gtk-server/uthash"]
 	path = gtk-server/uthash
 	url = https://github.com/troydhanson/uthash.git
+[submodule "llama/llama.cpp"]
+	path = llama/llama.cpp
+	url = https://github.com/ggerganov/llama.cpp

llama/CMakeLists.txt

Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
cmake_minimum_required(VERSION 3.15)
project(llm_plugin C CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)

# -----------------------------
# Path to llama.cpp
# -----------------------------
set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp)

# -----------------------------
# FORCE CPU-only static builds
# -----------------------------
# Disable all shared libraries globally
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)

# llama.cpp specific static settings
set(LLAMA_STATIC ON CACHE BOOL "" FORCE)
set(LLAMA_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_LLAMA_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_GGML_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_SERVER_BUILD OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

# ggml specific static settings
set(GGML_STATIC ON CACHE BOOL "" FORCE)
set(GGML_SHARED OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_SHARED OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

# CPU-only flags
set(GGML_CUDA OFF CACHE BOOL "" FORCE)
set(GGML_METAL OFF CACHE BOOL "" FORCE)
set(GGML_OPENCL OFF CACHE BOOL "" FORCE)
set(GGML_KOMPUTE OFF CACHE BOOL "" FORCE)
set(GGML_SYCL OFF CACHE BOOL "" FORCE)
set(GGML_ACCELERATE OFF CACHE BOOL "" FORCE)
set(GGML_NATIVE ON CACHE BOOL "" FORCE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# -----------------------------
# Add llama.cpp subdirectories
# -----------------------------
add_subdirectory(${LLAMA_DIR}/ggml)
add_subdirectory(${LLAMA_DIR})

# -----------------------------
# Plugin sources
# -----------------------------
set(PLUGIN_SOURCES
  main.cpp
  ../include/param.cpp
  ../include/hashmap.cpp
  ../include/apiexec.cpp
)

# -----------------------------
# Build plugin as a shared library (.so)
# -----------------------------
add_library(llm_plugin SHARED ${PLUGIN_SOURCES})

target_include_directories(llm_plugin PRIVATE
  ${LLAMA_DIR}/include
  ${LLAMA_DIR}/ggml/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../include
  ${CMAKE_CURRENT_SOURCE_DIR}/..
)

target_link_libraries(llm_plugin PRIVATE
  llama
  ggml
)

# Include all static code into the plugin
target_link_options(llm_plugin PRIVATE
  -Wl,--whole-archive
  $<TARGET_FILE:llama>
  $<TARGET_FILE:ggml>
  -Wl,--no-whole-archive
)

# Ensure position-independent code for the .so
set_target_properties(llm_plugin PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
)

# -----------------------------
# Optional test application
# -----------------------------
add_executable(llm_test
  test_main.cpp
)

target_include_directories(llm_test PRIVATE
  ${LLAMA_DIR}/include
  ${LLAMA_DIR}/ggml/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../include
)

target_link_libraries(llm_test PRIVATE
  llm_plugin
  llama
  ggml
)

set_target_properties(llm_test PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
)
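
The --whole-archive link step above folds the static llama and ggml archives into llm_plugin, so the resulting .so should load with no separate llama library on the search path. A minimal standalone check of that property (not part of the commit; it assumes a Linux build where the library lands in build/lib/ per LIBRARY_OUTPUT_DIRECTORY, and that module.h gives the sblib_* entry points C linkage):

// check_plugin.cpp - hypothetical loader check, build with:
//   g++ check_plugin.cpp -o check_plugin -ldl
#include <cstdio>
#include <dlfcn.h>

int main(int argc, char **argv) {
  // Default path is an assumption based on LIBRARY_OUTPUT_DIRECTORY above.
  const char *path = argc > 1 ? argv[1] : "build/lib/libllm_plugin.so";

  // RTLD_NOW forces every undefined symbol to resolve immediately,
  // which fails if the llama/ggml code was not actually linked in.
  void *handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
  if (!handle) {
    fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }

  // Resolve an entry point the plugin exports (see llama/main.cpp below).
  int (*func_count)() = (int (*)()) dlsym(handle, "sblib_func_count");
  if (!func_count) {
    fprintf(stderr, "missing symbol: %s\n", dlerror());
    return 1;
  }

  printf("plugin loaded; sblib_func_count() = %d\n", func_count());
  dlclose(handle);
  return 0;
}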

llama/llama.cpp

Submodule llama.cpp added at e4ae383

llama/main.cpp

Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#include "config.h"

#include <cstring>
#include <cstdio>
#include "robin-hood-hashing/src/include/robin_hood.h"
#include "include/log.h"
#include "include/var.h"
#include "include/module.h"
#include "include/param.h"

#define CLASS_IOTASK_ID 1

int g_nextId = 1;

FUNC_SIG lib_func[] = {
};

FUNC_SIG lib_proc[] = {
};

SBLIB_API int sblib_proc_count() {
  return 0;
}

SBLIB_API int sblib_func_count() {
  return 0;
}

//
// Program startup
//
int sblib_init(const char *sourceFile) {
  return 1;
}

#if defined(ANDROID_MODULE)
//
// Retrieves the _app->activity->clazz value sent from App/JNI to Java to IOIOLoader
//
extern "C" JNIEXPORT void JNICALL Java_ioio_smallbasic_android_ModuleLoader_init
  (JNIEnv *env, jclass clazz, jobject activity) {
  logEntered();
  jclass longClass = env->FindClass("java/lang/Long");
  jmethodID longValueMethod = env->GetMethodID(longClass, "longValue", "()J");
  g_activity = (jobject)env->CallLongMethod(activity, longValueMethod);
  g_env = env;
}
#endif

//
// Release module variables falling out of scope
//
SBLIB_API void sblib_free(int cls_id, int id) {
  if (id != -1) {
    switch (cls_id) {
    case CLASS_IOTASK_ID:
      break;
    }
  }
}

//
// Program termination
//
void sblib_close(void) {
}
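
lib_func and lib_proc are empty placeholders here, and the count functions report zero, so the interpreter sees no LLM commands yet. Purely for illustration, a sketch of how a first function might later be registered; the entry layout ({min-args, max-args, name, callback}) and the v_setstr helper are assumptions modeled on other SmallBASIC plugins, not part of this commit:

// Hypothetical callback for an LLM.VERSION() style function.
// slib_par_t/var_t come from the shared ../include headers; the
// FUNC_SIG entry shape below is an assumption, not from this commit.
static int cmd_version(int argc, slib_par_t *params, var_t *retval) {
  v_setstr(retval, "llm-plugin 0.1");  // assumed var.h helper
  return 1;  // non-zero signals success to the interpreter
}

FUNC_SIG lib_func[] = {
  {0, 0, "VERSION", cmd_version},
};

SBLIB_API int sblib_func_count() {
  return sizeof(lib_func) / sizeof(lib_func[0]);
}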

llama/test_main.cpp

Lines changed: 112 additions & 0 deletions

@@ -0,0 +1,112 @@
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

#define LLAMA_BUILD_EXAMPLES
#include "llama.h"

int main() {
  const char *model_path = "model.gguf";

  // --- Model load ---
  llama_model_params model_params = llama_model_default_params();
  llama_model *model = llama_model_load_from_file(model_path, model_params);
  if (!model) {
    fprintf(stderr, "failed to load model\n");
    return 1;
  }

  const llama_vocab *vocab = llama_model_get_vocab(model);

  // --- Context creation ---
  llama_context_params ctx_params = llama_context_default_params();
  llama_context *ctx = llama_init_from_model(model, ctx_params);
  if (!ctx) {
    fprintf(stderr, "failed to create context\n");
    llama_model_free(model);
    return 1;
  }

  // --- Tokenize ---
  std::string prompt = "Hello, Llama!";

  // With a null buffer, llama_tokenize returns the negated token count.
  int n_tokens = -llama_tokenize(
    vocab,
    prompt.c_str(),
    prompt.size(),
    nullptr,
    0,
    true,  // add BOS
    false
  );

  if (n_tokens <= 0) {
    fprintf(stderr, "tokenize failed\n");
    return 1;
  }

  std::vector<llama_token> tokens(n_tokens);

  llama_tokenize(
    vocab,
    prompt.c_str(),
    prompt.size(),
    tokens.data(),
    n_tokens,
    true,
    false
  );

  // --- Build batch for prompt ---
  llama_batch batch = llama_batch_init(tokens.size(), 0, 1);

  for (size_t i = 0; i < tokens.size(); i++) {
    batch.token[i] = tokens[i];
    batch.pos[i] = i;
    batch.n_seq_id[i] = 1;
    batch.seq_id[i][0] = 0;  // seq_id[i] is an array; index into it
    // Only the last prompt token needs logits for the sampler to read.
    batch.logits[i] = (i == tokens.size() - 1);
    batch.n_tokens++;
  }

  if (llama_decode(ctx, batch) != 0) {
    fprintf(stderr, "decode failed\n");
    return 1;
  }

  // --- Sampler ---
  llama_sampler *sampler = llama_sampler_init_greedy();

  // --- Generation loop ---
  llama_batch next = llama_batch_init(1, 0, 1);
  int n_past = tokens.size();

  for (int i = 0; i < 100; i++) {
    // index -1 samples from the last logits computed by llama_decode
    llama_token tok = llama_sampler_sample(sampler, ctx, -1);

    if (llama_vocab_is_eog(vocab, tok)) break;  // stop at end-of-generation

    char piece_buf[512];
    int32_t piece_len = llama_token_to_piece(
      vocab,
      tok,
      piece_buf,
      sizeof(piece_buf),
      0,    // lstrip
      true  // print special tokens
    );

    if (piece_len > 0) {
      // the buffer is not null-terminated; print exactly piece_len bytes
      printf("%.*s", piece_len, piece_buf);
    }

    // feed the sampled token back at the next position
    next.token[0] = tok;
    next.pos[0] = n_past++;
    next.n_seq_id[0] = 1;
    next.seq_id[0][0] = 0;
    next.logits[0] = true;
    next.n_tokens = 1;

    if (llama_decode(ctx, next) != 0) break;
  }

  printf("\n");

  llama_batch_free(next);
  llama_batch_free(batch);
  llama_sampler_free(sampler);
  llama_free(ctx);
  llama_model_free(model);
}
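
One possible simplification for the token-feedback step: recent llama.cpp revisions provide llama_batch_get_one, which wraps a caller-owned token array without allocating, avoiding the per-step batch bookkeeping. Its signature has changed across versions, so verify against the submodule revision pinned above; a sketch of the generation loop rewritten around it:

// Sketch: drop-in generation loop for test_main.cpp above, assuming
// ctx, vocab, and sampler are initialized as in that file.
for (int i = 0; i < 100; i++) {
  llama_token tok = llama_sampler_sample(sampler, ctx, -1);
  if (llama_vocab_is_eog(vocab, tok)) break;  // stop at end-of-generation

  char buf[512];
  int32_t len = llama_token_to_piece(vocab, tok, buf, sizeof(buf), 0, true);
  if (len > 0) printf("%.*s", len, buf);

  // Wraps the caller-owned token; no llama_batch_free needed (or allowed).
  llama_batch step = llama_batch_get_one(&tok, 1);
  if (llama_decode(ctx, step) != 0) break;
}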
