Commit 50c5c2a

LLM: plugin module - initial commit

Author: Chris Warren-Smith
Parent: a9abd5f

6 files changed, 611 additions & 124 deletions

llama/CMakeLists.txt

Lines changed: 41 additions & 13 deletions
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.15)
-project(llm_plugin C CXX)
+project(llm C CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_C_STANDARD 11)
@@ -49,42 +49,40 @@ add_subdirectory(${LLAMA_DIR}/ggml)
 add_subdirectory(${LLAMA_DIR})
 
 # -----------------------------
-# Plugin sources
+# Build plugin as a shared library (.so)
 # -----------------------------
 set(PLUGIN_SOURCES
   main.cpp
+  llama-sb.cpp
   ../include/param.cpp
   ../include/hashmap.cpp
   ../include/apiexec.cpp
 )
 
-# -----------------------------
-# Build plugin as a shared library (.so)
-# -----------------------------
-add_library(llm_plugin SHARED ${PLUGIN_SOURCES})
+add_library(llm SHARED ${PLUGIN_SOURCES})
 
-target_include_directories(llm_plugin PRIVATE
+target_include_directories(llm PRIVATE
   ${LLAMA_DIR}/include
   ${LLAMA_DIR}/ggml/include
   ${CMAKE_CURRENT_SOURCE_DIR}/../include
   ${CMAKE_CURRENT_SOURCE_DIR}/..
 )
 
-target_link_libraries(llm_plugin PRIVATE
+target_link_libraries(llm PRIVATE
   llama
   ggml
 )
 
 # Include all static code into plugin
-target_link_options(llm_plugin PRIVATE
+target_link_options(llm PRIVATE
   -Wl,--whole-archive
-  $<TARGET_FILE:llama>
-  $<TARGET_FILE:ggml>
+  $<TARGET_FILE:llama>
+  $<TARGET_FILE:ggml>
   -Wl,--no-whole-archive
 )
 
 # Ensure position-independent code for .so
-set_target_properties(llm_plugin PROPERTIES
+set_target_properties(llm PROPERTIES
   POSITION_INDEPENDENT_CODE ON
   LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
 )
@@ -103,11 +101,41 @@ target_include_directories(llm_test PRIVATE
 )
 
 target_link_libraries(llm_test PRIVATE
-  llm_plugin
+  llm
   llama
   ggml
 )
 
 set_target_properties(llm_test PROPERTIES
   RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
 )
+
+# ------------------------------------------------------------------
+# Android native library
+# ------------------------------------------------------------------
+if (ANDROID)
+  # CMake sets ANDROID when using the Android toolchain
+  # Re-use the same source files for the Android .so
+  add_library(llm_android SHARED
+    main.cpp
+    llama-sb.cpp
+    ../include/param.cpp
+    ../include/hashmap.cpp
+    ../include/apiexec.cpp
+  )
+
+  # Optional: set the SONAME / versioning if you need it
+  set_target_properties(llm_android PROPERTIES
+    OUTPUT_NAME "libllm"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${ANDROID_ABI}")
+
+  target_link_libraries(llm_android PRIVATE
+    log
+    llm
+    llama
+    ggml
+  )
+
+  # Export the location so Gradle can copy it later
+  set(MY_NATIVE_LIB_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${ANDROID_ABI}/libllm.so")
+endif()
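
The options above fold the static llama and ggml archives wholesale into the single plugin .so ("Include all static code into plugin") and force position-independent code, both of which matter because the library is meant to be loaded at runtime rather than linked against. Below is a minimal sketch of what that runtime load could look like on Linux; the entry-point symbol is hypothetical, since the actual SmallBASIC module interface is not part of this diff.

// sketch: dlopen the plugin produced by the build above (compile with -ldl)
#include <dlfcn.h>
#include <cstdio>

int main() {
  // LIBRARY_OUTPUT_DIRECTORY places the library under <build>/lib
  void *handle = dlopen("build/lib/libllm.so", RTLD_NOW);
  if (!handle) {
    fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  // "plugin_init" is a placeholder name, not a symbol from this commit
  int (*init)() = (int (*)()) dlsym(handle, "plugin_init");
  if (init != nullptr) {
    printf("init returned %d\n", init());
  }
  dlclose(handle);
  return 0;
}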

llama/llama-sb.cpp

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

#include "llama.h"
#include "llama-sb.h"

Llama::Llama() :
  _model(nullptr),
  _ctx(nullptr),
  _sampler(nullptr),
  _vocab(nullptr),
  _temperature(0),
  _n_ctx(0) {
}

bool Llama::create(string model_path, int n_ctx, bool disable_log) {
  if (disable_log) {
    // only print errors
    llama_log_set([](enum ggml_log_level level, const char *text, void * /* user_data */) {
      if (level >= GGML_LOG_LEVEL_ERROR) {
        fprintf(stderr, "%s", text);
      }
    }, nullptr);
  }

  ggml_backend_load_all();

  llama_model_params mparams = llama_model_default_params();
  mparams.n_gpu_layers = 0;

  _model = llama_model_load_from_file(model_path.c_str(), mparams);
  if (!_model) {
    _last_error = "failed to load model";
  } else {
    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = n_ctx;
    cparams.n_batch = n_ctx;

    _ctx = llama_init_from_model(_model, cparams);
    if (!_ctx) {
      _last_error = "failed to create context";
    } else {
      _vocab = llama_model_get_vocab(_model);
      configure_sampler(0);
    }
  }
  return _last_error.empty();
}

Llama::~Llama() {
  if (_sampler) {
    llama_sampler_free(_sampler);
  }
  if (_ctx) {
    llama_free(_ctx);
  }
  if (_model) {
    llama_model_free(_model);
  }
}

string Llama::build_chat_prompt(const string &user_msg) {
  _chat_prompt += "User: ";
  _chat_prompt += user_msg;
  _chat_prompt += "\nAssistant: ";
  return _chat_prompt;
}

void Llama::configure_sampler(float temperature) {
  if (temperature != _temperature || _sampler == nullptr) {
    if (_sampler) {
      llama_sampler_free(_sampler);
    }
    auto sparams = llama_sampler_chain_default_params();
    _sampler = llama_sampler_chain_init(sparams);
    _temperature = temperature;

    // llama_sampler_chain_reset(sampler);
    if (temperature <= 0.0f) {
      llama_sampler_chain_add(_sampler, llama_sampler_init_greedy());
    } else {
      llama_sampler_chain_add(_sampler, llama_sampler_init_temp(temperature));
      // temp only rescales logits; the chain must end with a sampler that
      // actually selects a token, so follow it with a dist sampler
      llama_sampler_chain_add(_sampler, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
    }
  }
}

// first call returns the required token count (negated); second call fills the buffer
static std::vector<llama_token> tokenize(const llama_vocab *vocab, const string &text) {
  int n = -llama_tokenize(vocab, text.c_str(), text.size(), nullptr, 0, true, true);
  std::vector<llama_token> tokens(n);
  llama_tokenize(vocab, text.c_str(), text.size(), tokens.data(), tokens.size(), true, true);
  return tokens;
}

string Llama::generate(const string &prompt, int max_tokens, float temperature, bool echo, bool clear_cache) {
  string out;

  if (clear_cache) {
    // llama_kv_cache_clear(_ctx);
  }

  auto prompt_tokens = tokenize(_vocab, prompt);
  configure_sampler(temperature);

  llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());

  if (llama_decode(_ctx, batch)) {
    _last_error = "decode failed";
    return out;
  }

  if (echo) {
    out += prompt;
  }

  for (int i = 0; i < max_tokens; ++i) {
    llama_token tok = llama_sampler_sample(_sampler, _ctx, -1);

    if (llama_vocab_is_eog(_vocab, tok)) {
      break;
    }

    char buf[128];
    int n = llama_token_to_piece(_vocab, tok, buf, sizeof(buf), 0, true);

    if (n > 0) {
      out.append(buf, n);
    }
    batch = llama_batch_get_one(&tok, 1);
    if (llama_decode(_ctx, batch)) {
      break;
    }
  }

  return out;
}
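
Generation here is a single decode of the full prompt followed by a sample-and-decode loop, one token per iteration, stopping at end-of-generation or max_tokens. Below is a minimal single-shot usage sketch of the wrapper; the model path is a placeholder, and any GGUF model file would do.

// sketch: one-shot generation through the Llama wrapper above
#include <cstdio>
#include "llama-sb.h"

int main() {
  Llama llama;
  // placeholder model path; 2048-token context, quiet logging
  if (!llama.create("models/tiny.gguf", 2048, true)) {
    fprintf(stderr, "error: %s\n", llama.last_error());
    return 1;
  }
  // temperature 0 takes the greedy sampler path in configure_sampler()
  string reply = llama.generate("User: hello\nAssistant:", 64, 0.0f, false, true);
  printf("%s\n", reply.c_str());
  return 0;
}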

llama/llama-sb.h

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#pragma once

#include <string>
#include "llama.h"

using namespace std;

struct Llama {
  explicit Llama();
  ~Llama();

  bool create(string model_path, int n_ctx, bool disable_log);
  string generate(const string &prompt,
                  int max_tokens = 128,
                  float temperature = 0.8f,
                  bool echo = true,
                  bool clear_cache = true);
  const char *last_error() { return _last_error.c_str(); }

 private:
  string build_chat_prompt(const string &user_msg);
  void configure_sampler(float temperature);

  llama_model *_model;
  llama_context *_ctx;
  llama_sampler *_sampler;
  const llama_vocab *_vocab;
  string _chat_prompt;
  string _last_error;
  float _temperature;
  int _n_ctx;
};
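
Every argument after the prompt is defaulted (128 tokens, temperature 0.8, echo and cache clearing on), so the simplest call is just generate(prompt). Below is a sketch of a multi-turn loop over this interface, accumulating history in the same "User: ...\nAssistant: " framing that build_chat_prompt uses; the model path is again a placeholder.

// sketch: a minimal chat loop over the header's public interface
#include <iostream>
#include <string>
#include "llama-sb.h"

int main() {
  Llama llama;
  if (!llama.create("models/tiny.gguf", 2048, true)) {  // placeholder path
    std::cerr << llama.last_error() << "\n";
    return 1;
  }
  std::string line;
  std::string history;
  while (std::getline(std::cin, line)) {
    history += "User: " + line + "\nAssistant: ";
    // echo=false so the reply holds only the new completion, not the prompt
    std::string reply = llama.generate(history, 128, 0.8f, false, true);
    history += reply + "\n";
    std::cout << reply << "\n";
  }
  return 0;
}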

llama/llama.cpp
