Commit 50c5c2a

LLM: plugin module - initial commit

Author: Chris Warren-Smith
Parent: a9abd5f

6 files changed, 611 additions & 124 deletions

llama/CMakeLists.txt

Lines changed: 41 additions & 13 deletions
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.15)
-project(llm_plugin C CXX)
+project(llm C CXX)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_C_STANDARD 11)
@@ -49,42 +49,40 @@ add_subdirectory(${LLAMA_DIR}/ggml)
 add_subdirectory(${LLAMA_DIR})
 
 # -----------------------------
-# Plugin sources
+# Build plugin as a shared library (.so)
 # -----------------------------
 set(PLUGIN_SOURCES
   main.cpp
+  llama-sb.cpp
   ../include/param.cpp
   ../include/hashmap.cpp
   ../include/apiexec.cpp
 )
 
-# -----------------------------
-# Build plugin as a shared library (.so)
-# -----------------------------
-add_library(llm_plugin SHARED ${PLUGIN_SOURCES})
+add_library(llm SHARED ${PLUGIN_SOURCES})
 
-target_include_directories(llm_plugin PRIVATE
+target_include_directories(llm PRIVATE
   ${LLAMA_DIR}/include
   ${LLAMA_DIR}/ggml/include
   ${CMAKE_CURRENT_SOURCE_DIR}/../include
   ${CMAKE_CURRENT_SOURCE_DIR}/..
 )
 
-target_link_libraries(llm_plugin PRIVATE
+target_link_libraries(llm PRIVATE
   llama
   ggml
 )
 
 # Include all static code into plugin
-target_link_options(llm_plugin PRIVATE
+target_link_options(llm PRIVATE
   -Wl,--whole-archive
-  $<TARGET_FILE:llama>
-  $<TARGET_FILE:ggml>
+  $<TARGET_FILE:llama>
+  $<TARGET_FILE:ggml>
   -Wl,--no-whole-archive
 )
 
 # Ensure position-independent code for .so
-set_target_properties(llm_plugin PROPERTIES
+set_target_properties(llm PROPERTIES
   POSITION_INDEPENDENT_CODE ON
   LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
 )
@@ -103,11 +101,41 @@ target_include_directories(llm_test PRIVATE
 )
 
 target_link_libraries(llm_test PRIVATE
-  llm_plugin
+  llm
   llama
   ggml
 )
 
 set_target_properties(llm_test PROPERTIES
   RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
 )
+
+# ------------------------------------------------------------------
+# Android native library
+# ------------------------------------------------------------------
+if (ANDROID)
+  # CMake sets ANDROID when using the Android toolchain
+  # Re-use the same source files for the Android .so
+  add_library(llm_android SHARED
+    main.cpp
+    llama-sb.cpp
+    ../include/param.cpp
+    ../include/hashmap.cpp
+    ../include/apiexec.cpp
+  )
+
+  # Optional: set the SONAME / versioning if you need it
+  set_target_properties(llm_android PROPERTIES
+    OUTPUT_NAME "libllm"
+    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${ANDROID_ABI}")
+
+  target_link_libraries(llm_android PRIVATE
+    log
+    llm
+    llama
+    ggml
+  )
+
+  # Export the location so Gradle can copy it later
+  set(MY_NATIVE_LIB_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${ANDROID_ABI}/libllm.so")
+endif()
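
The options above fold the static llama and ggml archives wholesale into the single plugin .so ("Include all static code into plugin") and force position-independent code, both of which matter because the library is meant to be loaded at runtime rather than linked against. Below is a minimal sketch of what that runtime load could look like on Linux; the entry-point symbol is hypothetical, since the actual SmallBASIC module interface is not part of this diff.

// sketch: dlopen the plugin produced by the build above (compile with -ldl)
#include <dlfcn.h>
#include <cstdio>

int main() {
  // LIBRARY_OUTPUT_DIRECTORY places the library under <build>/lib
  void *handle = dlopen("build/lib/libllm.so", RTLD_NOW);
  if (!handle) {
    fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }
  // "plugin_init" is a placeholder name, not a symbol from this commit
  int (*init)() = (int (*)()) dlsym(handle, "plugin_init");
  if (init != nullptr) {
    printf("init returned %d\n", init());
  }
  dlclose(handle);
  return 0;
}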

llama/llama-sb.cpp

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

#include "llama.h"
#include "llama-sb.h"

Llama::Llama() :
  _model(nullptr),
  _ctx(nullptr),
  _sampler(nullptr),
  _vocab(nullptr),
  _temperature(0),
  _n_ctx(0) {
}

bool Llama::create(string model_path, int n_ctx, bool disable_log) {
  if (disable_log) {
    // only print errors
    llama_log_set([](enum ggml_log_level level, const char *text, void * /* user_data */) {
      if (level >= GGML_LOG_LEVEL_ERROR) {
        fprintf(stderr, "%s", text);
      }
    }, nullptr);
  }

  ggml_backend_load_all();

  llama_model_params mparams = llama_model_default_params();
  mparams.n_gpu_layers = 0;

  _model = llama_model_load_from_file(model_path.c_str(), mparams);
  if (!_model) {
    _last_error = "failed to load model";
  } else {
    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = n_ctx;
    cparams.n_batch = n_ctx;

    _ctx = llama_init_from_model(_model, cparams);
    if (!_ctx) {
      _last_error = "failed to create context";
    } else {
      _vocab = llama_model_get_vocab(_model);
      configure_sampler(0);
    }
  }
  return _last_error.empty();
}

Llama::~Llama() {
  if (_sampler) {
    llama_sampler_free(_sampler);
  }
  if (_ctx) {
    llama_free(_ctx);
  }
  if (_model) {
    llama_model_free(_model);
  }
}

string Llama::build_chat_prompt(const string &user_msg) {
  _chat_prompt += "User: ";
  _chat_prompt += user_msg;
  _chat_prompt += "\nAssistant: ";
  return _chat_prompt;
}

void Llama::configure_sampler(float temperature) {
  if (temperature != _temperature || _sampler == nullptr) {
    if (_sampler) {
      llama_sampler_free(_sampler);
    }
    auto sparams = llama_sampler_chain_default_params();
    _sampler = llama_sampler_chain_init(sparams);
    _temperature = temperature;

    // llama_sampler_chain_reset(sampler);
    if (temperature <= 0.0f) {
      llama_sampler_chain_add(_sampler, llama_sampler_init_greedy());
    } else {
      llama_sampler_chain_add(_sampler, llama_sampler_init_temp(temperature));
      // temp only rescales logits; the chain must end with a sampler that
      // actually selects a token, so follow it with a dist sampler
      llama_sampler_chain_add(_sampler, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
    }
  }
}

// first call returns the required token count (negated); second call fills the buffer
static std::vector<llama_token> tokenize(const llama_vocab *vocab, const string &text) {
  int n = -llama_tokenize(vocab, text.c_str(), text.size(), nullptr, 0, true, true);
  std::vector<llama_token> tokens(n);
  llama_tokenize(vocab, text.c_str(), text.size(), tokens.data(), tokens.size(), true, true);
  return tokens;
}

string Llama::generate(const string &prompt, int max_tokens, float temperature, bool echo, bool clear_cache) {
  string out;

  if (clear_cache) {
    // llama_kv_cache_clear(_ctx);
  }

  auto prompt_tokens = tokenize(_vocab, prompt);
  configure_sampler(temperature);

  llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size());

  if (llama_decode(_ctx, batch)) {
    _last_error = "decode failed";
    return out;
  }

  if (echo) {
    out += prompt;
  }

  for (int i = 0; i < max_tokens; ++i) {
    llama_token tok = llama_sampler_sample(_sampler, _ctx, -1);

    if (llama_vocab_is_eog(_vocab, tok)) {
      break;
    }

    char buf[128];
    int n = llama_token_to_piece(_vocab, tok, buf, sizeof(buf), 0, true);

    if (n > 0) {
      out.append(buf, n);
    }
    batch = llama_batch_get_one(&tok, 1);
    if (llama_decode(_ctx, batch)) {
      break;
    }
  }

  return out;
}
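
Generation here is a single decode of the full prompt followed by a sample-and-decode loop, one token per iteration, stopping at end-of-generation or max_tokens. Below is a minimal single-shot usage sketch of the wrapper; the model path is a placeholder, and any GGUF model file would do.

// sketch: one-shot generation through the Llama wrapper above
#include <cstdio>
#include "llama-sb.h"

int main() {
  Llama llama;
  // placeholder model path; 2048-token context, quiet logging
  if (!llama.create("models/tiny.gguf", 2048, true)) {
    fprintf(stderr, "error: %s\n", llama.last_error());
    return 1;
  }
  // temperature 0 takes the greedy sampler path in configure_sampler()
  string reply = llama.generate("User: hello\nAssistant:", 64, 0.0f, false, true);
  printf("%s\n", reply.c_str());
  return 0;
}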

llama/llama-sb.h

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#pragma once

#include <string>
#include "llama.h"

using namespace std;

struct Llama {
  explicit Llama();
  ~Llama();

  bool create(string model_path, int n_ctx, bool disable_log);
  string generate(const string &prompt,
                  int max_tokens = 128,
                  float temperature = 0.8f,
                  bool echo = true,
                  bool clear_cache = true);
  const char *last_error() { return _last_error.c_str(); }

 private:
  string build_chat_prompt(const string &user_msg);
  void configure_sampler(float temperature);

  llama_model *_model;
  llama_context *_ctx;
  llama_sampler *_sampler;
  const llama_vocab *_vocab;
  string _chat_prompt;
  string _last_error;
  float _temperature;
  int _n_ctx;
};
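
Every argument after the prompt is defaulted (128 tokens, temperature 0.8, echo and cache clearing on), so the simplest call is just generate(prompt). Below is a sketch of a multi-turn loop over this interface, accumulating history in the same "User: ...\nAssistant: " framing that build_chat_prompt uses; the model path is again a placeholder.

// sketch: a minimal chat loop over the header's public interface
#include <iostream>
#include <string>
#include "llama-sb.h"

int main() {
  Llama llama;
  if (!llama.create("models/tiny.gguf", 2048, true)) {  // placeholder path
    std::cerr << llama.last_error() << "\n";
    return 1;
  }
  std::string line;
  std::string history;
  while (std::getline(std::cin, line)) {
    history += "User: " + line + "\nAssistant: ";
    // echo=false so the reply holds only the new completion, not the prompt
    std::string reply = llama.generate(history, 128, 0.8f, false, true);
    history += reply + "\n";
    std::cout << reply << "\n";
  }
  return 0;
}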

llama/llama.cpp
