Commit a9abd5f

LLM: plugin module - initial commit

Author: Chris Warren-Smith
1 parent: 53939fd

5 files changed: 303 additions & 0 deletions

.gitmodules

Lines changed: 3 additions & 0 deletions

@@ -31,3 +31,6 @@
 [submodule "gtk-server/uthash"]
 	path = gtk-server/uthash
 	url = https://github.com/troydhanson/uthash.git
+[submodule "llama/llama.cpp"]
+	path = llama/llama.cpp
+	url = https://github.com/ggerganov/llama.cpp

llama/CMakeLists.txt

Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
cmake_minimum_required(VERSION 3.15)
project(llm_plugin C CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)

# -----------------------------
# Path to llama.cpp
# -----------------------------
set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp)

# -----------------------------
# FORCE CPU-only static builds
# -----------------------------
# Disable all shared libraries globally
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)

# llama.cpp specific static settings
set(LLAMA_STATIC ON CACHE BOOL "" FORCE)
set(LLAMA_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_LLAMA_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_GGML_SHARED OFF CACHE BOOL "" FORCE)
set(LLAMA_SERVER_BUILD OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

# ggml specific static settings
set(GGML_STATIC ON CACHE BOOL "" FORCE)
set(GGML_SHARED OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_SHARED OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(GGML_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

# CPU-only flags
set(GGML_CUDA OFF CACHE BOOL "" FORCE)
set(GGML_METAL OFF CACHE BOOL "" FORCE)
set(GGML_OPENCL OFF CACHE BOOL "" FORCE)
set(GGML_KOMPUTE OFF CACHE BOOL "" FORCE)
set(GGML_SYCL OFF CACHE BOOL "" FORCE)
set(GGML_ACCELERATE OFF CACHE BOOL "" FORCE)
set(GGML_NATIVE ON CACHE BOOL "" FORCE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# -----------------------------
# Add llama.cpp subdirectories
# -----------------------------
add_subdirectory(${LLAMA_DIR}/ggml)
add_subdirectory(${LLAMA_DIR})

# -----------------------------
# Plugin sources
# -----------------------------
set(PLUGIN_SOURCES
  main.cpp
  ../include/param.cpp
  ../include/hashmap.cpp
  ../include/apiexec.cpp
)

# -----------------------------
# Build plugin as a shared library (.so)
# -----------------------------
add_library(llm_plugin SHARED ${PLUGIN_SOURCES})

target_include_directories(llm_plugin PRIVATE
  ${LLAMA_DIR}/include
  ${LLAMA_DIR}/ggml/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../include
  ${CMAKE_CURRENT_SOURCE_DIR}/..
)

target_link_libraries(llm_plugin PRIVATE
  llama
  ggml
)

# Include all static code into the plugin
target_link_options(llm_plugin PRIVATE
  -Wl,--whole-archive
  $<TARGET_FILE:llama>
  $<TARGET_FILE:ggml>
  -Wl,--no-whole-archive
)

# Ensure position-independent code for the .so
set_target_properties(llm_plugin PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
)

# -----------------------------
# Optional test application
# -----------------------------
add_executable(llm_test
  test_main.cpp
)

target_include_directories(llm_test PRIVATE
  ${LLAMA_DIR}/include
  ${LLAMA_DIR}/ggml/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../include
)

target_link_libraries(llm_test PRIVATE
  llm_plugin
  llama
  ggml
)

set_target_properties(llm_test PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
)
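
The --whole-archive link step above folds the static llama and ggml archives into llm_plugin, so the resulting .so should load with no separate llama library on the search path. A minimal standalone check of that property (not part of the commit; it assumes a Linux build where the library lands in build/lib/ per LIBRARY_OUTPUT_DIRECTORY, and that module.h gives the sblib_* entry points C linkage):

// check_plugin.cpp - hypothetical loader check, build with:
//   g++ check_plugin.cpp -o check_plugin -ldl
#include <cstdio>
#include <dlfcn.h>

int main(int argc, char **argv) {
  // Default path is an assumption based on LIBRARY_OUTPUT_DIRECTORY above.
  const char *path = argc > 1 ? argv[1] : "build/lib/libllm_plugin.so";

  // RTLD_NOW forces every undefined symbol to resolve immediately,
  // which fails if the llama/ggml code was not actually linked in.
  void *handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
  if (!handle) {
    fprintf(stderr, "dlopen failed: %s\n", dlerror());
    return 1;
  }

  // Resolve an entry point the plugin exports (see llama/main.cpp below).
  int (*func_count)() = (int (*)()) dlsym(handle, "sblib_func_count");
  if (!func_count) {
    fprintf(stderr, "missing symbol: %s\n", dlerror());
    return 1;
  }

  printf("plugin loaded; sblib_func_count() = %d\n", func_count());
  dlclose(handle);
  return 0;
}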

llama/llama.cpp

Submodule llama.cpp added at e4ae383

llama/main.cpp

Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
// This file is part of SmallBASIC
//
// This program is distributed under the terms of the GPL v2.0 or later
// Download the GNU Public License (GPL) from www.gnu.org
//
// Copyright(C) 2026 Chris Warren-Smith

#include "config.h"

#include <cstring>
#include <cstdio>
#include "robin-hood-hashing/src/include/robin_hood.h"
#include "include/log.h"
#include "include/var.h"
#include "include/module.h"
#include "include/param.h"

#define CLASS_IOTASK_ID 1

int g_nextId = 1;

FUNC_SIG lib_func[] = {
};

FUNC_SIG lib_proc[] = {
};

SBLIB_API int sblib_proc_count() {
  return 0;
}

SBLIB_API int sblib_func_count() {
  return 0;
}

//
// Program startup
//
int sblib_init(const char *sourceFile) {
  return 1;
}

#if defined(ANDROID_MODULE)
//
// Retrieves the _app->activity->clazz value sent from App/JNI to Java to IOIOLoader
//
extern "C" JNIEXPORT void JNICALL Java_ioio_smallbasic_android_ModuleLoader_init
  (JNIEnv *env, jclass clazz, jobject activity) {
  logEntered();
  jclass longClass = env->FindClass("java/lang/Long");
  jmethodID longValueMethod = env->GetMethodID(longClass, "longValue", "()J");
  g_activity = (jobject)env->CallLongMethod(activity, longValueMethod);
  g_env = env;
}
#endif

//
// Release module variables falling out of scope
//
SBLIB_API void sblib_free(int cls_id, int id) {
  if (id != -1) {
    switch (cls_id) {
    case CLASS_IOTASK_ID:
      break;
    }
  }
}

//
// Program termination
//
void sblib_close(void) {
}
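
lib_func and lib_proc are empty placeholders here, and the count functions report zero, so the interpreter sees no LLM commands yet. Purely for illustration, a sketch of how a first function might later be registered; the entry layout ({min-args, max-args, name, callback}) and the v_setstr helper are assumptions modeled on other SmallBASIC plugins, not part of this commit:

// Hypothetical callback for an LLM.VERSION() style function.
// slib_par_t/var_t come from the shared ../include headers; the
// FUNC_SIG entry shape below is an assumption, not from this commit.
static int cmd_version(int argc, slib_par_t *params, var_t *retval) {
  v_setstr(retval, "llm-plugin 0.1");  // assumed var.h helper
  return 1;  // non-zero signals success to the interpreter
}

FUNC_SIG lib_func[] = {
  {0, 0, "VERSION", cmd_version},
};

SBLIB_API int sblib_func_count() {
  return sizeof(lib_func) / sizeof(lib_func[0]);
}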

llama/test_main.cpp

Lines changed: 112 additions & 0 deletions

@@ -0,0 +1,112 @@
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

#define LLAMA_BUILD_EXAMPLES
#include "llama.h"

int main() {
  const char *model_path = "model.gguf";

  // --- Model load ---
  llama_model_params model_params = llama_model_default_params();
  llama_model *model = llama_model_load_from_file(model_path, model_params);
  if (!model) {
    fprintf(stderr, "failed to load model\n");
    return 1;
  }

  const llama_vocab *vocab = llama_model_get_vocab(model);

  // --- Context creation ---
  llama_context_params ctx_params = llama_context_default_params();
  llama_context *ctx = llama_init_from_model(model, ctx_params);
  if (!ctx) {
    fprintf(stderr, "failed to create context\n");
    llama_model_free(model);
    return 1;
  }

  // --- Tokenize ---
  std::string prompt = "Hello, Llama!";

  // With a null buffer, llama_tokenize returns the negated token count.
  int n_tokens = -llama_tokenize(
    vocab,
    prompt.c_str(),
    prompt.size(),
    nullptr,
    0,
    true,  // add BOS
    false
  );

  if (n_tokens <= 0) {
    fprintf(stderr, "tokenize failed\n");
    return 1;
  }

  std::vector<llama_token> tokens(n_tokens);

  llama_tokenize(
    vocab,
    prompt.c_str(),
    prompt.size(),
    tokens.data(),
    n_tokens,
    true,
    false
  );

  // --- Build batch for prompt ---
  llama_batch batch = llama_batch_init(tokens.size(), 0, 1);

  for (size_t i = 0; i < tokens.size(); i++) {
    batch.token[i] = tokens[i];
    batch.pos[i] = i;
    batch.n_seq_id[i] = 1;
    batch.seq_id[i][0] = 0;  // seq_id[i] is an array; index into it
    // Only the last prompt token needs logits for the sampler to read.
    batch.logits[i] = (i == tokens.size() - 1);
    batch.n_tokens++;
  }

  if (llama_decode(ctx, batch) != 0) {
    fprintf(stderr, "decode failed\n");
    return 1;
  }

  // --- Sampler ---
  llama_sampler *sampler = llama_sampler_init_greedy();

  // --- Generation loop ---
  llama_batch next = llama_batch_init(1, 0, 1);
  int n_past = tokens.size();

  for (int i = 0; i < 100; i++) {
    // index -1 samples from the last logits computed by llama_decode
    llama_token tok = llama_sampler_sample(sampler, ctx, -1);

    if (llama_vocab_is_eog(vocab, tok)) break;  // stop at end-of-generation

    char piece_buf[512];
    int32_t piece_len = llama_token_to_piece(
      vocab,
      tok,
      piece_buf,
      sizeof(piece_buf),
      0,    // lstrip
      true  // print special tokens
    );

    if (piece_len > 0) {
      // the buffer is not null-terminated; print exactly piece_len bytes
      printf("%.*s", piece_len, piece_buf);
    }

    // feed the sampled token back at the next position
    next.token[0] = tok;
    next.pos[0] = n_past++;
    next.n_seq_id[0] = 1;
    next.seq_id[0][0] = 0;
    next.logits[0] = true;
    next.n_tokens = 1;

    if (llama_decode(ctx, next) != 0) break;
  }

  printf("\n");

  llama_batch_free(next);
  llama_batch_free(batch);
  llama_sampler_free(sampler);
  llama_free(ctx);
  llama_model_free(model);
}
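
One possible simplification for the token-feedback step: recent llama.cpp revisions provide llama_batch_get_one, which wraps a caller-owned token array without allocating, avoiding the per-step batch bookkeeping. Its signature has changed across versions, so verify against the submodule revision pinned above; a sketch of the generation loop rewritten around it:

// Sketch: drop-in generation loop for test_main.cpp above, assuming
// ctx, vocab, and sampler are initialized as in that file.
for (int i = 0; i < 100; i++) {
  llama_token tok = llama_sampler_sample(sampler, ctx, -1);
  if (llama_vocab_is_eog(vocab, tok)) break;  // stop at end-of-generation

  char buf[512];
  int32_t len = llama_token_to_piece(vocab, tok, buf, sizeof(buf), 0, true);
  if (len > 0) printf("%.*s", len, buf);

  // Wraps the caller-owned token; no llama_batch_free needed (or allowed).
  llama_batch step = llama_batch_get_one(&tok, 1);
  if (llama_decode(ctx, step) != 0) break;
}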
