Skip to content

Commit 9583757

Browse files
committed
[ml service] Support flexible tensors for compatibility with filters like llama.cpp
- Refactor tensor allocation logic to support flexible tensors - Enable compatibility with filters such as llama.cpp by allowing dynamic tensor management Signed-off-by: hyunil park <hyunil46.park@samsung.com>
1 parent b8c2944 commit 9583757

6 files changed

Lines changed: 227 additions & 25 deletions

File tree

c/include/nnstreamer-tizen-internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ typedef struct {
3737
char *models; /**< Comma separated neural network model files. */
3838
char *custom_option; /**< Custom option string for neural network framework. */
3939
char *fw_name; /**< The explicit framework name given by user */
40+
int invoke_dynamic; /**< True for supporting invoke with flexible output. */
41+
int invoke_async; /**< The sub-plugin must support asynchronous output to use this option. If set to TRUE, the sub-plugin can generate multiple outputs asynchronously per single input. Otherwise, only synchronous single-output is expected and async callback/handle are ignored. */
4042
} ml_single_preset;
4143

4244
/**

c/src/ml-api-common.c

Lines changed: 77 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,26 @@ _ml_tensors_data_clone_no_alloc (const ml_tensors_data_s * data_src,
832832
return ML_ERROR_NONE;
833833
}
834834

835+
/**
836+
* @brief Allocates zero-initialized memory of the given size for the tensor at the specified index
837+
* in the tensor data structure, and sets the size value for that tensor.
838+
*/
839+
static int
840+
_ml_tensor_data_alloc (ml_tensors_data_s * data, int index, const size_t size)
841+
{
842+
if (!data || index < 0)
843+
_ml_error_report_return (ML_ERROR_INVALID_PARAMETER,
844+
"Invalid parameter: data pointer is null or index is out of range.");
845+
846+
data->tensors[index].size = size;
847+
data->tensors[index].data = g_malloc0 (size);
848+
if (data->tensors[index].data == NULL)
849+
_ml_error_report_return (ML_ERROR_OUT_OF_MEMORY,
850+
"Failed to allocate memory for tensor data.");
851+
852+
return ML_ERROR_NONE;
853+
}
854+
835855
/**
836856
* @brief Copies the tensor data frame.
837857
*/
@@ -840,7 +860,8 @@ ml_tensors_data_clone (const ml_tensors_data_h in, ml_tensors_data_h * out)
840860
{
841861
int status;
842862
unsigned int i;
843-
ml_tensors_data_s *_in, *_out;
863+
ml_tensors_data_s *_in, *_out = NULL;
864+
ml_tensors_info_s *_info = NULL;
844865

845866
check_feature_state (ML_FEATURE);
846867

@@ -862,12 +883,25 @@ ml_tensors_data_clone (const ml_tensors_data_h in, ml_tensors_data_h * out)
862883
}
863884

864885
_out = (ml_tensors_data_s *) (*out);
886+
_info = (ml_tensors_info_s *) _in->info;
887+
888+
if (_info->info.format == _NNS_TENSOR_FORMAT_FLEXIBLE) {
889+
for (i = 0; i < _in->num_tensors; i++) {
890+
status = _ml_tensor_data_alloc (_out, i, _in->tensors[i].size);
891+
if (status != ML_ERROR_NONE) {
892+
goto error;
893+
}
894+
}
895+
}
865896

866897
for (i = 0; i < _out->num_tensors; ++i) {
867898
memcpy (_out->tensors[i].data, _in->tensors[i].data, _in->tensors[i].size);
868899
}
869900

870901
error:
902+
if (status != ML_ERROR_NONE)
903+
_ml_tensors_data_destroy_internal (_out, TRUE);
904+
871905
G_UNLOCK_UNLESS_NOLOCK (*_in);
872906
return status;
873907
}
@@ -914,6 +948,7 @@ int
914948
ml_tensors_data_create (const ml_tensors_info_h info, ml_tensors_data_h * data)
915949
{
916950
gint status = ML_ERROR_STREAMS_PIPE;
951+
ml_tensors_info_s *_info = NULL;
917952
ml_tensors_data_s *_data = NULL;
918953
guint i;
919954
bool valid;
@@ -944,21 +979,29 @@ ml_tensors_data_create (const ml_tensors_info_h info, ml_tensors_data_h * data)
944979
status);
945980
}
946981

982+
_info = (ml_tensors_info_s *) info;
983+
if (_info->info.format == _NNS_TENSOR_FORMAT_FLEXIBLE) {
984+
_ml_logw
985+
("[ml_tensors_data_create] format is FLEXIBLE, skipping tensor memory allocation. "
986+
"Use ml_tensors_data_set_tensor_data() to update data buffer.");
987+
*data = _data;
988+
return ML_ERROR_NONE;
989+
}
990+
947991
for (i = 0; i < _data->num_tensors; i++) {
948-
_data->tensors[i].data = g_malloc0 (_data->tensors[i].size);
949-
if (_data->tensors[i].data == NULL) {
950-
goto failed_oom;
951-
}
992+
status = _ml_tensor_data_alloc (_data, i, _data->tensors[i].size);
993+
if (status != ML_ERROR_NONE)
994+
goto error;
952995
}
953996

954997
*data = _data;
955998
return ML_ERROR_NONE;
956999

957-
failed_oom:
958-
_ml_tensors_data_destroy_internal (_data, TRUE);
1000+
error:
1001+
if (status != ML_ERROR_NONE)
1002+
_ml_tensors_data_destroy_internal (_data, TRUE);
9591003

960-
_ml_error_report_return (ML_ERROR_OUT_OF_MEMORY,
961-
"Failed to allocate memory blocks for tensors data. Check if it's out-of-memory.");
1004+
return status;
9621005
}
9631006

9641007
/**
@@ -1009,6 +1052,8 @@ int
10091052
ml_tensors_data_set_tensor_data (ml_tensors_data_h data, unsigned int index,
10101053
const void *raw_data, const size_t data_size)
10111054
{
1055+
1056+
ml_tensors_info_s *_info = NULL;
10121057
ml_tensors_data_s *_data;
10131058
int status = ML_ERROR_NONE;
10141059

@@ -1033,6 +1078,29 @@ ml_tensors_data_set_tensor_data (ml_tensors_data_h data, unsigned int index,
10331078
goto report;
10341079
}
10351080

1081+
/**
1082+
* By default, the tensor format is _NNS_TENSOR_FORMAT_STATIC.
1083+
* In this case, memory allocation and the setting of _data->tensors[index].size
1084+
* are already handled in ml_tensors_data_create().
1085+
* So for the STATIC format, both the `size` and `data` pointer should already be valid here.
1086+
*
1087+
* For FLEXIBLE format, memory may not be allocated yet and will be handled here.
1088+
*/
1089+
_info = (ml_tensors_info_s *) _data->info;
1090+
if (_info && _info->info.format == _NNS_TENSOR_FORMAT_FLEXIBLE) {
1091+
if (_data->tensors[index].data != NULL) {
1092+
g_free (_data->tensors[index].data);
1093+
_data->tensors[index].data = NULL;
1094+
_data->tensors[index].size = 0;
1095+
}
1096+
_ml_logw
1097+
("Memory allocation was not performed in ml_tensor_data_create() when tensor format is _NNS_TENSOR_FORMAT_FLEXIBLE.");
1098+
status = _ml_tensor_data_alloc (_data, index, data_size);
1099+
if (status != ML_ERROR_NONE) {
1100+
goto report;
1101+
}
1102+
}
1103+
10361104
if (data_size <= 0 || _data->tensors[index].size < data_size) {
10371105
_ml_error_report
10381106
("The parameter, data_size (%zu), is invalid. It should be larger than 0 and not larger than the required size of tensors[index: %u] (%zu).",

c/src/ml-api-inference-single.c

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ typedef struct
139139
gboolean invoking; /**< invoke running flag */
140140
ml_tensors_data_h in_tensors; /**< input tensor wrapper for processing */
141141
ml_tensors_data_h out_tensors; /**< output tensor wrapper for processing */
142-
gboolean is_flexible; /**< true if tensor filter handles flexible input/output */
143142

144143
GList *destroy_data_list; /**< data to be freed by filter */
144+
tensor_format format; /**< current format */
145145
} ml_single;
146146

147147
/**
@@ -781,11 +781,10 @@ ml_single_set_info_in_handle (ml_single_h single, gboolean is_input,
781781
ml_tensors_info_h info = NULL;
782782

783783
ml_single_get_gst_info (single_h, is_input, &gst_info);
784-
if (single_h->is_flexible) {
785-
gst_info.format = _NNS_TENSOR_FORMAT_FLEXIBLE;
786-
gst_info.num_tensors = 1U; /* TODO: Consider multiple input tensors filter */
784+
if (single_h->format == _NNS_TENSOR_FORMAT_FLEXIBLE) {
785+
gst_info.format = single_h->format;
786+
gst_info.num_tensors = 1U; /* TODO: Consider multiple input tensors filter */
787787
}
788-
789788
_ml_tensors_info_create_from_gst (&info, &gst_info);
790789

791790
gst_tensors_info_free (&gst_info);
@@ -854,7 +853,6 @@ ml_single_create_handle (ml_nnfw_type_e nnfw)
854853
single_h->output = NULL;
855854
single_h->destroy_data_list = NULL;
856855
single_h->invoking = FALSE;
857-
single_h->is_flexible = FALSE;
858856

859857
gst_tensors_info_init (&single_h->in_info);
860858
gst_tensors_info_init (&single_h->out_info);
@@ -955,7 +953,6 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
955953
gchar **list_models;
956954
guint i, num_models;
957955
char *hw_name;
958-
gboolean invoke_dynamic = FALSE;
959956

960957
check_feature_state (ML_FEATURE_INFERENCE);
961958

@@ -1078,10 +1075,14 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
10781075
fw_name = _ml_get_nnfw_subplugin_name (nnfw); /* retry for "auto" */
10791076
}
10801077
hw_name = _ml_nnfw_to_str_prop (hw);
1078+
10811079
g_object_set (filter_obj, "framework", fw_name, "accelerator", hw_name,
1082-
"model", converted_models, NULL);
1080+
"model", converted_models, "invoke-dynamic", info->invoke_dynamic, NULL);
10831081
g_free (hw_name);
10841082

1083+
if (info->invoke_dynamic)
1084+
single_h->format = _NNS_TENSOR_FORMAT_FLEXIBLE;
1085+
10851086
if (info->custom_option) {
10861087
g_object_set (filter_obj, "custom", info->custom_option, NULL);
10871088
}
@@ -1105,11 +1106,6 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info)
11051106
*
11061107
*/
11071108

1108-
if (invoke_dynamic) {
1109-
single_h->is_flexible = TRUE;
1110-
g_object_set (filter_obj, "invoke-dynamic", TRUE, NULL);
1111-
}
1112-
11131109
if (nnfw == ML_NNFW_TYPE_NNTR_INF) {
11141110
if (!in_tensors_info || !out_tensors_info) {
11151111
if (!in_tensors_info) {
@@ -1230,6 +1226,14 @@ ml_single_open_with_option (ml_single_h * single, const ml_option_h option)
12301226
if (ML_ERROR_NONE == ml_option_get (option, "framework_name", &value) ||
12311227
ML_ERROR_NONE == ml_option_get (option, "framework", &value))
12321228
info.fw_name = (gchar *) value;
1229+
if (ML_ERROR_NONE == ml_option_get (option, "invoke_dynamic", &value)) {
1230+
if (strcasecmp ((gchar *) value, "TRUE") == 0)
1231+
info.invoke_dynamic = TRUE;
1232+
}
1233+
if (ML_ERROR_NONE == ml_option_get (option, "invoke_async", &value)) {
1234+
if (strcasecmp ((gchar *) value, "TRUE") == 0)
1235+
info.invoke_async = TRUE;
1236+
}
12331237

12341238
return ml_single_open_custom (single, &info);
12351239
}
@@ -1345,10 +1349,8 @@ _ml_single_invoke_validate_data (ml_single_h single,
13451349
"The %d-th input tensor is not valid. There is no valid dimension metadata for this tensor.",
13461350
i);
13471351

1348-
if (single_h->is_flexible) {
1349-
/* Skip data size check for flexible */
1352+
if (single_h->format == _NNS_TENSOR_FORMAT_FLEXIBLE)
13501353
continue;
1351-
}
13521354

13531355
raw_size = _model->tensors[i].size;
13541356
if (G_UNLIKELY (_data->tensors[i].size != raw_size))

c/src/ml-api-service-extension.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,23 @@ _ml_extension_conf_parse_single (ml_service_s * mls, JsonObject * single)
339339
ml_option_set (option, "custom", g_strdup (custom), g_free);
340340
}
341341

342+
if (json_object_has_member (single, "invoke_dynamic")) {
343+
const gchar *invoke_dynamic =
344+
json_object_get_string_member (single, "invoke_dynamic");
345+
346+
if (STR_IS_VALID (invoke_dynamic))
347+
ml_option_set (option, "invoke_dynamic", g_strdup (invoke_dynamic),
348+
g_free);
349+
}
350+
351+
if (json_object_has_member (single, "invoke_async")) {
352+
const gchar *invoke_async =
353+
json_object_get_string_member (single, "invoke_async");
354+
355+
if (STR_IS_VALID (invoke_async))
356+
ml_option_set (option, "invoke_async", g_strdup (invoke_async), g_free);
357+
}
358+
342359
error:
343360
if (status == ML_ERROR_NONE)
344361
status = ml_single_open_with_option (&ext->single, option);

tests/capi/unittest_capi_service_extension.cc

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,109 @@ _extension_test_imgclf (ml_service_h handle, gboolean is_pipeline)
390390
_free_test_data (tdata);
391391
}
392392

393+
/**
394+
* @brief Callback function for scenario test.
395+
*/
396+
static void
397+
_extension_test_llamacpp_cb (
398+
ml_service_event_e event, ml_information_h event_data, void *user_data)
399+
{
400+
extension_test_data_s *tdata = (extension_test_data_s *) user_data;
401+
ml_tensors_data_h data = NULL;
402+
void *_raw = NULL;
403+
size_t _size = 0;
404+
int status;
405+
406+
switch (event) {
407+
case ML_SERVICE_EVENT_NEW_DATA:
408+
ASSERT_TRUE (event_data != NULL);
409+
410+
status = ml_information_get (event_data, "data", &data);
411+
EXPECT_EQ (status, ML_ERROR_NONE);
412+
413+
status = ml_tensors_data_get_tensor_data (data, 0U, &_raw, &_size);
414+
EXPECT_EQ (status, ML_ERROR_NONE);
415+
416+
g_print ("%s", (char *) _raw);
417+
418+
if (tdata)
419+
tdata->received++;
420+
break;
421+
default:
422+
break;
423+
}
424+
}
425+
426+
/**
427+
* @brief Internal function to run test with ml-service extension handle.
428+
*/
429+
static inline void
430+
_extension_test_llamacpp (ml_service_h handle, gboolean is_pipeline)
431+
{
432+
extension_test_data_s *tdata;
433+
ml_tensors_info_h info;
434+
ml_tensors_data_h input;
435+
int status;
436+
437+
const gchar input_text[] = "Hello my name is";
438+
439+
tdata = _create_test_data (is_pipeline);
440+
ASSERT_TRUE (tdata != NULL);
441+
442+
status = ml_service_set_event_cb (handle, _extension_test_llamacpp_cb, tdata);
443+
EXPECT_EQ (status, ML_ERROR_NONE);
444+
445+
/* Create and push input data. */
446+
status = ml_service_get_input_information (handle, NULL, &info);
447+
EXPECT_EQ (status, ML_ERROR_NONE);
448+
449+
ml_tensors_data_create (info, &input);
450+
451+
ml_tensors_data_set_tensor_data (input, 0U, input_text, strlen (input_text));
452+
453+
status = ml_service_request (handle, NULL, input);
454+
EXPECT_EQ (status, ML_ERROR_NONE);
455+
456+
g_usleep (5000000U);
457+
EXPECT_GT (tdata->received, 0);
458+
459+
/* Clear callback before releasing tdata. */
460+
status = ml_service_set_event_cb (handle, NULL, NULL);
461+
EXPECT_EQ (status, ML_ERROR_NONE);
462+
463+
ml_tensors_info_destroy (info);
464+
ml_tensors_data_destroy (input);
465+
466+
_free_test_data (tdata);
467+
}
468+
469+
/**
470+
* @brief Usage of ml-service extension API.
471+
*/
472+
TEST (MLServiceExtension, scenarioConfigLlamacpp)
473+
{
474+
ml_service_h handle;
475+
int status;
476+
477+
g_autofree gchar *model_file = _get_model_path ("llama-2-7b-chat.Q2_K.gguf");
478+
if (!g_file_test (model_file, G_FILE_TEST_EXISTS)) {
479+
g_critical ("Skipping scenarioConfigLlamacpp test due to missing model file. "
480+
"Please download model file from https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF");
481+
return;
482+
}
483+
484+
g_autofree gchar *config = get_config_path ("config_single_llamacpp.conf");
485+
486+
status = ml_service_new (config, &handle);
487+
ASSERT_EQ (status, ML_ERROR_NONE);
488+
489+
_extension_test_llamacpp (handle, FALSE);
490+
491+
status = ml_service_destroy (handle);
492+
EXPECT_EQ (status, ML_ERROR_NONE);
493+
}
494+
495+
393496
/**
394497
* @brief Usage of ml-service extension API.
395498
*/
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"single" :
3+
{
4+
"framework" : "llamacpp",
5+
"model" : ["../tests/test_models/models/llama-2-7b-chat.Q2_K.gguf"],
6+
"custom" : "num_predict:32",
7+
"invoke_dynamic" : "true",
8+
"invoke_async" : "false"
9+
}
10+
}

0 commit comments

Comments
 (0)