Skip to content

Commit 55bb81c

Browse files
committed
libyang misinterprets patterns that contain multiple Unicode blocks: when several Unicode blocks appear within a single
bracket expression [ ], libyang maps every one of them to the Latin-1 Supplement Unicode block. This commit fixes the fault and adds several unit tests covering the correct behavior. On branch unicode_block_bug. Changes to be committed: modified: src/ly_common.c; modified: tests/utests/types/string.c
1 parent f302d86 commit 55bb81c

2 files changed

Lines changed: 52 additions & 3 deletions

File tree

src/ly_common.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* @brief common internal definitions for libyang
55
*
66
* Copyright (c) 2018 - 2026 CESNET, z.s.p.o.
7+
* Copyright (c) 2026 Nokia
78
*
89
* This source code is licensed under BSD 3-Clause License (the "License").
910
* You may not use this file except in compliance with the License.
@@ -813,7 +814,7 @@ ly_pat_compile_xmlschema_chblocks_xmlschema2perl(const char *pattern, char **reg
813814
{NULL, NULL}
814815
};
815816

816-
size_t idx, idx2, start, end;
817+
size_t idx, idx2, start, end, ublock;
817818
char *perl_regex, *ptr;
818819

819820
perl_regex = *regex;
@@ -849,6 +850,7 @@ ly_pat_compile_xmlschema_chblocks_xmlschema2perl(const char *pattern, char **reg
849850
return ly_err_new(err, LY_EVALID, 0, NULL, NULL, "Regular expression \"%s\" is not valid (\"%s\": %s).",
850851
pattern, perl_regex + start + 5, "unknown block name");
851852
}
853+
ublock = idx;
852854

853855
/* make the space in the string and replace the block (but we cannot include brackets if it was already enclosed in them) */
854856
for (idx2 = 0, idx = 0; idx2 < start; ++idx2) {
@@ -863,10 +865,10 @@ ly_pat_compile_xmlschema_chblocks_xmlschema2perl(const char *pattern, char **reg
863865
if (idx) {
864866
/* skip brackets */
865867
memmove(perl_regex + start + (URANGE_LEN - 2), perl_regex + end, strlen(perl_regex + end) + 1);
866-
memcpy(perl_regex + start, ublock2urange[idx][1] + 1, URANGE_LEN - 2);
868+
memcpy(perl_regex + start, ublock2urange[ublock][1] + 1, URANGE_LEN - 2);
867869
} else {
868870
memmove(perl_regex + start + URANGE_LEN, perl_regex + end, strlen(perl_regex + end) + 1);
869-
memcpy(perl_regex + start, ublock2urange[idx][1], URANGE_LEN);
871+
memcpy(perl_regex + start, ublock2urange[ublock][1], URANGE_LEN);
870872
}
871873
}
872874

tests/utests/types/string.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* @brief test for string values
55
*
66
* Copyright (c) 2021 CESNET, z.s.p.o.
7+
* Copyright (c) 2026 Nokia
78
*
89
* This source code is licensed under BSD 3-Clause License (the "License").
910
* You may not use this file except in compliance with the License.
@@ -817,6 +818,52 @@ test_data_xml(void **state)
817818
CHECK_LOG_CTX("Unsatisfied pattern - \"abc\" does not match \"a.*b\".", "/T_ANCHOR:port", 1);
818819
TEST_ERROR_XML("T_ANCHOR", "cab");
819820
CHECK_LOG_CTX("Unsatisfied pattern - \"cab\" does not match \"a.*b\".", "/T_ANCHOR:port", 1);
821+
822+
/* Unicode block test 1 - Basic Latin */
823+
schema = MODULE_CREATE_YANG("T_UB_1", "leaf port {type string { pattern '\\p{IsBasicLatin}+';} } ");
824+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
825+
TEST_SUCCESS_XML("T_UB_1", "B4s1cLatin!", STRING, "B4s1cLatin!");
826+
827+
/* Unicode block test 2 - Basic Latin within brackets */
828+
schema = MODULE_CREATE_YANG("T_UB_2", "leaf port {type string { pattern '[\\p{IsBasicLatin}]+';} } ");
829+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
830+
TEST_SUCCESS_XML("T_UB_2", "B4s1cLatin!", STRING, "B4s1cLatin!");
831+
832+
/* Unicode block test 3 - Latin-1 Supplement within brackets */
833+
schema = MODULE_CREATE_YANG("T_UB_3", "leaf port {type string { pattern '[\\p{IsLatin-1Supplement}]+';} } ");
834+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
835+
TEST_SUCCESS_XML("T_UB_3", "ÁÉÍÓÖÜ", STRING, "ÁÉÍÓÖÜ");
836+
837+
/* Unicode block test 4 - Latin-1 Supplement within brackets */
838+
schema = MODULE_CREATE_YANG("T_UB_4", "leaf port {type string { pattern '[\\p{IsLatin-1Supplement}]+';} } ");
839+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
840+
TEST_SUCCESS_XML("T_UB_4", "ÁÉÍÓÖÜ", STRING, "ÁÉÍÓÖÜ");
841+
842+
/* Unicode block test 5 - Latin Extended-A within brackets */
843+
schema = MODULE_CREATE_YANG("T_UB_5", "leaf port {type string { pattern '[\\p{IsLatinExtended-A}]+';} } ");
844+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
845+
TEST_SUCCESS_XML("T_UB_5", "ŐŰőű", STRING, "ŐŰőű");
846+
847+
/* Unicode block test 6 - Basic Latin, Latin-1 Supplement, and Latin Extended-A */
848+
schema = MODULE_CREATE_YANG("T_UB_6", "leaf port {type string {"
849+
" pattern '[\\p{IsBasicLatin}\\p{IsLatin-1Supplement}\\p{IsLatinExtended-A}]+';"
850+
"}} ");
851+
UTEST_ADD_MODULE(schema, LYS_IN_YANG, NULL, NULL);
852+
TEST_SUCCESS_XML("T_UB_6", "Árvíztűrő tükörfúrógép!", STRING, "Árvíztűrő tükörfúrógép!");
853+
854+
/* Unicode block test 7 - Unknown Unicode block */
855+
schema = MODULE_CREATE_YANG("T_UB_7", "leaf port {type string { pattern '\\p{IsUnknownUnicodeBlock}+';} } ");
856+
UTEST_INVALID_MODULE(schema, LYS_IN_YANG, NULL, LY_EVALID);
857+
CHECK_LOG_CTX("Regular expression \"\\p{IsUnknownUnicodeBlock}+\" "
858+
"is not valid (\"UnknownUnicodeBlock}+\": unknown block name).", "/T_UB_7:port", 0);
859+
860+
/* Unicode block test 8 - Unknown Unicode block with Basic Latin */
861+
schema = MODULE_CREATE_YANG("T_UB_8", "leaf port {type string { "
862+
" pattern '[\\p{IsBasicLatin}\\p{IsUnknownUnicodeBlock}]+';"
863+
"}} ");
864+
UTEST_INVALID_MODULE(schema, LYS_IN_YANG, NULL, LY_EVALID);
865+
CHECK_LOG_CTX("Regular expression \"[\\p{IsBasicLatin}\\p{IsUnknownUnicodeBlock}]+\" "
866+
"is not valid (\"UnknownUnicodeBlock}]+\": unknown block name).", "/T_UB_8:port", 0);
820867
}
821868

822869
static void

0 commit comments

Comments
 (0)