gnucash stable: Multiple changes pushed

John Ralls jralls at code.gnucash.org
Thu Jun 12 20:15:07 EDT 2025


Updated	 via  https://github.com/Gnucash/gnucash/commit/331fbf3d (commit)
	 via  https://github.com/Gnucash/gnucash/commit/94c343fb (commit)
	 via  https://github.com/Gnucash/gnucash/commit/d7624ab7 (commit)
	from  https://github.com/Gnucash/gnucash/commit/acfc8e6e (commit)



commit 331fbf3dec561b3973d4ffce60f36959feeb9dc1
Merge: acfc8e6ec9 94c343fb5f
Author: John Ralls <jralls at ceridwen.us>
Date:   Thu Jun 12 16:52:55 2025 -0700

    Merge John Ralls's branch 'bug799521' into stable


commit 94c343fb5f3f753b151bf76dc59584466e5390f2
Author: John Ralls <jralls at ceridwen.us>
Date:   Sat May 31 14:52:54 2025 -0700

    Bug 799521 - Segmentation fault on Autocomplete of Description with ß
    
    Replace GLib UTF-8 normalize and casefold comparison with
    gnc_unicode_has_substring_base_chars and use
    gnc_unicode_compare_base_chars for the exact match
    comparison. gnc_unicode_has_substring_base_chars is able to capture the
    correct length of the match in source string and so avoids the buffer
    overrun when applying the casefolded substring to the source
    string. Both functions compare the strings for equivalence correctly
    for the current locale, which casefolded comparison didn't do well.

diff --git a/gnucash/register/register-gnome/completioncell-gnome.c b/gnucash/register/register-gnome/completioncell-gnome.c
index a1cbffbb2c..9e636e924f 100644
--- a/gnucash/register/register-gnome/completioncell-gnome.c
+++ b/gnucash/register/register-gnome/completioncell-gnome.c
@@ -33,6 +33,7 @@
 #include <config.h>
 
 #include <string.h>
+#include <stdbool.h>
 #include <gdk/gdkkeysyms.h>
 
 #include "completioncell.h"
@@ -43,6 +44,7 @@
 #include "gnucash-sheetP.h"
 #include "table-allgui.h"
 #include "gnc-glib-utils.h"
+#include <gnc-unicode.h>
 
 typedef struct _PopBox
 {
@@ -63,7 +65,6 @@ typedef struct _PopBox
 
     gboolean      sort_enabled; // sort of list store enabled
     gboolean      register_is_reversed; // whether the register is reversed
-    gboolean      stop_searching; // set when there are no results
 
     gboolean      strict; // text entry must be in the list
     gboolean      in_list_select; // item selected in the list
@@ -129,8 +130,6 @@ gnc_completion_cell_init (CompletionCell* cell)
 
     cell->cell.gui_private = box;
 
-    box->stop_searching = FALSE;
-
     box->strict = FALSE;
     box->in_list_select = FALSE;
     box->occurrence = 0;
@@ -423,7 +422,6 @@ item_store_clear (CompletionCell* cell)
     if (box->sort_enabled) // if sorting, disable it
         set_sort_column_enabled (box, FALSE);
 
-    box->stop_searching = FALSE;
     gtk_list_store_clear (box->item_store);
 
     if (box->sort_enabled) // if sorting, enable it
@@ -511,18 +509,6 @@ list_store_append (GtkListStore *store, char* string,
                                       FOUND_LOCATION_COL, found_location, -1);
 }
 
-static char*
-normalize_and_fold (char* utf8_string)
-{
-    char *normalized = g_utf8_normalize (utf8_string, -1, G_NORMALIZE_NFC);
-    if (!normalized)
-        return NULL;
-
-    char *folded = g_utf8_casefold (normalized, -1);
-    g_free (normalized);
-    return folded;
-}
-
 static gint
 test_and_add (PopBox* box, const gchar *text, gint start_pos,
               gpointer key, gint occurrence_difference)
@@ -530,19 +516,15 @@ test_and_add (PopBox* box, const gchar *text, gint start_pos,
     gint ret_value = -1;
     gint text_length = g_utf8_strlen (text, -1);
 
-    if (start_pos > text_length)
+    if (start_pos >= text_length)
        return ret_value;
 
     gchar *sub_text = g_utf8_substring (text, start_pos, text_length);
-    gchar *sub_text_norm_fold = normalize_and_fold (sub_text);
-    gchar *found_text_ptr = g_strstr_len (sub_text_norm_fold, -1, box->newval);
-
-    if (found_text_ptr)
+    int pos = 0, len = 0;
+    if (gnc_unicode_has_substring_base_chars (box->newval, sub_text, &pos, &len))
     {
         gchar *markup = NULL, *prefix = NULL, *match = NULL, *suffix = NULL;
-        glong newval_length = g_utf8_strlen (box->newval, -1);
-        gulong found_location = g_utf8_pointer_to_offset (sub_text_norm_fold,
-                                                          found_text_ptr) + start_pos;
+        gint found_location = start_pos + pos;
         gboolean have_boundary = FALSE;
         gint prefix_length;
         gint weight;
@@ -554,18 +536,18 @@ test_and_add (PopBox* box, const gchar *text, gint start_pos,
 
         prefix_length = g_utf8_strlen (prefix, -1);
 
-        match = g_utf8_substring (text, found_location, found_location + newval_length);
+        match = g_utf8_substring (text, found_location, found_location + len);
 
-        if (found_location - start_pos >= 1)
+        if (pos >= 1)
         {
-            gunichar prev = g_utf8_get_char (g_utf8_offset_to_pointer (sub_text, found_location - start_pos - 1));
+            gunichar prev = g_utf8_get_char (g_utf8_offset_to_pointer (sub_text, pos - 1));
             if (prev && (g_unichar_isspace (prev) || g_unichar_ispunct (prev)))
                 have_boundary = TRUE;
             else
                 ret_value = found_location + 1;
         }
 
-        suffix = g_utf8_substring (text, found_location + newval_length, text_length);
+        suffix = g_utf8_substring (text, found_location + len, text_length);
 
         markup = g_markup_printf_escaped ("%s<b>%s</b>%s%s", prefix, match, suffix, " ");
 
@@ -573,7 +555,7 @@ test_and_add (PopBox* box, const gchar *text, gint start_pos,
         {
             weight = occurrence_difference; // sorted by recent first
 
-            if (g_strcmp0 (sub_text_norm_fold, box->newval) == 0) // exact match
+            if (gnc_unicode_compare_base_chars (sub_text, box->newval) == 0) // exact match
                 weight = 1;
 
             list_store_append (box->item_store, key, markup, weight, found_location);
@@ -583,7 +565,6 @@ test_and_add (PopBox* box, const gchar *text, gint start_pos,
         g_free (match);
         g_free (suffix);
     }
-    g_free (sub_text_norm_fold);
     g_free (sub_text);
     return ret_value;
 }
@@ -639,18 +620,15 @@ select_first_entry_in_list (PopBox* box)
 }
 
 static void
-populate_list_store (CompletionCell* cell, const gchar* str)
+populate_list_store (CompletionCell* cell, gchar* str)
 {
     PopBox* box = cell->cell.gui_private;
 
     box->in_list_select = FALSE;
     box->item_edit->popup_allocation_height = -1;
 
-    if (box->stop_searching)
-        return;
-
     if (str && *str)
-        box->newval = normalize_and_fold ((gchar*)str);
+        box->newval = g_strdup(str);
     else
         return;
 
@@ -689,7 +667,6 @@ populate_list_store (CompletionCell* cell, const gchar* str)
     // if no entries, do not show popup
     if (gtk_tree_model_iter_n_children (GTK_TREE_MODEL(box->item_store), NULL) == 1)
     {
-        box->stop_searching = TRUE;
         hide_popup (box);
     }
     else
@@ -714,7 +691,6 @@ gnc_completion_cell_modify_verify (BasicCell* bcell,
 {
     CompletionCell* cell = (CompletionCell*) bcell;
     PopBox* box = cell->cell.gui_private;
-    glong newval_chars = g_utf8_strlen (newval, newval_len);
 
     if (box->in_list_select)
     {
@@ -727,14 +703,6 @@ gnc_completion_cell_modify_verify (BasicCell* bcell,
         return;
     }
 
-    // check to enable searching
-    if (((*cursor_position < newval_chars) &&
-         (g_utf8_strlen (bcell->value, -1) < newval_chars)) ||
-         (g_utf8_strlen (bcell->value, -1) > newval_chars))
-    {
-         box->stop_searching = FALSE;
-    }
-
     // Are were deleting or inserting in the middle.
     if (change == NULL || *cursor_position < bcell->value_chars)
         *start_selection = *end_selection = *cursor_position;

commit d7624ab75cb5f1b7de2e7449a12513b22aeaa921
Author: John Ralls <jralls at ceridwen.us>
Date:   Sat May 31 13:32:09 2025 -0700

    [Core Utils] Add ICU-based substring search.
    
    See
    https://unicode-org.github.io/icu/userguide/collation/string-search.html
    for a discussion of ICU's string search capabilities. This new file
    implements finding substrings and string comparisons at all but the
    quaternary strength, that being of somewhat rarefied utility.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index afbc837828..9a5aaf4e6f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -562,7 +562,7 @@ get_filename_component(PERL_DIR ${PERL_EXECUTABLE} DIRECTORY)
 find_program(POD2MAN_EXECUTABLE pod2man HINTS ${PERL_DIR})
 
 #ICU
-find_package(ICU REQUIRED COMPONENTS uc i18n)
+find_package(ICU 54.0 REQUIRED COMPONENTS uc i18n)
 
 pkg_check_modules (LIBSECRET libsecret-1>=0.18)
 IF (LIBSECRET_FOUND)
diff --git a/libgnucash/core-utils/CMakeLists.txt b/libgnucash/core-utils/CMakeLists.txt
index fc9e402abe..4cd6d972cc 100644
--- a/libgnucash/core-utils/CMakeLists.txt
+++ b/libgnucash/core-utils/CMakeLists.txt
@@ -11,6 +11,7 @@ set (core_utils_SOURCES
   gnc-filepath-utils.cpp
   gnc-gkeyfile-utils.c
   gnc-glib-utils.c
+  gnc-unicode.cpp
   gnc-locale-utils.c
   gnc-locale-utils.cpp
   gnc-path.c
@@ -28,6 +29,7 @@ set(core_utils_noinst_HEADERS
   gnc-filepath-utils.h
   gnc-gkeyfile-utils.h
   gnc-glib-utils.h
+  gnc-unicode.h
   gnc-locale-utils.h
   gnc-locale-utils.hpp
   gnc-path.h
@@ -54,6 +56,7 @@ target_link_libraries(gnc-core-utils
         ${Boost_LIBRARIES}
         ${GOBJECT_LDFLAGS}
         ${GTK_MAC_LDFLAGS}
+        ${ICU_LIBRARIES}
         "$<$<BOOL:${MAC_INTEGRATION}>:${OSX_EXTRA_LIBRARIES}>")
 
 target_compile_definitions(gnc-core-utils
diff --git a/libgnucash/core-utils/gnc-unicode.cpp b/libgnucash/core-utils/gnc-unicode.cpp
new file mode 100644
index 0000000000..79f2bef849
--- /dev/null
+++ b/libgnucash/core-utils/gnc-unicode.cpp
@@ -0,0 +1,220 @@
+/********************************************************************
+ * gnc-unicode.cpp -- Unicode search and comparison with ICU.       *
+ *                                                                  *
+ * Copyright (C) 2025 John Ralls <jralls at ceridwen.us                *
+ *                                                                  *
+ * This program is free software; you can redistribute it and/or    *
+ * modify it under the terms of the GNU General Public License as   *
+ * published by the Free Software Foundation; either version 2 of   *
+ * the License, or (at your option) any later version.              *
+ *                                                                  *
+ * This program is distributed in the hope that it will be useful,  *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
+ * GNU General Public License for more details.                     *
+ *                                                                  *
+ * You should have received a copy of the GNU General Public License*
+ * along with this program; if not, contact:                        *
+ *                                                                  *
+ * Free Software Foundation           Voice:  +1-617-542-5942       *
+ * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
+ * Boston, MA  02110-1301,  USA       gnu at gnu.org                   *
+ ********************************************************************/
+
+#include "gnc-unicode.h"
+
+#include <memory>
+#include <unicode/stsearch.h>
+#include <unicode/tblcoll.h>
+#include <unicode/coll.h>
+#include "gnc-locale-utils.h"
+#include <glib-2.0/glib.h>
+
+// GLib log domain passed to every g_log() call in this file.
+constexpr const char *logdomain{"gnc.locale"};
+
+// Comparison levels understood by the helpers in this file. Each enumerator
+// is translated by collator_set_strength() into the icu::Collator strength
+// constant of the same name.
+enum class CompareStrength {
+  PRIMARY,
+  SECONDARY,
+  TERTIARY,
+  QUATERNARY,
+  IDENTICAL
+};
+
+/* Apply the requested comparison level to an ICU collator.
+ *
+ * Each CompareStrength enumerator is translated into the icu::Collator
+ * strength constant of the same name. A value outside the enumeration
+ * leaves the collator untouched.
+ */
+static void
+collator_set_strength(icu::Collator* collator, CompareStrength strength)
+{
+    icu::Collator::ECollationStrength icu_level;
+
+    switch (strength)
+    {
+        case CompareStrength::PRIMARY:
+            icu_level = icu::Collator::PRIMARY;
+            break;
+        case CompareStrength::SECONDARY:
+            icu_level = icu::Collator::SECONDARY;
+            break;
+        case CompareStrength::TERTIARY:
+            icu_level = icu::Collator::TERTIARY;
+            break;
+        case CompareStrength::QUATERNARY:
+            icu_level = icu::Collator::QUATERNARY;
+            break;
+        case CompareStrength::IDENTICAL:
+            icu_level = icu::Collator::IDENTICAL;
+            break;
+        default:
+            return; // not a known level: nothing to set
+    }
+
+    collator->setStrength(icu_level);
+}
+
+/* Search haystack for the first run of text that the current locale's
+ * collation rules treat as equivalent to needle at the given strength.
+ *
+ * @param needle   UTF-8 text to look for.
+ * @param haystack UTF-8 text to search in.
+ * @param position optional output: offset of the match in haystack,
+ *                 counted in Unicode code points (UTF-8 characters).
+ * @param length   optional output: length of the matched part of haystack,
+ *                 counted in Unicode code points. It can differ from the
+ *                 length of needle (e.g. "ss" matching a single sharp s).
+ * @param strength collation level to compare at.
+ * @return true if a match was found. On false the outputs are not written.
+ *         ICU failures are logged to the gnc.locale domain and reported as
+ *         "not found".
+ */
+static bool
+unicode_has_substring_internal(const char* needle, const char* haystack,
+                       int* position, int* length,
+                       CompareStrength strength)
+{
+    // Nothing can match a missing or empty string. ICU is expected to reject
+    // an empty pattern or text as an illegal argument, so answer directly
+    // instead of logging an error. NOTE(review): confirm against the ICU
+    // version in use.
+    if (!needle || !*needle || !haystack || !*haystack)
+        return false;
+
+    UErrorCode status{U_ZERO_ERROR};
+    auto locale{gnc_locale_name()};
+    auto u_needle{icu::UnicodeString::fromUTF8(needle)};
+    auto u_haystack{icu::UnicodeString::fromUTF8(haystack)};
+    icu::StringSearch search(u_needle, u_haystack, locale, nullptr, status);
+    g_free(locale);
+
+    // G_LOG_LEVEL_ERROR is always fatal in GLib and would abort before the
+    // return below; CRITICAL logs the problem and lets the caller carry on.
+    if (U_FAILURE(status))
+    {
+        g_log(logdomain, G_LOG_LEVEL_CRITICAL,
+              "StringSearch creation failed for %s", haystack);
+        return false;
+    }
+
+    // The search must be reset after its collator's attributes change.
+    collator_set_strength(search.getCollator(), strength);
+    search.reset();
+
+    auto pos{search.first(status)};
+    if (U_FAILURE(status))
+    {
+        g_log(logdomain, G_LOG_LEVEL_CRITICAL,
+              "StringSearch encountered an error finding %s in %s",
+              needle, haystack);
+        return false;
+    }
+    if (pos == USEARCH_DONE)
+    {
+        g_log(logdomain, G_LOG_LEVEL_DEBUG, "%s not found in %s",
+              needle, haystack);
+        return false;
+    }
+
+    // ICU reports offsets in UTF-16 code units. Callers index the UTF-8
+    // haystack by character, so convert to code points; the two differ only
+    // for text outside the Basic Multilingual Plane.
+    const auto char_pos = u_haystack.countChar32(0, pos);
+    const auto char_len = u_haystack.countChar32(pos, search.getMatchedLength());
+
+    if (position)
+        *position = char_pos;
+    if (length)
+        *length = char_len;
+
+    g_log(logdomain, G_LOG_LEVEL_DEBUG, "%s found in %s at index %d",
+          needle, haystack, char_pos);
+    return true;
+}
+
+/* Public entry point: substring search at the primary ("base characters")
+ * level. Forwards every argument to unicode_has_substring_internal(). */
+bool
+gnc_unicode_has_substring_base_chars(const char* needle,
+                                     const char* haystack,
+                                     int* position,
+                                     int* length)
+{
+    constexpr auto level = CompareStrength::PRIMARY;
+    const bool found = unicode_has_substring_internal(needle, haystack,
+                                                      position, length, level);
+    return found;
+}
+
+/* Public entry point: substring search at the secondary ("accented
+ * characters") level. Forwards every argument to
+ * unicode_has_substring_internal(). */
+bool
+gnc_unicode_has_substring_accented_chars(const char* needle,
+                                         const char* haystack,
+                                         int* position,
+                                         int* length)
+{
+    constexpr auto level = CompareStrength::SECONDARY;
+    const bool found = unicode_has_substring_internal(needle, haystack,
+                                                      position, length, level);
+    return found;
+}
+
+/* Public entry point: substring search at the tertiary ("accented,
+ * case-sensitive") level. Forwards every argument to
+ * unicode_has_substring_internal(). */
+bool
+gnc_unicode_has_substring_accented_case_sensitive(const char* needle,
+                                                  const char* haystack,
+                                                  int* position,
+                                                  int* length)
+{
+    constexpr auto level = CompareStrength::TERTIARY;
+    const bool found = unicode_has_substring_internal(needle, haystack,
+                                                      position, length, level);
+    return found;
+}
+
+/* Count the UTF-8 encoded characters in [begin, end) by counting every
+ * byte that is not a continuation byte (10xxxxxx). */
+static int
+utf8_char_count(const char* begin, const char* end)
+{
+    int count = 0;
+    for (auto p = begin; p < end; ++p)
+    {
+        if ((static_cast<unsigned char>(*p) & 0xC0) != 0x80)
+            ++count;
+    }
+    return count;
+}
+
+/* Find the first byte-for-byte occurrence of needle in haystack.
+ *
+ * No normalization or case folding is applied: the match is exact.
+ *
+ * @param needle   UTF-8 text to look for.
+ * @param haystack UTF-8 text to search in.
+ * @param position optional output: offset of the match in haystack,
+ *                 counted in UTF-8 characters rather than bytes.
+ * @param length   optional output: length of the match in UTF-8 characters.
+ * @return true if needle occurs in haystack. A NULL or empty needle, or a
+ *         NULL haystack, is reported as not found and leaves the outputs
+ *         untouched.
+ */
+bool
+gnc_unicode_has_substring_identical(const char* needle,
+                                    const char*haystack,
+                                    int* position,
+                                    int* length)
+{
+    if (!needle || !*needle || !haystack)
+        return false;
+
+    // strstr takes the text to search first and the pattern second.
+    auto location = strstr(haystack, needle);
+    if (!location)
+        return false;
+
+    if (position)
+        *position = utf8_char_count(haystack, location);
+    if (length)
+        *length = utf8_char_count(needle, needle + strlen(needle));
+    return true;
+}
+
+/* Compare two UTF-8 strings with the current locale's collator at the given
+ * strength.
+ *
+ * @param one      first UTF-8 string.
+ * @param two      second UTF-8 string.
+ * @param strength collation level to compare at.
+ * @return -1 if one sorts before two, 0 if they are equivalent at this
+ *         strength, 1 if one sorts after two, and -99 if ICU reported an
+ *         error (which is also logged to the gnc.locale domain).
+ */
+static int
+unicode_compare_internal(const char* one, const char* two,
+                         CompareStrength strength)
+{
+    UErrorCode status{U_ZERO_ERROR};
+    auto locale{gnc_locale_name()};
+    std::unique_ptr<icu::Collator> coll(
+        icu::Collator::createInstance(icu::Locale(locale), status));
+
+    // locale is still needed by the log messages below, so it is released
+    // on each exit path instead of right after the collator is created.
+    // G_LOG_LEVEL_ERROR is always fatal in GLib and would abort before the
+    // -99 return; CRITICAL logs the problem and lets the caller see -99.
+    if (U_FAILURE(status))
+    {
+        g_log(logdomain, G_LOG_LEVEL_CRITICAL,
+              "Failed to create collator for locale %s: %s",
+              locale, u_errorName(status));
+        g_free(locale);
+        return -99;
+    }
+
+    collator_set_strength(coll.get(), strength);
+
+    // Decode the inputs explicitly as UTF-8, as the substring search does,
+    // instead of relying on the implicit const char* conversion, which ICU
+    // documents as using the default codepage.
+    auto result = coll->compare(icu::UnicodeString::fromUTF8(one),
+                                icu::UnicodeString::fromUTF8(two), status);
+
+    if (U_FAILURE(status))
+    {
+        g_log(logdomain, G_LOG_LEVEL_CRITICAL,
+              "Comparison of %s and %s in locale %s failed: %s",
+              one, two, locale, u_errorName(status));
+        g_free(locale);
+        return -99;
+    }
+
+    g_free(locale);
+
+    if (result == UCOL_LESS)
+        return -1;
+    return result == UCOL_EQUAL ? 0 : 1;
+}
+
+/* Public entry point: compare at the primary ("base characters") level by
+ * forwarding to unicode_compare_internal(). */
+int
+gnc_unicode_compare_base_chars(const char* one, const char* two)
+{
+    constexpr auto level = CompareStrength::PRIMARY;
+    const int order = unicode_compare_internal(one, two, level);
+    return order;
+}
+
+/* Public entry point: compare at the secondary ("accented characters")
+ * level by forwarding to unicode_compare_internal(). */
+int
+gnc_unicode_compare_accented_chars(const char* one, const char* two)
+{
+    constexpr auto level = CompareStrength::SECONDARY;
+    const int order = unicode_compare_internal(one, two, level);
+    return order;
+}
+
+/* Public entry point: compare at the tertiary ("accented, case-sensitive")
+ * level by forwarding to unicode_compare_internal(). */
+int
+gnc_unicode_compare_accented_case_sensitive(const char* one, const char* two)
+{
+    constexpr auto level = CompareStrength::TERTIARY;
+    const int order = unicode_compare_internal(one, two, level);
+    return order;
+}
+
+/* Compare two strings byte for byte, with no normalization or folding.
+ *
+ * @param one a NUL-terminated string.
+ * @param two another NUL-terminated string.
+ * @return -1 if one orders before two, 0 if they are identical, 1 if one
+ *         orders after two.
+ */
+int
+gnc_unicode_compare_identical(const char* one, const char* two)
+{
+    // strcmp only promises the sign of its result; collapse it to -1/0/1 so
+    // it matches the sibling comparators and can never equal their -99
+    // error value.
+    const int order = strcmp(one, two);
+    return (order > 0) - (order < 0);
+}
diff --git a/libgnucash/core-utils/gnc-unicode.h b/libgnucash/core-utils/gnc-unicode.h
new file mode 100644
index 0000000000..c5c255d103
--- /dev/null
+++ b/libgnucash/core-utils/gnc-unicode.h
@@ -0,0 +1,144 @@
+/********************************************************************
+ * gnc-unicode.h -- Unicode search and comparison with ICU.         *
+ *                                                                  *
+ * Copyright (C) 2025 John Ralls <jralls at ceridwen.us                *
+ *                                                                  *
+ * This program is free software; you can redistribute it and/or    *
+ * modify it under the terms of the GNU General Public License as   *
+ * published by the Free Software Foundation; either version 2 of   *
+ * the License, or (at your option) any later version.              *
+ *                                                                  *
+ * This program is distributed in the hope that it will be useful,  *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
+ * GNU General Public License for more details.                     *
+ *                                                                  *
+ * You should have received a copy of the GNU General Public License*
+ * along with this program; if not, contact:                        *
+ *                                                                  *
+ * Free Software Foundation           Voice:  +1-617-542-5942       *
+ * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
+ * Boston, MA  02110-1301,  USA       gnu at gnu.org                   *
+ ********************************************************************/
+
+#pragma once
+#include <stdbool.h>
+
+/** @addtogroup Localization These functions perform string comparison
+    and collation according to the Unicode Common Locale Data
+    Repository rules. The CLDR specifies five levels of
+    comparison.
+
+    - The primary or base level considers all variant codepoints
+    representing a character to be equivalent regardless of case or
+    decorations like accents and vowel or tone marks.
+    - The secondary level differentiates between letters with
+    decorations but still ignores case.
+    - The tertiary level differentiates based on case, decorations,
+    and variants, for example A and Ⓐ.
+    - The Quaternary level differentiates words with punctuation, for
+    example "ab" and "a-b".
+    - Identical differentiates all codepoints with no implicit
+      normalization so a character constructed with combining marks
+      will compare different from the same character represented as a
+      single codepoint.
+*/
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/** Find the first Unicode-equivalent UTF-8-encoded substring in a
+ * UTF-8-encoded string comparing characters at the CLDR primary
+ * level, setting the starting position and length of the matching
+ * part of the string.
+ *
+ * NOTE(review): in the unit tests position and length count characters,
+ * not bytes (a sharp s counts as one); confirm the unit for text outside
+ * the Basic Multilingual Plane.
+ * @param needle The substring to search for
+ * @param haystack the string to search in
+ * @param position output: the position of needle in haystack
+ * @param length output: the length of the match
+ * @return true if needle is found in haystack
+ */
+bool gnc_unicode_has_substring_base_chars(const char* needle, const
+                                          char* haystack, int* position,
+                                          int* length);
+
+/** Find the first Unicode-equivalent UTF-8-encoded substring in a
+ * UTF-8-encoded string comparing characters at the CLDR secondary
+ * level, setting the starting position and length of the matching
+ * part of the string.
+ *
+ * NOTE(review): in the unit tests position and length count characters,
+ * not bytes; confirm the unit for text outside the Basic Multilingual
+ * Plane.
+ * @param needle The substring to search for
+ * @param haystack the string to search in
+ * @param position output: the position of needle in haystack
+ * @param length output: the length of the match
+ * @return true if needle is found in haystack
+ */
+bool gnc_unicode_has_substring_accented_chars(const char* needle, const
+                                              char* haystack, int* position,
+                                              int* length);
+
+/** Find the first Unicode-equivalent UTF-8-encoded substring in a
+ * UTF-8-encoded string comparing characters at the CLDR tertiary
+ * level, setting the starting position and length of the matching
+ * part of the string.
+ *
+ * NOTE(review): in the unit tests position and length count characters,
+ * not bytes; confirm the unit for text outside the Basic Multilingual
+ * Plane.
+ * @param needle The substring to search for
+ * @param haystack the string to search in
+ * @param position output: the position of needle in haystack
+ * @param length output: the length of the match
+ * @return true if needle is found in haystack
+ */
+bool gnc_unicode_has_substring_accented_case_sensitive(const char* needle, const
+                                                       char* haystack, int* position,
+                                                       int* length);
+/** Find the first Unicode-equivalent UTF-8-encoded substring in a
+ * UTF-8-encoded string comparing characters at the CLDR identical
+ * level, setting the starting position and length of the matching
+ * part of the string.
+ * @param needle The substring to search for
+ * @param haystack the string to search in
+ * @param position output: the position of needle in haystack
+ * @param length output: the length of the match
+ * @return true if needle is found in haystack
+ */
+bool gnc_unicode_has_substring_identical(const char* needle, const
+                                         char* haystack, int* position,
+                                         int* length);
+/** Compare two UTF-8 encoded strings for equivalence at the CLDR
+ * primary level in the current locale. Errors are logged to
+ * gnc.locale.
+ * @param one a string
+ * @param two another string
+ * @return 0 if one and two are equivalent, -1 if one is less than
+ * two, 1 if one is greater than two, -99 on error.
+ */
+int gnc_unicode_compare_base_chars(const char* one, const char* two);
+
+/** Compare two UTF-8 encoded strings for equivalence at the CLDR
+ * secondary level in the current locale. Errors are logged to
+ * gnc.locale.
+ * @param one a string
+ * @param two another string
+ * @return 0 if one and two are equivalent, -1 if one is less than
+ * two, 1 if one is greater than two, -99 on error.
+ */
+int gnc_unicode_compare_accented_chars(const char* one, const char* two);
+
+/** Compare two UTF-8 encoded strings for equivalence at the CLDR
+ * tertiary level in the current locale. Errors are logged to
+ * gnc.locale.
+ * @param one a string
+ * @param two another string
+ * @return 0 if one and two are equivalent, -1 if one is less than
+ * two, 1 if one is greater than two, -99 on error.
+ */
+int gnc_unicode_compare_accented_case_sensitive(const char* one, const char* two);
+/** Compare two UTF-8 encoded strings for equivalence at the CLDR
+ * identical level in the current locale.
+ * @param one a string
+ * @param two another string
+ * @return 0 if one and two are equivalent, -1 if one is less than
+ * two, 1 if one is greater than two, -99 on error.
+ */
+int gnc_unicode_compare_identical(const char* one, const char* two);
+#ifdef __cplusplus
+}
+#endif
diff --git a/libgnucash/core-utils/test/CMakeLists.txt b/libgnucash/core-utils/test/CMakeLists.txt
index 5d10837ff4..f5879a696f 100644
--- a/libgnucash/core-utils/test/CMakeLists.txt
+++ b/libgnucash/core-utils/test/CMakeLists.txt
@@ -49,6 +49,22 @@ set(test_gnc_path_util_SOURCES
 gnc_add_test(test-gnc-path-util "${test_gnc_path_util_SOURCES}"
   gtest_core_utils_INCLUDES gtest_core_utils_LIBS "GNC_UNINSTALLED=yes")
 
+set(gtest_icu_locale_INCLUDES
+  ${CMAKE_BINARY_DIR}/common
+  ${MODULEPATH})
+
+set(gtest_icu_locale_LIBS
+  ${Boost_LIBRARIES}
+  ICU::uc
+  ICU::i18n
+  PkgConfig::GLIB2
+  gtest)
+
+gnc_add_test(test-gnc-unicode
+  "${MODULEPATH}/gnc-unicode.cpp;${MODULEPATH}/gnc-locale-utils.c;gtest-gnc-unicode.cpp"
+  gtest_icu_locale_INCLUDES
+  gtest_icu_locale_LIBS)
+
 set_dist_list(test_core_utils_DIST CMakeLists.txt
   test-gnc-glib-utils.c test-resolve-file-path.c test-userdata-dir.c
-  test-userdata-dir-invalid-home.c gtest-path-utilities.cpp)
+  test-userdata-dir-invalid-home.c gtest-gnc-unicode.cpp gtest-path-utilities.cpp)
diff --git a/libgnucash/core-utils/test/gtest-gnc-unicode.cpp b/libgnucash/core-utils/test/gtest-gnc-unicode.cpp
new file mode 100644
index 0000000000..4f97b79492
--- /dev/null
+++ b/libgnucash/core-utils/test/gtest-gnc-unicode.cpp
@@ -0,0 +1,135 @@
+/********************************************************************\
+ * gtest-gnc-unicode.cpp -- Unit tests for gnc-unicode              *
+ *                                                                  *
+ * Copyright 2025 John Ralls <jralls at ceridwen.us>                   *
+ *                                                                  *
+ * This program is free software; you can redistribute it and/or    *
+ * modify it under the terms of the GNU General Public License as   *
+ * published by the Free Software Foundation; either version 2 of   *
+ * the License, or (at your option) any later version.              *
+ *                                                                  *
+ * This program is distributed in the hope that it will be useful,  *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
+ * GNU General Public License for more details.                     *
+ *                                                                  *
+ * You should have received a copy of the GNU General Public License*
+ * along with this program; if not, contact:                        *
+ *                                                                  *
+ * Free Software Foundation           Voice:  +1-617-542-5942       *
+ * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
+ * Boston, MA  02110-1301,  USA       gnu at gnu.org                   *
+ *                                                                  *
+\********************************************************************/
+
+#include <gnc-unicode.h>
+#include <stdbool.h>
+#include <gtest/gtest.h>
+
+// Primary-level search in plain ASCII: a needle missing a letter must not
+// match and must leave the outputs untouched; a needle differing only in
+// case must match.
+// (result/pos/len keep their names because gtest prints them on failure.)
+TEST(GncUnicode, test_ss_base_chars)
+{
+    const char* haystack = "Necklace for Bessie";
+    int pos = 0;
+    int len = 0;
+
+    bool result = gnc_unicode_has_substring_base_chars("besi", haystack,
+                                                       &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+
+    result = gnc_unicode_has_substring_base_chars("bessi", haystack,
+                                                  &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(5, len);
+}
+// Secondary-level search in plain ASCII: case is still ignored, so the
+// lower-case needle matches the capitalized word.
+TEST(GncUnicode, test_ss_accented)
+{
+    const char* haystack = "Necklace for Bessie";
+    int pos = 0;
+    int len = 0;
+
+    const bool result = gnc_unicode_has_substring_accented_chars("bessi", haystack,
+                                                                 &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(5, len);
+}
+
+// Tertiary-level search in plain ASCII: the wrong-case needle is rejected
+// without touching the outputs, the right-case needle matches.
+TEST(GncUnicode, test_ss_accented_case_sensitive)
+{
+    const char* haystack = "Necklace for Bessie";
+    int pos = 0;
+    int len = 0;
+
+    bool result = gnc_unicode_has_substring_accented_case_sensitive("bessi", haystack,
+                                                                    &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+
+    result = gnc_unicode_has_substring_accented_case_sensitive("Bessi", haystack,
+                                                               &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(5, len);
+}
+
+// Needle and haystack both contain a literal sharp s (U+00DF, bytes C3 9F).
+// The match is four characters long at every level that ignores case; the
+// case-sensitive level needs the capital B.
+TEST(GncUnicode, test_german_ss_literal)
+{
+    const char* haystack = "Necklace for Be\xc3\x9fie";
+    const char* lower_needle = "be\xc3\x9fi";
+    const char* capital_needle = "Be\xc3\x9fi";
+    int pos = 0;
+    int len = 0;
+
+    bool result = gnc_unicode_has_substring_base_chars(lower_needle, haystack,
+                                                       &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(4, len);
+
+    pos = 0;
+    len = 0;
+    result = gnc_unicode_has_substring_accented_chars(lower_needle, haystack,
+                                                      &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(4, len);
+
+    pos = 0;
+    len = 0;
+    result = gnc_unicode_has_substring_accented_case_sensitive(lower_needle, haystack,
+                                                               &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+
+    result = gnc_unicode_has_substring_accented_case_sensitive(capital_needle, haystack,
+                                                               &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(4, len);
+}
+
+// At the primary level an "ss" needle matches a sharp s in the haystack;
+// the reported length is that of the haystack text (four characters), not
+// of the five-character needle.
+TEST(GncUnicode, test_german_ss_decorated_base_chars_nocap)
+{
+    const char* haystack = "Necklace for Be\xc3\x9fie";
+    int pos = 0;
+    int len = 0;
+
+    const bool result = gnc_unicode_has_substring_base_chars("bessi", haystack,
+                                                             &pos, &len);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(13, pos);
+    EXPECT_EQ(4, len);
+}
+
+// At the tertiary level "ss" no longer matches a sharp s, whatever the
+// case of the needle's first letter; the outputs stay untouched.
+TEST(GncUnicode, test_german_ss_decorated_accented_cap)
+{
+    const char* haystack = "Necklace for Be\xc3\x9fie";
+    int pos = 0;
+    int len = 0;
+
+    bool result = gnc_unicode_has_substring_accented_case_sensitive("bessi", haystack,
+                                                                    &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+
+    result = gnc_unicode_has_substring_accented_case_sensitive("Bessi", haystack,
+                                                               &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+}
+
+// At the secondary level "ss" does not match a sharp s either; the outputs
+// stay untouched.
+TEST(GncUnicode, test_german_ss_decorated_accented_nocap)
+{
+    const char* haystack = "Necklace for Be\xc3\x9fie";
+    int pos = 0;
+    int len = 0;
+
+    const bool result = gnc_unicode_has_substring_accented_chars("bessi", haystack,
+                                                                 &pos, &len);
+    EXPECT_FALSE(result);
+    EXPECT_EQ(0, pos);
+    EXPECT_EQ(0, len);
+}
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 0e69772a51..14fc124d62 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -620,6 +620,7 @@ libgnucash/core-utils/gnc-locale-utils.c
 libgnucash/core-utils/gnc-locale-utils.cpp
 libgnucash/core-utils/gnc-path.c
 libgnucash/core-utils/gnc-prefs.c
+libgnucash/core-utils/gnc-unicode.cpp
 libgnucash/core-utils/gnc-version.c
 libgnucash/engine/Account.cpp
 libgnucash/engine/cap-gains.cpp



Summary of changes:
 CMakeLists.txt                                     |   2 +-
 .../register/register-gnome/completioncell-gnome.c |  58 ++----
 libgnucash/core-utils/CMakeLists.txt               |   3 +
 libgnucash/core-utils/gnc-unicode.cpp              | 220 +++++++++++++++++++++
 libgnucash/core-utils/gnc-unicode.h                | 144 ++++++++++++++
 libgnucash/core-utils/test/CMakeLists.txt          |  18 +-
 libgnucash/core-utils/test/gtest-gnc-unicode.cpp   | 135 +++++++++++++
 po/POTFILES.in                                     |   1 +
 8 files changed, 534 insertions(+), 47 deletions(-)
 create mode 100644 libgnucash/core-utils/gnc-unicode.cpp
 create mode 100644 libgnucash/core-utils/gnc-unicode.h
 create mode 100644 libgnucash/core-utils/test/gtest-gnc-unicode.cpp



More information about the gnucash-changes mailing list