gnucash stable: Improve GncNumeric(std::string) to handle Unicode spaces.

John Ralls jralls at code.gnucash.org
Fri Mar 7 16:20:02 EST 2025


Updated	 via  https://github.com/Gnucash/gnucash/commit/0e15e12e (commit)
	from  https://github.com/Gnucash/gnucash/commit/1fa4f71f (commit)



commit 0e15e12ea98d9231b6f4ae2109f477d0f8fd70eb
Author: John Ralls <jralls at ceridwen.us>
Date:   Fri Mar 7 13:19:10 2025 -0800

    Improve GncNumeric(std::string) to handle Unicode spaces.

diff --git a/libgnucash/engine/gnc-numeric.cpp b/libgnucash/engine/gnc-numeric.cpp
index 62aacfaa60..629aca0a8c 100644
--- a/libgnucash/engine/gnc-numeric.cpp
+++ b/libgnucash/engine/gnc-numeric.cpp
@@ -31,6 +31,7 @@
 #include <cstdint>
 #include <sstream>
 #include <boost/regex.hpp>
+#include <boost/regex/icu.hpp>
 #include <boost/locale/encoding_utf.hpp>
 
 #include <config.h>
@@ -118,7 +119,9 @@ GncNumeric::GncNumeric(double d) : m_num(0), m_den(1)
 }
 
 using boost::regex;
+using boost::u32regex;
 using boost::regex_search;
+using boost::u32regex_search;
 using boost::smatch;
 
 
@@ -251,7 +254,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
     static const std::string opt_signed_int("(-?[0-9]*)");
     static const std::string opt_signed_separated_int("(-?[0-9]{1,3})");
     static const std::string unsigned_int("([0-9]+)");
-    static const std::string eu_separated_int("(?:[\\s'.]([0-9]{3}))?");
+    static const std::string eu_separated_int("(?:[[:space:]'.]([0-9]{3}))?");
     static const std::string en_separated_int("(?:\\,([0-9]{3}))?");
     static const std::string eu_decimal_part("(?:\\,([0-9]+))?");
     static const std::string en_decimal_part("(?:\\.([0-9]+))?");
@@ -272,7 +275,8 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
     static const regex hex_over_num(begin + hex_frag + slash + unsigned_int + end);
     static const regex num_over_hex(begin + opt_signed_int + slash + hex_frag + end);
     static const regex decimal(begin + opt_signed_int + "[.,]" + unsigned_int + end);
-    static const regex sep_decimal(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end);
+    static const u32regex sep_decimal =
+        boost::make_u32regex(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end);
     static const regex scientific("(?:(-?[0-9]+[.,]?)|(-?[0-9]*)[.,]([0-9]+))[Ee](-?[0-9]+)");
     static const regex has_hex_prefix(".*0[xX]$");
     smatch m, x;
@@ -345,7 +349,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
         m_den = denom;
         return;
     }
-    if (regex_search(str, m, sep_decimal))
+    if (u32regex_search(str, m, sep_decimal))
     {
         /* There's a bit of magic here because of the complexity of
          * the regex. It supports two formats, one for locales that
diff --git a/libgnucash/engine/test/gtest-gnc-numeric.cpp b/libgnucash/engine/test/gtest-gnc-numeric.cpp
index 58bd1162ca..aca62033c7 100644
--- a/libgnucash/engine/test/gtest-gnc-numeric.cpp
+++ b/libgnucash/engine/test/gtest-gnc-numeric.cpp
@@ -156,8 +156,8 @@ TEST(gncnumeric_constructors, test_string_constructor)
     GncNumeric neg_continental_decimal("-123,456");
     EXPECT_EQ(-123456, neg_continental_decimal.num());
     EXPECT_EQ(1000, neg_continental_decimal.denom());
-    ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123 456 789,123"));
-    GncNumeric swiss_thousep_decimal("123 456 789,123");
+    ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123"));
+    GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123");
     EXPECT_EQ(123456789123, swiss_thousep_decimal.num());
     EXPECT_EQ(1000, swiss_thousep_decimal.denom());
     GncNumeric from_scientific("1.234e4");



Summary of changes:
 libgnucash/engine/gnc-numeric.cpp            | 10 +++++++---
 libgnucash/engine/test/gtest-gnc-numeric.cpp |  4 ++--
 2 files changed, 9 insertions(+), 5 deletions(-)



More information about the gnucash-changes mailing list