gnucash maint: Multiple changes pushed

John Ralls jralls at code.gnucash.org
Fri Feb 28 23:50:36 EST 2020


Updated	 via  https://github.com/Gnucash/gnucash/commit/95857a8b (commit)
	 via  https://github.com/Gnucash/gnucash/commit/7509b542 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/01c76e23 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/41863be9 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/d07d4b96 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/322f2d99 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/a1318497 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/da60560a (commit)
	 via  https://github.com/Gnucash/gnucash/commit/9b3085a4 (commit)
	from  https://github.com/Gnucash/gnucash/commit/b9d625d2 (commit)



commit 95857a8b99f343c95b099249e3e67a7ed51f4d0e
Merge: b9d625d25 7509b542d
Author: John Ralls <jralls at ceridwen.us>
Date:   Fri Feb 28 21:29:58 2020 -0700

    Merge Christian Gruber's 'fix_bug_797587' into maint.


commit 7509b542da67aeb42bff0b98a20ffc2989f6e146
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Feb 17 23:31:12 2020 +0100

    Simplify function build_bayes()
    
    Inline function parse_bayes_imap_info() into build_bayes() and remove
    temp_guid.

diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 83d49a748..ea976be49 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5664,24 +5664,15 @@ build_non_bayes (const char *key, const GValue *value, gpointer user_data)
     g_free (guid_string);
 }
 
-static std::tuple<std::string, std::string>
-parse_bayes_imap_info (std::string const & imap_bayes_entry)
-{
-    auto guid_start = imap_bayes_entry.size() - GUID_ENCODING_LENGTH;
-    std::string keyword {imap_bayes_entry.substr (1, guid_start - 2)};
-    std::string account_guid {imap_bayes_entry.substr (guid_start)};
-    return std::tuple <std::string, std::string> {keyword, account_guid};
-}
-
 static void
 build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
 {
-    auto parsed_key = parse_bayes_imap_info (suffix);
+    size_t guid_start = strlen(suffix) - GUID_ENCODING_LENGTH;
+    std::string account_guid {&suffix[guid_start]};
     GncGUID guid;
     try
     {
-        auto temp_guid = gnc::GUID::from_string (std::get <1> (parsed_key));
-        guid = temp_guid;
+        guid = gnc::GUID::from_string (account_guid);
     }
     catch (const gnc::guid_syntax_exception& err)
     {
@@ -5693,7 +5684,7 @@ build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
     imap_node->source_account = imapInfo.source_account;
     imap_node->map_account = map_account;
     imap_node->head = g_strdup_printf ("%s%s", IMAP_FRAME_BAYES, suffix);
-    imap_node->match_string = g_strdup (std::get <0> (parsed_key).c_str ());
+    imap_node->match_string = g_strndup (&suffix[1], guid_start - 2);
     imap_node->category = g_strdup(" ");
     imap_node->count = g_strdup_printf ("%" G_GINT64_FORMAT, count);
     imapInfo.list = g_list_prepend (imapInfo.list, imap_node);

commit 01c76e23913ea870fb331fc6c747d67f9bdd7f85
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Feb 17 23:29:28 2020 +0100

    Remove unused template of function for_each_slot_prefix()
    
    for_each_slot_prefix() is not used anywhere with two arguments

diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index c84cf18a4..253eec318 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -235,20 +235,6 @@ struct KvpFrameImpl
     KvpValue * set_impl (std::string const &, KvpValue *) noexcept;
 };
 
-template<typename func_type>
-void KvpFrame::for_each_slot_prefix(std::string const & prefix,
-        func_type const & func) const noexcept
-{
-    std::for_each (m_valuemap.begin(), m_valuemap.end(),
-        [&prefix,&func](const KvpFrameImpl::map_type::value_type & a)
-        {
-            /* Testing for prefix matching */
-            if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
-                func (&a.first[prefix.size()], a.second);
-        }
-    );
-}
-
 template<typename func_type, typename data_type>
 void KvpFrame::for_each_slot_prefix(std::string const & prefix,
         func_type const & func, data_type & data) const noexcept

commit 41863be9c7251098764c5a36e25f401fe536162d
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Feb 17 22:56:15 2020 +0100

    Avoid copying local instance of AccountTokenCount

diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 805daeea4..83d49a748 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5284,11 +5284,8 @@ build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & toke
     if (strlen(suffix) == GUID_ENCODING_LENGTH)
     {
         tokenInfo.total_count += value->get<int64_t>();
-        AccountTokenCount this_account;
         /*By convention, the key ends with the account GUID.*/
-        this_account.account_guid = std::string{suffix};
-        this_account.token_count = value->get<int64_t>();
-        tokenInfo.accounts.push_back(this_account);
+        tokenInfo.accounts.emplace_back(AccountTokenCount{std::string{suffix}, value->get<int64_t>()});
     }
 }
 

commit d07d4b962fc89ef332626b984b8fb3149033113d
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Sun Feb 2 22:38:44 2020 +0100

    Fix tokenize_string()
    
    This fix prevents empty strings as tokens and removes duplicated tokens.
    Function tokenize_string() is used for bayesian import matching, where
    empty token strings or duplicated tokens lead to wrong results within
    probability calculation for matching of a transaction to an account.
    
    Empty token strings can occur if (see function g_strsplit())
    * two or more spaces occur directly after another
    * the string begins or ends with spaces

diff --git a/gnucash/import-export/import-backend.c b/gnucash/import-export/import-backend.c
index ebc37c3a7..888b913b2 100644
--- a/gnucash/import-export/import-backend.c
+++ b/gnucash/import-export/import-backend.c
@@ -387,8 +387,24 @@ tokenize_string(GList* existing_tokens, const char *string)
     /* add each token to the token GList */
     while (stringpos && *stringpos)
     {
-        /* prepend the char* to the token GList */
-        existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+        if (strlen(*stringpos) > 0)
+        {
+            /* check for duplicated tokens */
+            gboolean duplicated = FALSE;
+            for (GList* token = existing_tokens; token != NULL; token = token->next)
+            {
+                if (g_strcmp0(token->data, *stringpos) == 0)
+                {
+                    duplicated = TRUE;
+                    break;
+                }
+            }
+            if (duplicated == FALSE)
+            {
+                /* prepend the char* to the token GList */
+                existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+            }
+        }
 
         /* then move to the next string */
         stringpos++;

commit 322f2d99de89849b4d193afcb779520aabcafa4f
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Sun Feb 2 17:55:30 2020 +0100

    Rework key prefix matching
    
    Use C string comparison instead of C++ function std::mismatch to increase
    performance.

diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index 1419e626e..c84cf18a4 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -242,11 +242,8 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
     std::for_each (m_valuemap.begin(), m_valuemap.end(),
         [&prefix,&func](const KvpFrameImpl::map_type::value_type & a)
         {
-            std::string temp_key {a.first};
-            if (temp_key.size() < prefix.size())
-                return;
             /* Testing for prefix matching */
-            if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
+            if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
                 func (&a.first[prefix.size()], a.second);
         }
     );
@@ -259,11 +256,8 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
     std::for_each (m_valuemap.begin(), m_valuemap.end(),
         [&prefix,&func,&data](const KvpFrameImpl::map_type::value_type & a)
         {
-            std::string temp_key {a.first};
-            if (temp_key.size() < prefix.size())
-                return;
             /* Testing for prefix matching */
-            if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
+            if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
                 func (&a.first[prefix.size()], a.second, data);
         }
     );

commit a13184978a07e4c6735e5d5893076ffd8117c6a1
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Jan 27 23:01:25 2020 +0100

    Fix calculation of token info to find exactly matching tokens only
    
    In get_first_pass_probabilities() function
    qof_instance_foreach_slot_prefix() is called with a prefix path
    including closing slash after token now. This avoids, that also entries
    with token as a substring are included in token info, where key only
    starts with token.
    
    Finally function build_token_info() checks, if the key suffix after the
    token consists only of the GUID. This avoids, that also entries with the
    same prefix and slashes are included in token info.

diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index fee1187bc..805daeea4 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5281,13 +5281,15 @@ struct AccountInfo
 static void
 build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & tokenInfo)
 {
-    tokenInfo.total_count += value->get<int64_t>();
-    AccountTokenCount this_account;
-    std::string account_guid {suffix};
-    /*By convention, the key ends with the account GUID.*/
-    this_account.account_guid = account_guid.substr(account_guid.size() - GUID_ENCODING_LENGTH);
-    this_account.token_count = value->get<int64_t>();
-    tokenInfo.accounts.push_back(this_account);
+    if (strlen(suffix) == GUID_ENCODING_LENGTH)
+    {
+        tokenInfo.total_count += value->get<int64_t>();
+        AccountTokenCount this_account;
+        /*By convention, the key ends with the account GUID.*/
+        this_account.account_guid = std::string{suffix};
+        this_account.token_count = value->get<int64_t>();
+        tokenInfo.accounts.push_back(this_account);
+    }
 }
 
 /** We scale the probability values by probability_factor.
@@ -5332,7 +5334,7 @@ get_first_pass_probabilities(GncImportMatchMap * imap, GList * tokens)
     for (auto current_token = tokens; current_token; current_token = current_token->next)
     {
         TokenAccountsInfo tokenInfo{};
-        auto path = std::string{IMAP_FRAME_BAYES "/"} + static_cast <char const *> (current_token->data);
+        auto path = std::string{IMAP_FRAME_BAYES "/"} + static_cast <char const *> (current_token->data) + "/";
         qof_instance_foreach_slot_prefix (QOF_INSTANCE (imap->acc), path, &build_token_info, tokenInfo);
         for (auto const & current_account_token : tokenInfo.accounts)
         {

commit da60560ac4ad1994a79185eaaafaaa0363a21076
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Jan 27 22:43:09 2020 +0100

    Change behaviour of KvpFrame::for_each_slot_prefix()
    
    Provided function func is now called with key suffix only instead of
    full key (prefix is omitted). This is neccessary for fixing function
    build_token_info() in the next commit.

diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 194976dbd..fee1187bc 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5279,11 +5279,11 @@ struct AccountInfo
 };
 
 static void
-build_token_info(char const * key, KvpValue * value, TokenAccountsInfo & tokenInfo)
+build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & tokenInfo)
 {
     tokenInfo.total_count += value->get<int64_t>();
     AccountTokenCount this_account;
-    std::string account_guid {key};
+    std::string account_guid {suffix};
     /*By convention, the key ends with the account GUID.*/
     this_account.account_guid = account_guid.substr(account_guid.size() - GUID_ENCODING_LENGTH);
     this_account.token_count = value->get<int64_t>();
@@ -5665,38 +5665,36 @@ build_non_bayes (const char *key, const GValue *value, gpointer user_data)
     g_free (guid_string);
 }
 
-static std::tuple<std::string, std::string, std::string>
+static std::tuple<std::string, std::string>
 parse_bayes_imap_info (std::string const & imap_bayes_entry)
 {
-    auto header_length = strlen (IMAP_FRAME_BAYES);
-    std::string header {imap_bayes_entry.substr (0, header_length)};
     auto guid_start = imap_bayes_entry.size() - GUID_ENCODING_LENGTH;
-    std::string keyword {imap_bayes_entry.substr (header_length + 1, guid_start - header_length - 2)};
+    std::string keyword {imap_bayes_entry.substr (1, guid_start - 2)};
     std::string account_guid {imap_bayes_entry.substr (guid_start)};
-    return std::tuple <std::string, std::string, std::string> {header, keyword, account_guid};
+    return std::tuple <std::string, std::string> {keyword, account_guid};
 }
 
 static void
-build_bayes (const char *key, KvpValue * value, GncImapInfo & imapInfo)
+build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
 {
-    auto parsed_key = parse_bayes_imap_info (key);
+    auto parsed_key = parse_bayes_imap_info (suffix);
     GncGUID guid;
     try
     {
-        auto temp_guid = gnc::GUID::from_string (std::get <2> (parsed_key));
+        auto temp_guid = gnc::GUID::from_string (std::get <1> (parsed_key));
         guid = temp_guid;
     }
     catch (const gnc::guid_syntax_exception& err)
     {
-        PWARN("Invalid GUID string from %s", key);
+        PWARN("Invalid GUID string from %s%s", IMAP_FRAME_BAYES, suffix);
     }
     auto map_account = xaccAccountLookup (&guid, gnc_account_get_book (imapInfo.source_account));
     auto imap_node = static_cast <GncImapInfo*> (g_malloc (sizeof (GncImapInfo)));
     auto count = value->get <int64_t> ();
     imap_node->source_account = imapInfo.source_account;
     imap_node->map_account = map_account;
-    imap_node->head = g_strdup (key);
-    imap_node->match_string = g_strdup (std::get <1> (parsed_key).c_str ());
+    imap_node->head = g_strdup_printf ("%s%s", IMAP_FRAME_BAYES, suffix);
+    imap_node->match_string = g_strdup (std::get <0> (parsed_key).c_str ());
     imap_node->category = g_strdup(" ");
     imap_node->count = g_strdup_printf ("%" G_GINT64_FORMAT, count);
     imapInfo.list = g_list_prepend (imapInfo.list, imap_node);
diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index 23eb0c615..1419e626e 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -247,7 +247,7 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
                 return;
             /* Testing for prefix matching */
             if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
-                func (a.first, a.second);
+                func (&a.first[prefix.size()], a.second);
         }
     );
 }
@@ -264,7 +264,7 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
                 return;
             /* Testing for prefix matching */
             if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
-                func (a.first, a.second, data);
+                func (&a.first[prefix.size()], a.second, data);
         }
     );
 }

commit 9b3085a429660dd442176ba63de196a476edc4e0
Author: Christian Gruber <christian.gruber at posteo.de>
Date:   Mon Jan 27 22:38:26 2020 +0100

    Add test cases to check for exact token matching

diff --git a/libgnucash/engine/test/gtest-import-map.cpp b/libgnucash/engine/test/gtest-import-map.cpp
index f2e4509e6..33d64ab18 100644
--- a/libgnucash/engine/test/gtest-import-map.cpp
+++ b/libgnucash/engine/test/gtest-import-map.cpp
@@ -271,6 +271,14 @@ TEST_F(ImapBayesTest, FindAccountBayes)
     EXPECT_EQ(t_expense_account2, account);
     account = gnc_account_imap_find_account_bayes(t_imap, t_list5);
     EXPECT_EQ(nullptr, account);
+
+    // only imap entries with exact token matching should be considered
+    root->set_path({std::string{IMAP_FRAME_BAYES} + "/" + pepper + waldo + "/" + acct2_guid}, new KvpValue{*value});
+    account = gnc_account_imap_find_account_bayes(t_imap, t_list3);
+    EXPECT_EQ(t_expense_account1, account);
+    root->set_path({std::string{IMAP_FRAME_BAYES} + "/" + pepper + "/" + waldo + "/" + acct2_guid}, new KvpValue{*value});
+    account = gnc_account_imap_find_account_bayes(t_imap, t_list3);
+    EXPECT_EQ(t_expense_account1, account);
 }
 
 TEST_F(ImapBayesTest, AddAccountBayes)



Summary of changes:
 gnucash/import-export/import-backend.c      | 20 ++++++++++++--
 libgnucash/engine/Account.cpp               | 42 +++++++++++------------------
 libgnucash/engine/kvp-frame.hpp             | 24 ++---------------
 libgnucash/engine/test/gtest-import-map.cpp |  8 ++++++
 4 files changed, 43 insertions(+), 51 deletions(-)



More information about the gnucash-changes mailing list