gnucash maint: Multiple changes pushed
John Ralls
jralls at code.gnucash.org
Fri Feb 28 23:50:36 EST 2020
Updated via https://github.com/Gnucash/gnucash/commit/95857a8b (commit)
via https://github.com/Gnucash/gnucash/commit/7509b542 (commit)
via https://github.com/Gnucash/gnucash/commit/01c76e23 (commit)
via https://github.com/Gnucash/gnucash/commit/41863be9 (commit)
via https://github.com/Gnucash/gnucash/commit/d07d4b96 (commit)
via https://github.com/Gnucash/gnucash/commit/322f2d99 (commit)
via https://github.com/Gnucash/gnucash/commit/a1318497 (commit)
via https://github.com/Gnucash/gnucash/commit/da60560a (commit)
via https://github.com/Gnucash/gnucash/commit/9b3085a4 (commit)
from https://github.com/Gnucash/gnucash/commit/b9d625d2 (commit)
commit 95857a8b99f343c95b099249e3e67a7ed51f4d0e
Merge: b9d625d25 7509b542d
Author: John Ralls <jralls at ceridwen.us>
Date: Fri Feb 28 21:29:58 2020 -0700
Merge Christian Gruber's 'fix_bug_797587' into maint.
commit 7509b542da67aeb42bff0b98a20ffc2989f6e146
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Feb 17 23:31:12 2020 +0100
Simplify function build_bayes()
Inline function parse_bayes_imap_info() into build_bayes() and remove
temp_guid.
diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 83d49a748..ea976be49 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5664,24 +5664,15 @@ build_non_bayes (const char *key, const GValue *value, gpointer user_data)
g_free (guid_string);
}
-static std::tuple<std::string, std::string>
-parse_bayes_imap_info (std::string const & imap_bayes_entry)
-{
- auto guid_start = imap_bayes_entry.size() - GUID_ENCODING_LENGTH;
- std::string keyword {imap_bayes_entry.substr (1, guid_start - 2)};
- std::string account_guid {imap_bayes_entry.substr (guid_start)};
- return std::tuple <std::string, std::string> {keyword, account_guid};
-}
-
static void
build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
{
- auto parsed_key = parse_bayes_imap_info (suffix);
+ size_t guid_start = strlen(suffix) - GUID_ENCODING_LENGTH;
+ std::string account_guid {&suffix[guid_start]};
GncGUID guid;
try
{
- auto temp_guid = gnc::GUID::from_string (std::get <1> (parsed_key));
- guid = temp_guid;
+ guid = gnc::GUID::from_string (account_guid);
}
catch (const gnc::guid_syntax_exception& err)
{
@@ -5693,7 +5684,7 @@ build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
imap_node->source_account = imapInfo.source_account;
imap_node->map_account = map_account;
imap_node->head = g_strdup_printf ("%s%s", IMAP_FRAME_BAYES, suffix);
- imap_node->match_string = g_strdup (std::get <0> (parsed_key).c_str ());
+ imap_node->match_string = g_strndup (&suffix[1], guid_start - 2);
imap_node->category = g_strdup(" ");
imap_node->count = g_strdup_printf ("%" G_GINT64_FORMAT, count);
imapInfo.list = g_list_prepend (imapInfo.list, imap_node);
commit 01c76e23913ea870fb331fc6c747d67f9bdd7f85
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Feb 17 23:29:28 2020 +0100
Remove unused template of function for_each_slot_prefix()
for_each_slot_prefix() is not used anywhere with two arguments
diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index c84cf18a4..253eec318 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -235,20 +235,6 @@ struct KvpFrameImpl
KvpValue * set_impl (std::string const &, KvpValue *) noexcept;
};
-template<typename func_type>
-void KvpFrame::for_each_slot_prefix(std::string const & prefix,
- func_type const & func) const noexcept
-{
- std::for_each (m_valuemap.begin(), m_valuemap.end(),
- [&prefix,&func](const KvpFrameImpl::map_type::value_type & a)
- {
- /* Testing for prefix matching */
- if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
- func (&a.first[prefix.size()], a.second);
- }
- );
-}
-
template<typename func_type, typename data_type>
void KvpFrame::for_each_slot_prefix(std::string const & prefix,
func_type const & func, data_type & data) const noexcept
commit 41863be9c7251098764c5a36e25f401fe536162d
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Feb 17 22:56:15 2020 +0100
Avoid copying local instance of AccountTokenCount
diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 805daeea4..83d49a748 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5284,11 +5284,8 @@ build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & toke
if (strlen(suffix) == GUID_ENCODING_LENGTH)
{
tokenInfo.total_count += value->get<int64_t>();
- AccountTokenCount this_account;
/*By convention, the key ends with the account GUID.*/
- this_account.account_guid = std::string{suffix};
- this_account.token_count = value->get<int64_t>();
- tokenInfo.accounts.push_back(this_account);
+ tokenInfo.accounts.emplace_back(AccountTokenCount{std::string{suffix}, value->get<int64_t>()});
}
}
commit d07d4b962fc89ef332626b984b8fb3149033113d
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Sun Feb 2 22:38:44 2020 +0100
Fix tokenize_string()
This fix prevents empty strings as tokens and removes duplicated tokens.
Function tokenize_string() is used for bayesian import matching, where
empty token strings or duplicated tokens lead to wrong results within
probability calculation for matching of a transaction to an account.
Empty token strings can occur if (see function g_strsplit())
* two or more spaces occur directly after another
* the string begins or ends with spaces
diff --git a/gnucash/import-export/import-backend.c b/gnucash/import-export/import-backend.c
index ebc37c3a7..888b913b2 100644
--- a/gnucash/import-export/import-backend.c
+++ b/gnucash/import-export/import-backend.c
@@ -387,8 +387,24 @@ tokenize_string(GList* existing_tokens, const char *string)
/* add each token to the token GList */
while (stringpos && *stringpos)
{
- /* prepend the char* to the token GList */
- existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+ if (strlen(*stringpos) > 0)
+ {
+ /* check for duplicated tokens */
+ gboolean duplicated = FALSE;
+ for (GList* token = existing_tokens; token != NULL; token = token->next)
+ {
+ if (g_strcmp0(token->data, *stringpos) == 0)
+ {
+ duplicated = TRUE;
+ break;
+ }
+ }
+ if (duplicated == FALSE)
+ {
+ /* prepend the char* to the token GList */
+ existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
+ }
+ }
/* then move to the next string */
stringpos++;
commit 322f2d99de89849b4d193afcb779520aabcafa4f
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Sun Feb 2 17:55:30 2020 +0100
Rework key prefix matching
Use C string comparison instead of C++ function std::mismatch to increase
performance.
diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index 1419e626e..c84cf18a4 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -242,11 +242,8 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
std::for_each (m_valuemap.begin(), m_valuemap.end(),
[&prefix,&func](const KvpFrameImpl::map_type::value_type & a)
{
- std::string temp_key {a.first};
- if (temp_key.size() < prefix.size())
- return;
/* Testing for prefix matching */
- if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
+ if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
func (&a.first[prefix.size()], a.second);
}
);
@@ -259,11 +256,8 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
std::for_each (m_valuemap.begin(), m_valuemap.end(),
[&prefix,&func,&data](const KvpFrameImpl::map_type::value_type & a)
{
- std::string temp_key {a.first};
- if (temp_key.size() < prefix.size())
- return;
/* Testing for prefix matching */
- if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
+ if (strncmp(a.first, prefix.c_str(), prefix.size()) == 0)
func (&a.first[prefix.size()], a.second, data);
}
);
commit a13184978a07e4c6735e5d5893076ffd8117c6a1
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Jan 27 23:01:25 2020 +0100
Fix calculation of token info to find exactly matching tokens only
In get_first_pass_probabilities() function
qof_instance_foreach_slot_prefix() is called with a prefix path
including closing slash after token now. This avoids, that also entries
with token as a substring are included in token info, where key only
starts with token.
Finally function build_token_info() checks, if the key suffix after the
token consists only of the GUID. This avoids, that also entries with the
same prefix and slashes are included in token info.
diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index fee1187bc..805daeea4 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5281,13 +5281,15 @@ struct AccountInfo
static void
build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & tokenInfo)
{
- tokenInfo.total_count += value->get<int64_t>();
- AccountTokenCount this_account;
- std::string account_guid {suffix};
- /*By convention, the key ends with the account GUID.*/
- this_account.account_guid = account_guid.substr(account_guid.size() - GUID_ENCODING_LENGTH);
- this_account.token_count = value->get<int64_t>();
- tokenInfo.accounts.push_back(this_account);
+ if (strlen(suffix) == GUID_ENCODING_LENGTH)
+ {
+ tokenInfo.total_count += value->get<int64_t>();
+ AccountTokenCount this_account;
+ /*By convention, the key ends with the account GUID.*/
+ this_account.account_guid = std::string{suffix};
+ this_account.token_count = value->get<int64_t>();
+ tokenInfo.accounts.push_back(this_account);
+ }
}
/** We scale the probability values by probability_factor.
@@ -5332,7 +5334,7 @@ get_first_pass_probabilities(GncImportMatchMap * imap, GList * tokens)
for (auto current_token = tokens; current_token; current_token = current_token->next)
{
TokenAccountsInfo tokenInfo{};
- auto path = std::string{IMAP_FRAME_BAYES "/"} + static_cast <char const *> (current_token->data);
+ auto path = std::string{IMAP_FRAME_BAYES "/"} + static_cast <char const *> (current_token->data) + "/";
qof_instance_foreach_slot_prefix (QOF_INSTANCE (imap->acc), path, &build_token_info, tokenInfo);
for (auto const & current_account_token : tokenInfo.accounts)
{
commit da60560ac4ad1994a79185eaaafaaa0363a21076
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Jan 27 22:43:09 2020 +0100
Change behaviour of KvpFrame::for_each_slot_prefix()
Provided function func is now called with key suffix only instead of
full key (prefix is omitted). This is neccessary for fixing function
build_token_info() in the next commit.
diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 194976dbd..fee1187bc 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -5279,11 +5279,11 @@ struct AccountInfo
};
static void
-build_token_info(char const * key, KvpValue * value, TokenAccountsInfo & tokenInfo)
+build_token_info(char const * suffix, KvpValue * value, TokenAccountsInfo & tokenInfo)
{
tokenInfo.total_count += value->get<int64_t>();
AccountTokenCount this_account;
- std::string account_guid {key};
+ std::string account_guid {suffix};
/*By convention, the key ends with the account GUID.*/
this_account.account_guid = account_guid.substr(account_guid.size() - GUID_ENCODING_LENGTH);
this_account.token_count = value->get<int64_t>();
@@ -5665,38 +5665,36 @@ build_non_bayes (const char *key, const GValue *value, gpointer user_data)
g_free (guid_string);
}
-static std::tuple<std::string, std::string, std::string>
+static std::tuple<std::string, std::string>
parse_bayes_imap_info (std::string const & imap_bayes_entry)
{
- auto header_length = strlen (IMAP_FRAME_BAYES);
- std::string header {imap_bayes_entry.substr (0, header_length)};
auto guid_start = imap_bayes_entry.size() - GUID_ENCODING_LENGTH;
- std::string keyword {imap_bayes_entry.substr (header_length + 1, guid_start - header_length - 2)};
+ std::string keyword {imap_bayes_entry.substr (1, guid_start - 2)};
std::string account_guid {imap_bayes_entry.substr (guid_start)};
- return std::tuple <std::string, std::string, std::string> {header, keyword, account_guid};
+ return std::tuple <std::string, std::string> {keyword, account_guid};
}
static void
-build_bayes (const char *key, KvpValue * value, GncImapInfo & imapInfo)
+build_bayes (const char *suffix, KvpValue * value, GncImapInfo & imapInfo)
{
- auto parsed_key = parse_bayes_imap_info (key);
+ auto parsed_key = parse_bayes_imap_info (suffix);
GncGUID guid;
try
{
- auto temp_guid = gnc::GUID::from_string (std::get <2> (parsed_key));
+ auto temp_guid = gnc::GUID::from_string (std::get <1> (parsed_key));
guid = temp_guid;
}
catch (const gnc::guid_syntax_exception& err)
{
- PWARN("Invalid GUID string from %s", key);
+ PWARN("Invalid GUID string from %s%s", IMAP_FRAME_BAYES, suffix);
}
auto map_account = xaccAccountLookup (&guid, gnc_account_get_book (imapInfo.source_account));
auto imap_node = static_cast <GncImapInfo*> (g_malloc (sizeof (GncImapInfo)));
auto count = value->get <int64_t> ();
imap_node->source_account = imapInfo.source_account;
imap_node->map_account = map_account;
- imap_node->head = g_strdup (key);
- imap_node->match_string = g_strdup (std::get <1> (parsed_key).c_str ());
+ imap_node->head = g_strdup_printf ("%s%s", IMAP_FRAME_BAYES, suffix);
+ imap_node->match_string = g_strdup (std::get <0> (parsed_key).c_str ());
imap_node->category = g_strdup(" ");
imap_node->count = g_strdup_printf ("%" G_GINT64_FORMAT, count);
imapInfo.list = g_list_prepend (imapInfo.list, imap_node);
diff --git a/libgnucash/engine/kvp-frame.hpp b/libgnucash/engine/kvp-frame.hpp
index 23eb0c615..1419e626e 100644
--- a/libgnucash/engine/kvp-frame.hpp
+++ b/libgnucash/engine/kvp-frame.hpp
@@ -247,7 +247,7 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
return;
/* Testing for prefix matching */
if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
- func (a.first, a.second);
+ func (&a.first[prefix.size()], a.second);
}
);
}
@@ -264,7 +264,7 @@ void KvpFrame::for_each_slot_prefix(std::string const & prefix,
return;
/* Testing for prefix matching */
if (std::mismatch(prefix.begin(), prefix.end(), temp_key.begin()).first == prefix.end())
- func (a.first, a.second, data);
+ func (&a.first[prefix.size()], a.second, data);
}
);
}
commit 9b3085a429660dd442176ba63de196a476edc4e0
Author: Christian Gruber <christian.gruber at posteo.de>
Date: Mon Jan 27 22:38:26 2020 +0100
Add test cases to check for exact token matching
diff --git a/libgnucash/engine/test/gtest-import-map.cpp b/libgnucash/engine/test/gtest-import-map.cpp
index f2e4509e6..33d64ab18 100644
--- a/libgnucash/engine/test/gtest-import-map.cpp
+++ b/libgnucash/engine/test/gtest-import-map.cpp
@@ -271,6 +271,14 @@ TEST_F(ImapBayesTest, FindAccountBayes)
EXPECT_EQ(t_expense_account2, account);
account = gnc_account_imap_find_account_bayes(t_imap, t_list5);
EXPECT_EQ(nullptr, account);
+
+ // only imap entries with exact token matching should be considered
+ root->set_path({std::string{IMAP_FRAME_BAYES} + "/" + pepper + waldo + "/" + acct2_guid}, new KvpValue{*value});
+ account = gnc_account_imap_find_account_bayes(t_imap, t_list3);
+ EXPECT_EQ(t_expense_account1, account);
+ root->set_path({std::string{IMAP_FRAME_BAYES} + "/" + pepper + "/" + waldo + "/" + acct2_guid}, new KvpValue{*value});
+ account = gnc_account_imap_find_account_bayes(t_imap, t_list3);
+ EXPECT_EQ(t_expense_account1, account);
}
TEST_F(ImapBayesTest, AddAccountBayes)
Summary of changes:
gnucash/import-export/import-backend.c | 20 ++++++++++++--
libgnucash/engine/Account.cpp | 42 +++++++++++------------------
libgnucash/engine/kvp-frame.hpp | 24 ++---------------
libgnucash/engine/test/gtest-import-map.cpp | 8 ++++++
4 files changed, 43 insertions(+), 51 deletions(-)
More information about the gnucash-changes
mailing list