gnucash maint: Multiple changes pushed

John Ralls jralls at code.gnucash.org
Sun Aug 2 14:52:25 EDT 2020


Updated	 via  https://github.com/Gnucash/gnucash/commit/efc34b24 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/a9f79cf7 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/1f592ce1 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/2be2ff8a (commit)
	 via  https://github.com/Gnucash/gnucash/commit/0da826f3 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/85515214 (commit)
	from  https://github.com/Gnucash/gnucash/commit/ea2d89fa (commit)



commit efc34b247f4caa611b8967ee5e233ef3a94e155b
Merge: ea2d89fac a9f79cf79
Author: John Ralls <jralls at ceridwen.us>
Date:   Sun Aug 2 11:20:50 2020 -0700

    Merge Jean Laroche's  'improve_ofx_import_speed' into maint.


commit a9f79cf79c6b681428eaee78a5bca8f192e33b98
Author: jean <you at example.com>
Date:   Tue Jul 21 13:39:26 2020 -0700

    Add a flag to the account structure to defer balance computation

diff --git a/gnucash/import-export/import-backend.c b/gnucash/import-export/import-backend.c
index 6ffdcf258..a7ca8b958 100644
--- a/gnucash/import-export/import-backend.c
+++ b/gnucash/import-export/import-backend.c
@@ -623,10 +623,6 @@ void split_find_match (GNCImportTransInfo * trans_info,
         Transaction *new_trans = gnc_import_TransInfo_get_trans (trans_info);
         Split *new_trans_fsplit = gnc_import_TransInfo_get_fsplit (trans_info);
 
-        // Do not consider transactions that have been previously matched.
-        if (gnc_import_split_has_online_id (split)) // JEAN THIS CAN NOW BE REMOVED.
-            return;
-
         /* Matching heuristics */
 
         /* Amount heuristics */
@@ -1081,7 +1077,7 @@ static gint check_trans_online_id(Transaction *trans1, void *user_data)
     }
 }
 
-static gint collect_trans_online_id(Transaction *trans, void *user_data) //JEAN COLLECT
+static gint collect_trans_online_id(Transaction *trans, void *user_data)
 {
     Split *split;
     GHashTable *id_hash = user_data;
@@ -1101,7 +1097,7 @@ static gint collect_trans_online_id(Transaction *trans, void *user_data) //JEAN
 
 /** Checks whether the given transaction's online_id already exists in
   its parent account. */
-gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* id_hash)
+gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* acct_id_hash)
 {
     gboolean online_id_exists = FALSE;
     Account *dest_acct;
@@ -1113,15 +1109,13 @@ gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* id_hash)
     // Create a hash per account of a hash of all transactions IDs. Then the test below will be fast if
     // we have many transactions to import.
     dest_acct = xaccSplitGetAccount (source_split);
-    if (!g_hash_table_contains (id_hash, dest_acct))
+    if (!g_hash_table_contains (acct_id_hash, dest_acct))
     {
         GHashTable* new_hash = g_hash_table_new (g_str_hash, g_str_equal);
-        g_hash_table_insert (id_hash, dest_acct, new_hash);
-        printf ("CREATE HASH\n");
+        g_hash_table_insert (acct_id_hash, dest_acct, new_hash);
         xaccAccountForEachTransaction (dest_acct, collect_trans_online_id, new_hash);
-        printf ("CREATE DONE\n");
     }
-    online_id_exists = g_hash_table_contains (g_hash_table_lookup (id_hash, dest_acct),
+    online_id_exists = g_hash_table_contains (g_hash_table_lookup (acct_id_hash, dest_acct),
                                               gnc_import_get_split_online_id (source_split));
     
     /* If it does, abort the process for this transaction, since it is
diff --git a/gnucash/import-export/import-backend.h b/gnucash/import-export/import-backend.h
index 3602476e0..3a78278b1 100644
--- a/gnucash/import-export/import-backend.h
+++ b/gnucash/import-export/import-backend.h
@@ -64,7 +64,7 @@ typedef enum _action
  *
  * @param trans The transaction for which to check for an existing
  * online_id. */
-gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* id_hash);
+gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* acct_id_hash);
 
 /** Evaluates the match between trans_info and split using the provided parameters.
  *
diff --git a/gnucash/import-export/import-main-matcher.c b/gnucash/import-export/import-main-matcher.c
index 2ef3fa512..f08a606d5 100644
--- a/gnucash/import-export/import-main-matcher.c
+++ b/gnucash/import-export/import-main-matcher.c
@@ -75,7 +75,7 @@ struct _main_matcher_info
     gboolean add_toggled;   // flag to indicate that add has been toggled to stop selection
     gint id;
     GSList* temp_trans_list; // Temporary list of imported transactions
-    GHashTable* id_hash;     // Hash table, per account, of list of transaction IDs.
+    GHashTable* acct_id_hash;     // Hash table, per account, of list of transaction IDs.
     GSList* edited_accounts; // List of accounts currently edited.
 };
 
@@ -147,36 +147,29 @@ static void refresh_model_row (
 
 static gboolean delete_hash (gpointer key, gpointer value, gpointer user_data)
 {
-    // Value is a hash table that needs to be destroyed. JEAN: I should free the memory used for the keys.
+    // Value is a hash table that needs to be destroyed. 
     g_hash_table_destroy (value);
     return TRUE;
 }
 
 static void
-commit_all (GNCImportMainMatcher *info)
+update_all_balances (GNCImportMainMatcher *info)
 {
-    // Commit the edits for all accounts for which we've called inc_account_edit.
     for (GSList* iter = info->edited_accounts; iter; iter=iter->next)
     {
-        int level = qof_instance_get_editlevel (iter->data);
-        // This should not happen.
-        if (level != 1)
-            PERR ("import-main-macher.c: Edit level not 1");
-        while (qof_instance_get_editlevel (iter->data) > 0)
-            xaccAccountCommitEdit (iter->data);
+        gnc_account_set_defer_bal_computation(iter->data,FALSE);
+        xaccAccountRecomputeBalance(iter->data);
     }
     g_slist_free (info->edited_accounts);
     info->edited_accounts = NULL;
 }
 
 static void
-inc_account_edit (GNCImportMainMatcher *info, Account* acc)
+defer_bal_computation (GNCImportMainMatcher *info, Account* acc)
 {
-    // Call BeginEdit on accounts that haven't been edited yet. This will speeds things up
-    // a lot by preventing commits until we're all done.
-    if (qof_instance_get_editlevel (acc) == 0)
+    if (!gnc_account_get_defer_bal_computation(acc))
     {
-        xaccAccountBeginEdit (acc);
+        gnc_account_set_defer_bal_computation (acc, TRUE);
         info->edited_accounts = g_slist_prepend (info->edited_accounts, acc);
     }
 }
@@ -186,7 +179,6 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
     GtkTreeModel *model;
     GtkTreeIter iter;
     GNCImportTransInfo *trans_info;
-    Split* first_split = NULL;
 
     if (info == NULL)
         return;
@@ -219,14 +211,14 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
     else
         gnc_import_Settings_delete (info->user_settings);
 
-    // We've called xaccAccountBeginEdit on many accounts to defer the commits until now.
-    // Let's do it now that we're done.
-    commit_all (info);
-
     g_slist_free_full (info->temp_trans_list,(GDestroyNotify) gnc_import_TransInfo_delete);
     info->temp_trans_list = NULL;
-    g_hash_table_foreach_remove (info->id_hash, delete_hash, NULL);
-    info->id_hash = NULL;
+    
+    // We've deferred balance computations on many accounts. Let's do it now that we're done.
+    update_all_balances (info);
+    
+    g_hash_table_foreach_remove (info->acct_id_hash, delete_hash, NULL);
+    info->acct_id_hash = NULL;
     g_free (info);
 }
 
@@ -269,15 +261,14 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info)
     /* Don't run any queries and/or split sorts while processing the matcher
     results. */
     gnc_suspend_gui_refresh();
-    printf("START OK LOOP\n");
     do
     {
         gtk_tree_model_get (model, &iter,
                             DOWNLOADED_COL_DATA, &trans_info,
                             -1);
 
-        // JEAN: the account of all splits should be set as edited. IF THERE'S ONLY 1 SPLIT, A SPLIT
-        // WILL BE CREATED with unbalanced, and the account will not have its edit level increased...
+        // Note: if there's only 1 split (unbalanced) one will be created with the unbalanced account,
+        // and for that account the defer balance will not be set. So things will be slow.
 
         if (gnc_import_process_trans_item (NULL, trans_info))
         {
@@ -289,7 +280,6 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info)
         }
     }
     while (gtk_tree_model_iter_next (model, &iter));
-    printf("DONE OK LOOP\n");
     
     gnc_gen_trans_list_delete (info);
 
@@ -392,8 +382,8 @@ run_account_picker_dialog (GNCImportMainMatcher *info,
              &ok_pressed);
     if (ok_pressed)
     {
-        gnc_import_TransInfo_set_destacc (trans_info, new_acc, TRUE); // JEAN: BeginEdit on this account
-        inc_account_edit (info, new_acc);
+        gnc_import_TransInfo_set_destacc (trans_info, new_acc, TRUE);
+        defer_bal_computation (info, new_acc);
     }
 }
 
@@ -547,8 +537,8 @@ gnc_gen_trans_assign_transfer_account (GtkTreeView *treeview,
                 }
                 if (ok_pressed)
                 {
-                    gnc_import_TransInfo_set_destacc (trans_info, *new_acc, TRUE); // JEAN Set Begin_edit on this account.
-                    inc_account_edit (info, *new_acc);
+                    gnc_import_TransInfo_set_destacc (trans_info, *new_acc, TRUE);
+                    defer_bal_computation (info, *new_acc);
                 }
             }
             break;
@@ -623,7 +613,7 @@ gnc_gen_trans_assign_transfer_account_to_selection_cb (
     }
     g_list_free_full (selected_rows, (GDestroyNotify) gtk_tree_path_free);
 
-    // now reselect the transaction rows. // JEAN THIS GETS REALLY SLOW AS WELL
+    // now reselect the transaction rows. This is very slow if there are lots of transactions.
     for (l = refs; l != NULL; l = l->next)
     {
         GtkTreePath *path = gtk_tree_row_reference_get_path (l->data);
@@ -932,7 +922,7 @@ gnc_gen_trans_init_view (GNCImportMainMatcher *info,
     g_signal_connect (view, "popup-menu",
                       G_CALLBACK(gnc_gen_trans_onPopupMenu_cb), info);
     
-    info->id_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
+    info->acct_id_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
 }
 
 static void
@@ -1480,11 +1470,9 @@ void gnc_gen_trans_list_add_trans (GNCImportMainMatcher *gui, Transaction *trans
     Split* split = NULL;
     int i=0;
     
-    // JEAN call xaccAccountBeginEdit on trans_info->first_split->acc, but only once.
-    // This makes the deletion a lot faster because the commit is only done once at the end.
     split = xaccTransGetSplit (trans, 0);
     acc = xaccSplitGetAccount (split);
-    inc_account_edit (gui, acc);
+    defer_bal_computation (gui, acc);
     
     gnc_gen_trans_list_add_trans_with_ref_id (gui, trans, 0);
     return;
@@ -1513,7 +1501,7 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
     g_assert (gui);
     g_assert (trans);
   
-    if (gnc_import_exists_online_id (trans, gui->id_hash))
+    if (gnc_import_exists_online_id (trans, gui->acct_id_hash))
         return;
     else
     {
@@ -1538,7 +1526,7 @@ create_list_of_potential_matches (GNCImportMainMatcher *gui, GtkTreeModel* model
     static const int secs_per_day = 86400;
     GSList* imported_trans;
     GList* potential_match;
-    GHashTable* lists_per_accounts = g_hash_table_new (g_direct_hash, g_direct_equal);
+    GHashTable* lists_per_accounts = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, (GDestroyNotify)g_slist_free);
 
     // Go through all imported transactions, gather the list of accounts, and min/max date range.
     for (imported_trans=gui->temp_trans_list; imported_trans!=NULL; imported_trans=imported_trans->next)
@@ -1571,21 +1559,15 @@ create_list_of_potential_matches (GNCImportMainMatcher *gui, GtkTreeModel* model
         if (gnc_import_split_has_online_id (potential_match->data))
             continue;
         split_account = xaccSplitGetAccount (potential_match->data);
-        // This will be NULL the first time around, which is fine for a GSList
-        per_account_list = g_hash_table_lookup (lists_per_accounts, split_account);
+        // g_hash_table_steal_extended would do the two calls in one shot but is not available until 2.58
+        per_account_list = g_hash_table_lookup (lists_per_accounts, import_trans_account);
+        g_hash_table_steal (lists_per_accounts, import_trans_account);
         per_account_list = g_slist_prepend (per_account_list, potential_match->data);
-        g_hash_table_replace (lists_per_accounts, import_trans_account, per_account_list);
+        g_hash_table_insert (lists_per_accounts, import_trans_account, per_account_list);
     }
     return lists_per_accounts;
 }
 
-static gboolean
-destroy_helper (gpointer key, gpointer value, gpointer data)
-{
-    g_slist_free (value);
-    return TRUE;
-}
-
 typedef struct _match_struct
 {
     GNCImportTransInfo* transaction_info;
@@ -1620,11 +1602,9 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
     GSList* imported_trans;
 
     query = qof_query_create_for (GNC_ID_SPLIT);
-    printf("CREATE LIST\n");
     // Gather the list of splits that could match one of the imported transactions, this return a hash table.
     lists_per_accounts = create_list_of_potential_matches (gui, model, query, match_date_hardlimit);
 
-    printf("START LOOP\n");
     // For each imported transaction, grab the list of potential matches corresponding to that account,
     // and evaluate how good a match they are.
     for (imported_trans=gui->temp_trans_list; imported_trans!=NULL; imported_trans=imported_trans->next)
@@ -1649,10 +1629,8 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
         gtk_tree_store_append (GTK_TREE_STORE (model), &iter, NULL);
         refresh_model_row (gui, model, &iter, transaction_info);
     }
-    printf("END LOOP LIST\n");
     
     qof_query_destroy (query);
-    g_hash_table_foreach (lists_per_accounts, (GHFunc) destroy_helper, NULL);
     g_hash_table_destroy (lists_per_accounts);
     return;
 }
diff --git a/gnucash/import-export/test/gtest-import-backend.cpp b/gnucash/import-export/test/gtest-import-backend.cpp
index eedcfba92..345f46eb2 100644
--- a/gnucash/import-export/test/gtest-import-backend.cpp
+++ b/gnucash/import-export/test/gtest-import-backend.cpp
@@ -120,6 +120,8 @@ protected:
         m_dest_acc   = new MockAccount();
         m_trans      = new MockTransaction();
         m_split      = new MockSplit();
+        m_splitList  = NULL;
+        m_splitList  = g_list_prepend (m_splitList, m_split);
 
         using namespace testing;
 
@@ -141,6 +143,7 @@ protected:
     MockAccount*      m_dest_acc;
     MockTransaction*  m_trans;
     MockSplit*        m_split;
+    GList*            m_splitList;
 };
 
 
@@ -160,6 +163,8 @@ TEST_F(ImportBackendTest, CreateTransInfo)
     // Define first split
     ON_CALL(*m_trans, getSplit(0))
         .WillByDefault(Return(m_split));
+    ON_CALL(*m_trans, getSplitList())
+        .WillByDefault(Return(m_splitList));
     // define description of the transaction
     ON_CALL(*m_trans, getDescription())
         .WillByDefault(Return("This is the description"));
@@ -228,6 +233,8 @@ TEST_F(ImportBackendBayesTest, CreateTransInfo)
     // Define first split
     ON_CALL(*m_trans, getSplit(0))
         .WillByDefault(Return(m_split));
+    ON_CALL(*m_trans, getSplitList())
+        .WillByDefault(Return(m_splitList));
     // Transaction has no further splits
     ON_CALL(*m_trans, getSplit(Gt(0)))
         .WillByDefault(Return(nullptr));
diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp
index 0cc63df10..fe49b2404 100644
--- a/libgnucash/engine/Account.cpp
+++ b/libgnucash/engine/Account.cpp
@@ -1859,6 +1859,29 @@ gnc_account_set_balance_dirty (Account *acc)
     priv->balance_dirty = TRUE;
 }
 
+void gnc_account_set_defer_bal_computation (Account *acc, gboolean defer)
+{
+    AccountPrivate *priv;
+    
+    g_return_if_fail (GNC_IS_ACCOUNT (acc));
+    
+    if (qof_instance_get_destroying (acc))
+        return;
+    
+    priv = GET_PRIVATE (acc);
+    priv->defer_bal_computation = defer;
+}
+
+gboolean gnc_account_get_defer_bal_computation (Account *acc)
+{
+    AccountPrivate *priv;
+    if (!acc)
+        return false;
+    priv = GET_PRIVATE (acc);
+    return priv->defer_bal_computation;
+}
+
+
 /********************************************************************\
 \********************************************************************/
 
@@ -2214,7 +2237,7 @@ xaccAccountRecomputeBalance (Account * acc)
 
     priv = GET_PRIVATE(acc);
     if (qof_instance_get_editlevel(acc) > 0) return;
-    if (!priv->balance_dirty) return;
+    if (!priv->balance_dirty || priv->defer_bal_computation) return;
     if (qof_instance_get_destroying(acc)) return;
     if (qof_book_shutting_down(qof_instance_get_book(acc))) return;
 
diff --git a/libgnucash/engine/Account.h b/libgnucash/engine/Account.h
index e23401d99..8fb4bfa10 100644
--- a/libgnucash/engine/Account.h
+++ b/libgnucash/engine/Account.h
@@ -360,7 +360,18 @@ void gnc_account_set_balance_dirty (Account *acc);
  *
  *  @param acc Set the flag on this account. */
 void gnc_account_set_sort_dirty (Account *acc);
-
+    
+/** Set the defer balance flag. If defer is true, the account balance
+ * is not automatically computed, which can save a lot of time if
+ * multiple operations have to be done on the same account. If
+ * defer is false, further operations on account will cause the
+ * balance to be recomputed as normal.
+ *
+ *  @param acc Set the flag on this account.
+ *
+ *  @param defer New value for the flag. */
+void gnc_account_set_defer_bal_computation (Account *acc, gboolean defer);
+    
 /** Insert the given split from an account.
  *
  *  @param acc The account to which the split should be added.
@@ -403,6 +414,8 @@ const char * xaccAccountGetNotes (const Account *account);
 const char * xaccAccountGetLastNum (const Account *account);
 /** Get the account's lot order policy */
 GNCPolicy *gnc_account_get_policy (Account *account);
+/** Get the account's flag for deferred balance computation */
+gboolean gnc_account_get_defer_bal_computation (Account *acc);
 
 /** The following recompute the partial balances (stored with the
  *  transaction) and the total balance, for this account
diff --git a/libgnucash/engine/AccountP.h b/libgnucash/engine/AccountP.h
index 995bc4cdc..ea147aa83 100644
--- a/libgnucash/engine/AccountP.h
+++ b/libgnucash/engine/AccountP.h
@@ -126,6 +126,7 @@ typedef struct AccountPrivate
      * in any way desired.  Handy for specialty traversals of the
      * account tree. */
     short mark;
+    gboolean defer_bal_computation;
 } AccountPrivate;
 
 struct account_s
diff --git a/libgnucash/engine/mocks/gmock-Transaction.cpp b/libgnucash/engine/mocks/gmock-Transaction.cpp
index dd02f6ea8..ae3483cf1 100644
--- a/libgnucash/engine/mocks/gmock-Transaction.cpp
+++ b/libgnucash/engine/mocks/gmock-Transaction.cpp
@@ -46,6 +46,13 @@ xaccTransGetSplit (const Transaction *trans, int i)
     return ((MockTransaction*)trans)->getSplit(i);
 }
 
+SplitList *
+xaccTransGetSplitList (const Transaction *trans)
+{
+    g_return_val_if_fail(GNC_IS_MOCK_TRANSACTION(trans), NULL);
+    return trans ? ((MockTransaction*)trans)->getSplitList() : NULL;
+}
+
 Split *
 xaccTransFindSplitByAccount(const Transaction *trans, const Account *acc)
 {
diff --git a/libgnucash/engine/mocks/gmock-Transaction.h b/libgnucash/engine/mocks/gmock-Transaction.h
index 0790a7c03..5cb26a250 100644
--- a/libgnucash/engine/mocks/gmock-Transaction.h
+++ b/libgnucash/engine/mocks/gmock-Transaction.h
@@ -52,6 +52,7 @@ public:
     MOCK_METHOD0(beginEdit, void());
     MOCK_METHOD0(commitEdit, void());
     MOCK_METHOD1(getSplit, Split *(int));
+    MOCK_METHOD0(getSplitList, SplitList *());
     MOCK_METHOD1(findSplitByAccount, Split *(const Account*));
     MOCK_METHOD0(getDate, time64());
     MOCK_METHOD1(setDatePostedSecsNormalized, void(time64));

commit 1f592ce1914f1db3f9845f5e8898a413cafb1794
Author: jean <you at example.com>
Date:   Tue Jul 21 12:19:28 2020 -0700

    Fix FOO and move deletion where it should be

diff --git a/gnucash/import-export/import-backend.h b/gnucash/import-export/import-backend.h
index ffaeab3ff..3602476e0 100644
--- a/gnucash/import-export/import-backend.h
+++ b/gnucash/import-export/import-backend.h
@@ -64,7 +64,7 @@ typedef enum _action
  *
  * @param trans The transaction for which to check for an existing
  * online_id. */
-gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* Foo);
+gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* id_hash);
 
 /** Evaluates the match between trans_info and split using the provided parameters.
  *
diff --git a/gnucash/import-export/import-main-matcher.c b/gnucash/import-export/import-main-matcher.c
index 6e9da29a9..2ef3fa512 100644
--- a/gnucash/import-export/import-main-matcher.c
+++ b/gnucash/import-export/import-main-matcher.c
@@ -156,12 +156,14 @@ static void
 commit_all (GNCImportMainMatcher *info)
 {
     // Commit the edits for all accounts for which we've called inc_account_edit.
-    for (;info->edited_accounts; info->edited_accounts=info->edited_accounts->next)
+    for (GSList* iter = info->edited_accounts; iter; iter=iter->next)
     {
-        // JEAN Check that the counter is 1 so the edit does	 something.
-        int level = qof_instance_get_editlevel (info->edited_accounts->data);
-        g_assert(level == 1);
-        xaccAccountCommitEdit (info->edited_accounts->data);
+        int level = qof_instance_get_editlevel (iter->data);
+        // This should not happen.
+        if (level != 1)
+            PERR ("import-main-macher.c: Edit level not 1");
+        while (qof_instance_get_editlevel (iter->data) > 0)
+            xaccAccountCommitEdit (iter->data);
     }
     g_slist_free (info->edited_accounts);
     info->edited_accounts = NULL;
@@ -203,7 +205,6 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
                 info->transaction_processed_cb (trans_info, FALSE,
                                                 info->user_data);
             }
-            gnc_import_TransInfo_delete (trans_info);
         }
         while (gtk_tree_model_iter_next (model, &iter));
     }
@@ -221,7 +222,9 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
     // We've called xaccAccountBeginEdit on many accounts to defer the commits until now.
     // Let's do it now that we're done.
     commit_all (info);
-    
+
+    g_slist_free_full (info->temp_trans_list,(GDestroyNotify) gnc_import_TransInfo_delete);
+    info->temp_trans_list = NULL;
     g_hash_table_foreach_remove (info->id_hash, delete_hash, NULL);
     info->id_hash = NULL;
     g_free (info);
@@ -1649,8 +1652,6 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
     printf("END LOOP LIST\n");
     
     qof_query_destroy (query);
-    g_slist_free (gui->temp_trans_list);
-    gui->temp_trans_list = NULL;
     g_hash_table_foreach (lists_per_accounts, (GHFunc) destroy_helper, NULL);
     g_hash_table_destroy (lists_per_accounts);
     return;

commit 2be2ff8af452e887db29f41235238aad2f2ce77c
Author: jean <you at example.com>
Date:   Sun Jul 19 23:02:53 2020 -0700

    To further increase the import speed, it's necessary to prevent any account commit to happen
    until the very end of the import (OK or Cancel), because account commits trigger very lengthy balance
    computations. For this, I call xaccAccountBeginEdit on all the accounts involved in the import,
    keeping a list of them so BeginEdit is called only once. At the end of the import, commit is called
    on all the accounts in the list. Note that when the user selects a target account for an imported
    transaction, xaccAccountBeginEdit is called on the target account, and it is added to the list.
    
    Another area of improvement is avoiding re-checking all register transactions to verify whether
    a given imported transaction has already been matched. Instead, a hash table of split online IDs
    is computed once (per account), and verified for each incoming transactions.
    
    Finally, the list of register transactions that are potential matches for the imported ones is
    further pruned ahead of time to only keep transactions that do not have an online ID. This avoid
    the repeated checks that were previously happening in the match-score loop.
    
    With this, importing 6000 transactions into a 6000 split account becomes fairly fast (a few seconds
    on my slowish machine).
    
    There are still slow areas: If you select all 6000 imported transactions and assign a destination
    account to all of them, the process is impossibly sluggish because of repeated path operations
    (selections, freeing) in the tree view.
    If you do not specify a target account for any of the 6000 imported transactions, an "imbalance"
    account is used, but the xaccAccountBeginEdit mechanism isn't applied to it by the new code, so
    each imported transaction will trigger a commit, and therefore a slow balance recomputation.
    
    Remove use of xaccTransGetSplit

diff --git a/gnucash/import-export/import-backend.c b/gnucash/import-export/import-backend.c
index 867b095c0..6ffdcf258 100644
--- a/gnucash/import-export/import-backend.c
+++ b/gnucash/import-export/import-backend.c
@@ -426,8 +426,6 @@ TransactionGetTokens(GNCImportTransInfo *info)
     time64 transtime;
     struct tm *tm_struct;
     char local_day_of_week[16];
-    Split* split;
-    int split_index;
 
     g_return_val_if_fail (info, NULL);
     if (info->match_tokens) return info->match_tokens;
@@ -458,12 +456,10 @@ TransactionGetTokens(GNCImportTransInfo *info)
     tokens = g_list_prepend(tokens, g_strdup(local_day_of_week));
 
     /* make tokens from the memo of each split of this transaction */
-    split_index = 0;
-    while ((split = xaccTransGetSplit(transaction, split_index)))
+    for (GList *split=xaccTransGetSplitList (transaction); split; split=split->next)
     {
-        text = xaccSplitGetMemo(split);
+        text = xaccSplitGetMemo(split->data);
         tokens = tokenize_string(tokens, text);
-        split_index++; /* next split */
     }
 
     /* remember the list of tokens for later.. */
@@ -628,7 +624,7 @@ void split_find_match (GNCImportTransInfo * trans_info,
         Split *new_trans_fsplit = gnc_import_TransInfo_get_fsplit (trans_info);
 
         // Do not consider transactions that have been previously matched.
-        if (gnc_import_split_has_online_id (split))
+        if (gnc_import_split_has_online_id (split)) // JEAN THIS CAN NOW BE REMOVED.
             return;
 
         /* Matching heuristics */
@@ -1085,9 +1081,27 @@ static gint check_trans_online_id(Transaction *trans1, void *user_data)
     }
 }
 
+static gint collect_trans_online_id(Transaction *trans, void *user_data) //JEAN COLLECT
+{
+    Split *split;
+    GHashTable *id_hash = user_data;
+    int i=0;
+    
+    const gchar* online_id = gnc_import_get_trans_online_id (trans);
+    if (online_id)
+        g_hash_table_add (id_hash, (void*) online_id);
+
+    for (GList *splits = xaccTransGetSplitList (trans); splits; splits = splits->next)
+    {
+        if (gnc_import_split_has_online_id (splits->data))
+            g_hash_table_add(id_hash, (void*) gnc_import_get_split_online_id (splits->data));
+    }
+    return 0;
+}
+
 /** Checks whether the given transaction's online_id already exists in
   its parent account. */
-gboolean gnc_import_exists_online_id (Transaction *trans)
+gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* id_hash)
 {
     gboolean online_id_exists = FALSE;
     Account *dest_acct;
@@ -1096,13 +1110,20 @@ gboolean gnc_import_exists_online_id (Transaction *trans)
     /* Look for an online_id in the first split */
     source_split = xaccTransGetSplit(trans, 0);
     g_assert(source_split);
-
-    /* DEBUG("%s%d%s","Checking split ",i," for duplicates"); */
-    dest_acct = xaccSplitGetAccount(source_split);
-    online_id_exists = xaccAccountForEachTransaction(dest_acct,
-                       check_trans_online_id,
-                       source_split);
-
+    // Create a hash per account of a hash of all transactions IDs. Then the test below will be fast if
+    // we have many transactions to import.
+    dest_acct = xaccSplitGetAccount (source_split);
+    if (!g_hash_table_contains (id_hash, dest_acct))
+    {
+        GHashTable* new_hash = g_hash_table_new (g_str_hash, g_str_equal);
+        g_hash_table_insert (id_hash, dest_acct, new_hash);
+        printf ("CREATE HASH\n");
+        xaccAccountForEachTransaction (dest_acct, collect_trans_online_id, new_hash);
+        printf ("CREATE DONE\n");
+    }
+    online_id_exists = g_hash_table_contains (g_hash_table_lookup (id_hash, dest_acct),
+                                              gnc_import_get_split_online_id (source_split));
+    
     /* If it does, abort the process for this transaction, since it is
        already in the system. */
     if (online_id_exists == TRUE)
diff --git a/gnucash/import-export/import-backend.h b/gnucash/import-export/import-backend.h
index 4d8d5fe1f..ffaeab3ff 100644
--- a/gnucash/import-export/import-backend.h
+++ b/gnucash/import-export/import-backend.h
@@ -64,7 +64,7 @@ typedef enum _action
  *
  * @param trans The transaction for which to check for an existing
  * online_id. */
-gboolean gnc_import_exists_online_id (Transaction *trans);
+gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* Foo);
 
 /** Evaluates the match between trans_info and split using the provided parameters.
  *
diff --git a/gnucash/import-export/import-main-matcher.c b/gnucash/import-export/import-main-matcher.c
index 2765e4c02..6e9da29a9 100644
--- a/gnucash/import-export/import-main-matcher.c
+++ b/gnucash/import-export/import-main-matcher.c
@@ -53,6 +53,7 @@
 #include "guid.h"
 #include "gnc-session.h"
 #include "Query.h"
+#include "SplitP.h"
 
 #define GNC_PREFS_GROUP "dialogs.import.generic.transaction-list"
 #define IMPORT_MAIN_MATCHER_CM_CLASS "transaction-matcher-dialog"
@@ -74,6 +75,8 @@ struct _main_matcher_info
     gboolean add_toggled;   // flag to indicate that add has been toggled to stop selection
     gint id;
     GSList* temp_trans_list; // Temporary list of imported transactions
+    GHashTable* id_hash;     // Hash table, per account, of list of transaction IDs.
+    GSList* edited_accounts; // List of accounts currently edited.
 };
 
 enum downloaded_cols
@@ -142,11 +145,46 @@ static void refresh_model_row (
                     GNCImportTransInfo *info);
 /* end local prototypes */
 
+static gboolean delete_hash (gpointer key, gpointer value, gpointer user_data)
+{
+    // Value is a hash table that needs to be destroyed. JEAN: I should free the memory used for the keys.
+    g_hash_table_destroy (value);
+    return TRUE;
+}
+
+static void
+commit_all (GNCImportMainMatcher *info)
+{
+    // Commit the edits for all accounts for which we've called inc_account_edit.
+    for (;info->edited_accounts; info->edited_accounts=info->edited_accounts->next)
+    {
+        // JEAN Check that the counter is 1 so the edit does	 something.
+        int level = qof_instance_get_editlevel (info->edited_accounts->data);
+        g_assert(level == 1);
+        xaccAccountCommitEdit (info->edited_accounts->data);
+    }
+    g_slist_free (info->edited_accounts);
+    info->edited_accounts = NULL;
+}
+
+static void
+inc_account_edit (GNCImportMainMatcher *info, Account* acc)
+{
+    // Call BeginEdit on accounts that haven't been edited yet. This will speeds things up
+    // a lot by preventing commits until we're all done.
+    if (qof_instance_get_editlevel (acc) == 0)
+    {
+        xaccAccountBeginEdit (acc);
+        info->edited_accounts = g_slist_prepend (info->edited_accounts, acc);
+    }
+}
+
 void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
 {
     GtkTreeModel *model;
     GtkTreeIter iter;
     GNCImportTransInfo *trans_info;
+    Split* first_split = NULL;
 
     if (info == NULL)
         return;
@@ -165,7 +203,6 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
                 info->transaction_processed_cb (trans_info, FALSE,
                                                 info->user_data);
             }
-
             gnc_import_TransInfo_delete (trans_info);
         }
         while (gtk_tree_model_iter_next (model, &iter));
@@ -180,6 +217,13 @@ void gnc_gen_trans_list_delete (GNCImportMainMatcher *info)
     }
     else
         gnc_import_Settings_delete (info->user_settings);
+
+    // We've called xaccAccountBeginEdit on many accounts to defer the commits until now.
+    // Let's do it now that we're done.
+    commit_all (info);
+    
+    g_hash_table_foreach_remove (info->id_hash, delete_hash, NULL);
+    info->id_hash = NULL;
     g_free (info);
 }
 
@@ -222,13 +266,16 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info)
     /* Don't run any queries and/or split sorts while processing the matcher
     results. */
     gnc_suspend_gui_refresh();
-
+    printf("START OK LOOP\n");
     do
     {
         gtk_tree_model_get (model, &iter,
                             DOWNLOADED_COL_DATA, &trans_info,
                             -1);
 
+        // JEAN: the account of all splits should be set as edited. IF THERE'S ONLY 1 SPLIT, A SPLIT
+        // WILL BE CREATED with unbalanced, and the account will not have its edit level increased...
+
         if (gnc_import_process_trans_item (NULL, trans_info))
         {
             if (info->transaction_processed_cb)
@@ -239,7 +286,8 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info)
         }
     }
     while (gtk_tree_model_iter_next (model, &iter));
-
+    printf("DONE OK LOOP\n");
+    
     gnc_gen_trans_list_delete (info);
 
     /* Allow GUI refresh again. */
@@ -340,7 +388,10 @@ run_account_picker_dialog (GNCImportMainMatcher *info,
              old_acc,
              &ok_pressed);
     if (ok_pressed)
-        gnc_import_TransInfo_set_destacc (trans_info, new_acc, TRUE);
+    {
+        gnc_import_TransInfo_set_destacc (trans_info, new_acc, TRUE); // JEAN: BeginEdit on this account
+        inc_account_edit (info, new_acc);
+    }
 }
 
 static void
@@ -492,7 +543,10 @@ gnc_gen_trans_assign_transfer_account (GtkTreeView *treeview,
                             gnc_account_get_full_name (*new_acc));
                 }
                 if (ok_pressed)
-                    gnc_import_TransInfo_set_destacc (trans_info, *new_acc, TRUE);
+                {
+                    gnc_import_TransInfo_set_destacc (trans_info, *new_acc, TRUE); // JEAN Set Begin_edit on this account.
+                    inc_account_edit (info, *new_acc);
+                }
             }
             break;
         case GNCImport_CLEAR:
@@ -566,7 +620,7 @@ gnc_gen_trans_assign_transfer_account_to_selection_cb (
     }
     g_list_free_full (selected_rows, (GDestroyNotify) gtk_tree_path_free);
 
-    // now reselect the transaction rows.
+    // now reselect the transaction rows. // JEAN THIS GETS REALLY SLOW AS WELL
     for (l = refs; l != NULL; l = l->next)
     {
         GtkTreePath *path = gtk_tree_row_reference_get_path (l->data);
@@ -874,6 +928,8 @@ gnc_gen_trans_init_view (GNCImportMainMatcher *info,
                       G_CALLBACK(gnc_gen_trans_onButtonPressed_cb), info);
     g_signal_connect (view, "popup-menu",
                       G_CALLBACK(gnc_gen_trans_onPopupMenu_cb), info);
+    
+    info->id_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
 }
 
 static void
@@ -1417,6 +1473,16 @@ refresh_model_row (GNCImportMainMatcher *gui,
 
 void gnc_gen_trans_list_add_trans (GNCImportMainMatcher *gui, Transaction *trans)
 {
+    Account* acc = NULL;
+    Split* split = NULL;
+    int i=0;
+    
+    // JEAN call xaccAccountBeginEdit on trans_info->first_split->acc, but only once.
+    // This makes the deletion a lot faster because the commit is only done once at the end.
+    split = xaccTransGetSplit (trans, 0);
+    acc = xaccSplitGetAccount (split);
+    inc_account_edit (gui, acc);
+    
     gnc_gen_trans_list_add_trans_with_ref_id (gui, trans, 0);
     return;
 }/* end gnc_import_add_trans() */
@@ -1443,8 +1509,8 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
     GtkTreeIter iter;
     g_assert (gui);
     g_assert (trans);
-
-    if (gnc_import_exists_online_id (trans))
+  
+    if (gnc_import_exists_online_id (trans, gui->id_hash))
         return;
     else
     {
@@ -1497,9 +1563,13 @@ create_list_of_potential_matches (GNCImportMainMatcher *gui, GtkTreeModel* model
     // Now put all potential matches into a hash table based on their account.
     for (potential_match=query_return; potential_match!=NULL; potential_match=potential_match->next)
     {
-        Account* split_account = xaccSplitGetAccount (potential_match->data);
+        Account* split_account;
+        GSList* per_account_list;
+        if (gnc_import_split_has_online_id (potential_match->data))
+            continue;
+        split_account = xaccSplitGetAccount (potential_match->data);
         // This will be NULL the first time around, which is fine for a GSList
-        GSList* per_account_list = g_hash_table_lookup (lists_per_accounts, split_account);
+        per_account_list = g_hash_table_lookup (lists_per_accounts, split_account);
         per_account_list = g_slist_prepend (per_account_list, potential_match->data);
         g_hash_table_replace (lists_per_accounts, import_trans_account, per_account_list);
     }
@@ -1509,7 +1579,7 @@ create_list_of_potential_matches (GNCImportMainMatcher *gui, GtkTreeModel* model
 static gboolean
 destroy_helper (gpointer key, gpointer value, gpointer data)
 {
-    g_slist_free(value);
+    g_slist_free (value);
     return TRUE;
 }
 
@@ -1547,9 +1617,11 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
     GSList* imported_trans;
 
     query = qof_query_create_for (GNC_ID_SPLIT);
+    printf("CREATE LIST\n");
     // Gather the list of splits that could match one of the imported transactions, this return a hash table.
     lists_per_accounts = create_list_of_potential_matches (gui, model, query, match_date_hardlimit);
 
+    printf("START LOOP\n");
     // For each imported transaction, grab the list of potential matches corresponding to that account,
     // and evaluate how good a match they are.
     for (imported_trans=gui->temp_trans_list; imported_trans!=NULL; imported_trans=imported_trans->next)
@@ -1558,7 +1630,7 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
         Account *importaccount = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
         match_struct s = {transaction_info, display_threshold, fuzzy_amount};
         
-        g_slist_foreach(g_hash_table_lookup (lists_per_accounts, importaccount),(GFunc) match_helper, &s);
+        g_slist_foreach (g_hash_table_lookup (lists_per_accounts, importaccount), (GFunc) match_helper, &s);
 
         // Sort the matches, select the best match, and set the action.
         gnc_import_TransInfo_init_matches (transaction_info, gui->user_settings);
@@ -1574,7 +1646,8 @@ gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
         gtk_tree_store_append (GTK_TREE_STORE (model), &iter, NULL);
         refresh_model_row (gui, model, &iter, transaction_info);
     }
-
+    printf("END LOOP LIST\n");
+    
     qof_query_destroy (query);
     g_slist_free (gui->temp_trans_list);
     gui->temp_trans_list = NULL;

commit 0da826f31161fd51ca45b3a6274cae37282fa9b0
Author: jean <you at example.com>
Date:   Fri Jul 17 18:52:01 2020 -0700

    Instead of saving the imported transaction into the treeview, which takes more time
    I now save them into a temporary list. A single query is done for all imported transactions
    and the resulting register splits are put into a hash table of lists with accounts
    as key, which will speed things up when multiple accounts are found.
    My tests of large imports on large accounts seems to ate that most of the time is spent
    verifying whether the imported transactions has already been imported, then computing
    the balance repeatedly for each imported transaction (!)  when the user clicks OK to
    add all the transactions to the account. So there's still room for improvement here!

diff --git a/gnucash/import-export/import-main-matcher.c b/gnucash/import-export/import-main-matcher.c
index f12f3d315..2765e4c02 100644
--- a/gnucash/import-export/import-main-matcher.c
+++ b/gnucash/import-export/import-main-matcher.c
@@ -73,6 +73,7 @@ struct _main_matcher_info
     GtkWidget         *reconcile_after_close;
     gboolean add_toggled;   // flag to indicate that add has been toggled to stop selection
     gint id;
+    GSList* temp_trans_list; // Temporary list of imported transactions
 };
 
 enum downloaded_cols
@@ -110,7 +111,7 @@ gboolean on_matcher_delete_event (GtkWidget *widget, GdkEvent *event, gpointer d
 void on_matcher_help_clicked (GtkButton *button, gpointer user_data);
 void on_matcher_help_close_clicked (GtkButton *button, gpointer user_data);
 
-static void gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui);
+static void gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui);
 
 /* Local prototypes */
 static void gnc_gen_trans_assign_transfer_account (
@@ -189,12 +190,13 @@ gboolean gnc_gen_trans_list_empty(GNCImportMainMatcher *info)
     GNCImportTransInfo *trans_info;
     g_assert (info);
     model = gtk_tree_view_get_model (info->view);
-    return !gtk_tree_model_get_iter_first (model, &iter);
+    // Check that both the tree model and the temporary list are empty.
+    return !gtk_tree_model_get_iter_first (model, &iter) && !info->temp_trans_list;
 }
 
 void gnc_gen_trans_list_show_all(GNCImportMainMatcher *info)
 {
-    gnc_gen_trans_list_finalize (info);
+    gnc_gen_trans_list_create_matches (info);
     gtk_widget_show_all (GTK_WIDGET (info->main_widget));
 }
 
@@ -1442,46 +1444,43 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
     g_assert (gui);
     g_assert (trans);
 
-
     if (gnc_import_exists_online_id (trans))
         return;
     else
     {
         transaction_info = gnc_import_TransInfo_new (trans, NULL);
         gnc_import_TransInfo_set_ref_id (transaction_info, ref_id);
-        model = gtk_tree_view_get_model (gui->view);
-        gtk_list_store_append (GTK_LIST_STORE(model), &iter);
-        refresh_model_row (gui, model, &iter, transaction_info);
+        // It's much faster to gather the imported transactions into a GSList than directly into the
+        // treeview.
+        gui->temp_trans_list = g_slist_prepend (gui->temp_trans_list, transaction_info);
     }
     return;
 }
 
-// This creates a list of all splits that could match one of the imported transactions based on their
-// account and date.
-static GList*
-create_list_of_potential_matches (GtkTreeModel* model, Query *query, gint match_date_hardlimit)
+// This creates lists of all splits that could match one of the imported transactions based on their
+// account and date and organized per account
+static GHashTable*
+create_list_of_potential_matches (GNCImportMainMatcher *gui, GtkTreeModel* model, Query *query, gint match_date_hardlimit)
 {
-    GList* list_element = NULL;
-    GtkTreeIter iter;
+    GList* query_return = NULL;
     Account *import_trans_account;
     time64 import_trans_time, min_time=G_MAXINT64, max_time=0;
     GList* all_accounts = NULL;
+    static const int secs_per_day = 86400;
+    GSList* imported_trans;
+    GList* potential_match;
+    GHashTable* lists_per_accounts = g_hash_table_new (g_direct_hash, g_direct_equal);
 
     // Go through all imported transactions, gather the list of accounts, and min/max date range.
-    gboolean valid = gtk_tree_model_get_iter_first (model, &iter);
-    while(valid)
+    for (imported_trans=gui->temp_trans_list; imported_trans!=NULL; imported_trans=imported_trans->next)
     {
         GNCImportTransInfo* transaction_info;
-        gtk_tree_model_get (model, &iter,
-                            DOWNLOADED_COL_DATA, &transaction_info,
-                            -1);
+        transaction_info = imported_trans->data;
         import_trans_account = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
         all_accounts = g_list_prepend (all_accounts, import_trans_account);
         import_trans_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (transaction_info));
         min_time = MIN (min_time, import_trans_time);
         max_time = MAX (max_time, import_trans_time);
-
-        valid = gtk_tree_model_iter_next (model, &iter);
     }
 
     // Make a query to find splits with the right accounts and dates.
@@ -1489,57 +1488,77 @@ create_list_of_potential_matches (GtkTreeModel* model, Query *query, gint match_
     xaccQueryAddAccountMatch (query, all_accounts,
                               QOF_GUID_MATCH_ANY, QOF_QUERY_AND);
     xaccQueryAddDateMatchTT (query,
-                             TRUE, min_time - match_date_hardlimit * 86400,
-                             TRUE, max_time + match_date_hardlimit * 86400,
+                             TRUE, min_time - match_date_hardlimit * secs_per_day,
+                             TRUE, max_time + match_date_hardlimit * secs_per_day,
                              QOF_QUERY_AND);
-    list_element = qof_query_run (query);
+    query_return = qof_query_run (query);
     g_list_free (all_accounts);
-    return list_element;
+    
+    // Now put all potential matches into a hash table based on their account.
+    for (potential_match=query_return; potential_match!=NULL; potential_match=potential_match->next)
+    {
+        Account* split_account = xaccSplitGetAccount (potential_match->data);
+        // This will be NULL the first time around, which is fine for a GSList
+        GSList* per_account_list = g_hash_table_lookup (lists_per_accounts, split_account);
+        per_account_list = g_slist_prepend (per_account_list, potential_match->data);
+        g_hash_table_replace (lists_per_accounts, import_trans_account, per_account_list);
+    }
+    return lists_per_accounts;
+}
+
+static gboolean
+destroy_helper (gpointer key, gpointer value, gpointer data)
+{
+    g_slist_free(value);
+    return TRUE;
 }
 
+typedef struct _match_struct
+{
+    GNCImportTransInfo* transaction_info;
+    gint display_threshold;
+    double fuzzy_amount;
+} match_struct;
+
 static void
-gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui)
+match_helper (Split* data, match_struct* s)
+{
+    split_find_match (s->transaction_info, data, s->display_threshold, s->fuzzy_amount);
+}
+
+/* This goes through the temporary list of imported transactions, runs a single query to gater potential matching
+ * register transactions based on accounts and dates, then selects potential matching transactions for
+ * each imported transaction. Doing a single query makes things a lot faster than doing one query per
+ * imported transaction.
+ */
+void
+gnc_gen_trans_list_create_matches (GNCImportMainMatcher *gui)
 {
     GtkTreeModel* model = gtk_tree_view_get_model (gui->view);
     GtkListStore* store = GTK_LIST_STORE (model);
     GtkTreeIter iter;
     GNCImportMatchInfo *selected_match;
     gboolean match_selected_manually;
-    Account *importaccount;
-    GList* list_element = NULL;
-    GList* iter_2 = NULL;
+    GHashTable* lists_per_accounts = NULL;
     Query *query;
     gint match_date_hardlimit = gnc_import_Settings_get_match_date_hardlimit (gui->user_settings);
     gint display_threshold = gnc_import_Settings_get_display_threshold (gui->user_settings);
     double fuzzy_amount = gnc_import_Settings_get_fuzzy_amount (gui->user_settings);
-    gboolean valid;
+    GSList* imported_trans;
 
     query = qof_query_create_for (GNC_ID_SPLIT);
-    // Gather the list of splits that could match one of the imported transactions
-    list_element = create_list_of_potential_matches (model, query, match_date_hardlimit);
+    // Gather the list of splits that could match one of the imported transactions, this return a hash table.
+    lists_per_accounts = create_list_of_potential_matches (gui, model, query, match_date_hardlimit);
 
-    // For each imported transaction, go through list_element, select the ones that have the
-    // right account, and evaluate how good a match they are.
-    valid = gtk_tree_model_get_iter_first (model, &iter);
-    while(valid)
+    // For each imported transaction, grab the list of potential matches corresponding to that account,
+    // and evaluate how good a match they are.
+    for (imported_trans=gui->temp_trans_list; imported_trans!=NULL; imported_trans=imported_trans->next)
     {
-        GNCImportTransInfo* transaction_info;
-        Account *importaccount;
-        gtk_tree_model_get (model, &iter,
-                            DOWNLOADED_COL_DATA, &transaction_info,
-                            -1);
-        importaccount = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
-
-        iter_2 = list_element;
-        while (iter_2 != NULL)
-        {
-            // One issue here is that the accounts may not match since the query was
-            // called for all accounts so we need to check here.
-            Account* split_account = xaccSplitGetAccount(iter_2->data);
-            if (qof_instance_guid_compare (importaccount, split_account) == 0)
-                split_find_match (transaction_info, iter_2->data, display_threshold, fuzzy_amount);
-            iter_2 = g_list_next (iter_2);
-        }
+        GNCImportTransInfo* transaction_info = imported_trans->data;
+        Account *importaccount = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
+        match_struct s = {transaction_info, display_threshold, fuzzy_amount};
+        
+        g_slist_foreach(g_hash_table_lookup (lists_per_accounts, importaccount),(GFunc) match_helper, &s);
 
         // Sort the matches, select the best match, and set the action.
         gnc_import_TransInfo_init_matches (transaction_info, gui->user_settings);
@@ -1551,12 +1570,16 @@ gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui)
             gnc_import_PendingMatches_add_match (gui->pending_matches,
                                                  selected_match,
                                                  match_selected_manually);
-
+        
+        gtk_tree_store_append (GTK_TREE_STORE (model), &iter, NULL);
         refresh_model_row (gui, model, &iter, transaction_info);
-        valid = gtk_tree_model_iter_next (model, &iter);
     }
 
     qof_query_destroy (query);
+    g_slist_free (gui->temp_trans_list);
+    gui->temp_trans_list = NULL;
+    g_hash_table_foreach (lists_per_accounts, (GHFunc) destroy_helper, NULL);
+    g_hash_table_destroy (lists_per_accounts);
     return;
 }
 

commit 85515214767cbafd7489d4edebe2dbce49c5d61f
Author: jean <you at example.com>
Date:   Mon May 11 22:19:35 2020 -0700

    Speeds up the import of ofx files by only doing one query at the end.
    
    The previous ofx import code performed one query for each imported transactions, which
    was quite slow. The change consists of gathering all ofx transactions before doing the
    query. The query must be wider to search for all matching accounts (in case the imported
    transactions come from different accounts) and an enlarge date range (according to the
    earliest and latest imported transaction). The rest of the code is identical to what was
    done before. The final query is performed just before the matching dialog is displayed.

diff --git a/gnucash/import-export/import-backend.c b/gnucash/import-export/import-backend.c
index 747cf29cd..867b095c0 100644
--- a/gnucash/import-export/import-backend.c
+++ b/gnucash/import-export/import-backend.c
@@ -607,7 +607,7 @@ matchmap_store_destination (GncImportMatchMap *matchmap,
 
 /** @brief The transaction matching heuristics are here.
  */
-static void split_find_match (GNCImportTransInfo * trans_info,
+void split_find_match (GNCImportTransInfo * trans_info,
                               Split * split,
                               gint display_threshold,
                               double fuzzy_amount_difference)
@@ -821,66 +821,6 @@ static void split_find_match (GNCImportTransInfo * trans_info,
     }
 }/* end split_find_match */
 
-
-/** /brief Iterate through all splits of the originating account of the given
-   transaction, and find all matching splits there. */
-void gnc_import_find_split_matches(GNCImportTransInfo *trans_info,
-                                   gint process_threshold,
-                                   double fuzzy_amount_difference,
-                                   gint match_date_hardlimit)
-{
-    GList * list_element;
-    Query *query = qof_query_create_for(GNC_ID_SPLIT);
-    g_assert (trans_info);
-
-    /* Get list of splits of the originating account. */
-    {
-        /* We used to traverse *all* splits of the account by using
-           xaccAccountGetSplitList, which is a bad idea because 90% of these
-           splits are outside the date range that is interesting. We should
-           rather use a query according to the date region, which is
-           implemented here.
-        */
-        Account *importaccount =
-            xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (trans_info));
-        time64 download_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (trans_info));
-
-        qof_query_set_book (query, gnc_get_current_book());
-        xaccQueryAddSingleAccountMatch (query, importaccount,
-                                        QOF_QUERY_AND);
-        xaccQueryAddDateMatchTT (query,
-                                 TRUE, download_time - match_date_hardlimit * 86400,
-                                 TRUE, download_time + match_date_hardlimit * 86400,
-                                 QOF_QUERY_AND);
-        list_element = qof_query_run (query);
-        /* Sigh. Doesn't help too much. We still create and run one query
-           for each imported transaction. Maybe it would improve
-           performance further if there is one single (master-)query at
-           the beginning, matching the full date range and all accounts in
-           question. However, this doesn't quite work because this function
-           here is called from each gnc_gen_trans_list_add_trans(), which
-           is called one at a time. Therefore the whole importer would
-           have to change its behaviour: Accept the imported txns via
-           gnc_gen_trans_list_add_trans(), and only when
-           gnc_gen_trans_list_run() is called, then calculate all the
-           different match candidates. That's too much work for now.
-        */
-    }
-
-    /* Traverse that list, calling split_find_match on each one. Note
-       that xaccAccountForEachSplit is declared in Account.h but
-       implemented nowhere :-( */
-    while (list_element != NULL)
-    {
-        split_find_match (trans_info, list_element->data,
-                          process_threshold, fuzzy_amount_difference);
-        list_element = g_list_next (list_element);
-    }
-
-    qof_query_destroy (query);
-}
-
-
 /***********************************************************************
  */
 
@@ -1222,13 +1162,6 @@ gnc_import_TransInfo_init_matches (GNCImportTransInfo *trans_info,
     GNCImportMatchInfo * best_match = NULL;
     g_assert (trans_info);
 
-
-    /* Find all split matches in originating account. */
-    gnc_import_find_split_matches(trans_info,
-                                  gnc_import_Settings_get_display_threshold (settings),
-                                  gnc_import_Settings_get_fuzzy_amount (settings),
-                                  gnc_import_Settings_get_match_date_hardlimit (settings));
-
     if (trans_info->match_list != NULL)
     {
         trans_info->match_list = g_list_sort(trans_info->match_list,
diff --git a/gnucash/import-export/import-backend.h b/gnucash/import-export/import-backend.h
index 2ea81527d..4d8d5fe1f 100644
--- a/gnucash/import-export/import-backend.h
+++ b/gnucash/import-export/import-backend.h
@@ -66,34 +66,20 @@ typedef enum _action
  * online_id. */
 gboolean gnc_import_exists_online_id (Transaction *trans);
 
-/** Iterate through all splits of the originating account of the given
- * transaction, find all matching splits there, and store them in the
- * GNCImportTransInfo structure.
+/** Evaluates the match between trans_info and split using the provided parameters.
  *
- * @param trans_info The TransInfo for which the corresponding
- * matching existing transactions should be found.
+ * @param trans_info The TransInfo for the imported transaction
  *
- * @param process_threshold Each match whose heuristics are smaller
- * than this value is totally ignored.
+ * @param split The register split that should be evaluated for a match.
  *
- * @param fuzzy_amount_difference For fuzzy amount matching, a certain
- * fuzzyness in the matching amount is allowed up to this value. May
- * be e.g. 3.00 dollars for ATM fees, or 0.0 if you only want to allow
- * exact matches.
+ * @param display_threshold Minimum match score to include split in the list of matches.
  *
- * @param match_date_hardlimit The number of days that a matching
- * split may differ from the given transaction before it is discarded
- * immediately. In other words, any split that is more distant from
- * the given transaction than this match_date_hardlimit days will be
- * ignored altogether. For use cases without paper checks (e.g. HBCI),
- * values like 14 (days) might be appropriate, whereas for use cases
- * with paper checks (e.g. OFX, QIF), values like 42 (days) seem more
- * appropriate.
+ * @param fuzzy_amount_difference Maximum amount difference to consider the match good.
  */
-void gnc_import_find_split_matches(GNCImportTransInfo *trans_info,
-                                   gint process_threshold,
-                                   double fuzzy_amount_difference,
-                                   gint match_date_hardlimit);
+void split_find_match (GNCImportTransInfo * trans_info,
+                       Split * split,
+                       gint display_threshold,
+                       double fuzzy_amount_difference);
 
 /** Iterates through all splits of the originating account of
  * trans_info. Sorts the resulting list and sets the selected_match
diff --git a/gnucash/import-export/import-main-matcher.c b/gnucash/import-export/import-main-matcher.c
index a8de55afe..f12f3d315 100644
--- a/gnucash/import-export/import-main-matcher.c
+++ b/gnucash/import-export/import-main-matcher.c
@@ -52,6 +52,7 @@
 #include "gnc-component-manager.h"
 #include "guid.h"
 #include "gnc-session.h"
+#include "Query.h"
 
 #define GNC_PREFS_GROUP "dialogs.import.generic.transaction-list"
 #define IMPORT_MAIN_MATCHER_CM_CLASS "transaction-matcher-dialog"
@@ -109,6 +110,8 @@ gboolean on_matcher_delete_event (GtkWidget *widget, GdkEvent *event, gpointer d
 void on_matcher_help_clicked (GtkButton *button, gpointer user_data);
 void on_matcher_help_close_clicked (GtkButton *button, gpointer user_data);
 
+static void gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui);
+
 /* Local prototypes */
 static void gnc_gen_trans_assign_transfer_account (
                     GtkTreeView *treeview,
@@ -191,6 +194,7 @@ gboolean gnc_gen_trans_list_empty(GNCImportMainMatcher *info)
 
 void gnc_gen_trans_list_show_all(GNCImportMainMatcher *info)
 {
+    gnc_gen_trans_list_finalize (info);
     gtk_widget_show_all (GTK_WIDGET (info->main_widget));
 }
 
@@ -1435,8 +1439,6 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
     GNCImportTransInfo * transaction_info = NULL;
     GtkTreeModel *model;
     GtkTreeIter iter;
-    GNCImportMatchInfo *selected_match;
-    gboolean match_selected_manually;
     g_assert (gui);
     g_assert (trans);
 
@@ -1447,26 +1449,116 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
     {
         transaction_info = gnc_import_TransInfo_new (trans, NULL);
         gnc_import_TransInfo_set_ref_id (transaction_info, ref_id);
+        model = gtk_tree_view_get_model (gui->view);
+        gtk_list_store_append (GTK_LIST_STORE(model), &iter);
+        refresh_model_row (gui, model, &iter, transaction_info);
+    }
+    return;
+}
+
+// This creates a list of all splits that could match one of the imported transactions based on their
+// account and date.
+static GList*
+create_list_of_potential_matches (GtkTreeModel* model, Query *query, gint match_date_hardlimit)
+{
+    GList* list_element = NULL;
+    GtkTreeIter iter;
+    Account *import_trans_account;
+    time64 import_trans_time, min_time=G_MAXINT64, max_time=0;
+    GList* all_accounts = NULL;
+
+    // Go through all imported transactions, gather the list of accounts, and min/max date range.
+    gboolean valid = gtk_tree_model_get_iter_first (model, &iter);
+    while(valid)
+    {
+        GNCImportTransInfo* transaction_info;
+        gtk_tree_model_get (model, &iter,
+                            DOWNLOADED_COL_DATA, &transaction_info,
+                            -1);
+        import_trans_account = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
+        all_accounts = g_list_prepend (all_accounts, import_trans_account);
+        import_trans_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (transaction_info));
+        min_time = MIN (min_time, import_trans_time);
+        max_time = MAX (max_time, import_trans_time);
+
+        valid = gtk_tree_model_iter_next (model, &iter);
+    }
 
-        gnc_import_TransInfo_init_matches (transaction_info,
-                                           gui->user_settings);
+    // Make a query to find splits with the right accounts and dates.
+    qof_query_set_book (query, gnc_get_current_book());
+    xaccQueryAddAccountMatch (query, all_accounts,
+                              QOF_GUID_MATCH_ANY, QOF_QUERY_AND);
+    xaccQueryAddDateMatchTT (query,
+                             TRUE, min_time - match_date_hardlimit * 86400,
+                             TRUE, max_time + match_date_hardlimit * 86400,
+                             QOF_QUERY_AND);
+    list_element = qof_query_run (query);
+    g_list_free (all_accounts);
+    return list_element;
+}
 
-        selected_match =
-            gnc_import_TransInfo_get_selected_match (transaction_info);
-        match_selected_manually =
-            gnc_import_TransInfo_get_match_selected_manually (transaction_info);
+static void
+gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui)
+{
+    GtkTreeModel* model = gtk_tree_view_get_model (gui->view);
+    GtkListStore* store = GTK_LIST_STORE (model);
+    GtkTreeIter iter;
+    GNCImportMatchInfo *selected_match;
+    gboolean match_selected_manually;
+    Account *importaccount;
+    GList* list_element = NULL;
+    GList* iter_2 = NULL;
+    Query *query;
+    gint match_date_hardlimit = gnc_import_Settings_get_match_date_hardlimit (gui->user_settings);
+    gint display_threshold = gnc_import_Settings_get_display_threshold (gui->user_settings);
+    double fuzzy_amount = gnc_import_Settings_get_fuzzy_amount (gui->user_settings);
+    gboolean valid;
+
+    query = qof_query_create_for (GNC_ID_SPLIT);
+    // Gather the list of splits that could match one of the imported transactions
+    list_element = create_list_of_potential_matches (model, query, match_date_hardlimit);
+
+    // For each imported transaction, go through list_element, select the ones that have the
+    // right account, and evaluate how good a match they are.
+    valid = gtk_tree_model_get_iter_first (model, &iter);
+    while(valid)
+    {
+        GNCImportTransInfo* transaction_info;
+        Account *importaccount;
+        gtk_tree_model_get (model, &iter,
+                            DOWNLOADED_COL_DATA, &transaction_info,
+                            -1);
+        importaccount = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
+
+        iter_2 = list_element;
+        while (iter_2 != NULL)
+        {
+            // One issue here is that the accounts may not match since the query was
+            // called for all accounts so we need to check here.
+            Account* split_account = xaccSplitGetAccount(iter_2->data);
+            if (qof_instance_guid_compare (importaccount, split_account) == 0)
+                split_find_match (transaction_info, iter_2->data, display_threshold, fuzzy_amount);
+            iter_2 = g_list_next (iter_2);
+        }
+
+        // Sort the matches, select the best match, and set the action.
+        gnc_import_TransInfo_init_matches (transaction_info, gui->user_settings);
+
+        selected_match = gnc_import_TransInfo_get_selected_match (transaction_info);
+        match_selected_manually = gnc_import_TransInfo_get_match_selected_manually (transaction_info);
 
         if (selected_match)
             gnc_import_PendingMatches_add_match (gui->pending_matches,
                                                  selected_match,
                                                  match_selected_manually);
 
-        model = gtk_tree_view_get_model (gui->view);
-        gtk_tree_store_append (GTK_TREE_STORE(model), &iter, NULL);
         refresh_model_row (gui, model, &iter, transaction_info);
+        valid = gtk_tree_model_iter_next (model, &iter);
     }
+
+    qof_query_destroy (query);
     return;
-}/* end gnc_import_add_trans_with_ref_id() */
+}
 
 GtkWidget *gnc_gen_trans_list_widget (GNCImportMainMatcher *info)
 {



Summary of changes:
 gnucash/import-export/import-backend.c             | 120 ++++--------
 gnucash/import-export/import-backend.h             |  34 +---
 gnucash/import-export/import-main-matcher.c        | 209 ++++++++++++++++++---
 .../import-export/test/gtest-import-backend.cpp    |   7 +
 libgnucash/engine/Account.cpp                      |  25 ++-
 libgnucash/engine/Account.h                        |  15 +-
 libgnucash/engine/AccountP.h                       |   1 +
 libgnucash/engine/mocks/gmock-Transaction.cpp      |   7 +
 libgnucash/engine/mocks/gmock-Transaction.h        |   1 +
 9 files changed, 286 insertions(+), 133 deletions(-)



More information about the gnucash-changes mailing list