[Gnucash-changes] Improve importer performance.

Christian Stimming cstim at cvs.gnucash.org
Sat Nov 27 06:44:29 EST 2004


Log Message:
-----------
Improve importer performance.

2004-11-27  Christian Stimming  <stimming at tuhh.de>

	* src/import-export/import-backend.c
	(gnc_import_find_split_matches): Improve importer performance by
	matching imported transactions only against transactions in the
	proper time interval.

Modified Files:
--------------
    gnucash:
        ChangeLog
    gnucash/src/import-export:
        import-backend.c

Revision Data
-------------
Index: ChangeLog
===================================================================
RCS file: /home/cvs/cvsroot/gnucash/ChangeLog,v
retrieving revision 1.1859
retrieving revision 1.1860
diff -LChangeLog -LChangeLog -u -r1.1859 -r1.1860
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,10 @@
+2004-11-27  Christian Stimming  <stimming at tuhh.de>
+
+	* src/import-export/import-backend.c
+	(gnc_import_find_split_matches): Improve importer performance by
+	matching imported transactions only against transactions in the
+	proper time interval.
+
 2004-11-22  Christian Stimming  <stimming at tuhh.de>
 
 	* configure.in, README: Add configure check for libofx version
Index: import-backend.c
===================================================================
RCS file: /home/cvs/cvsroot/gnucash/src/import-export/import-backend.c,v
retrieving revision 1.28
retrieving revision 1.29
diff -Lsrc/import-export/import-backend.c -Lsrc/import-export/import-backend.c -u -r1.28 -r1.29
--- src/import-export/import-backend.c
+++ src/import-export/import-backend.c
@@ -39,6 +39,7 @@
 #include "Account.h"
 #include "dialog-utils.h"
 #include "global-options.h"
+#include "Query.h"
 
 #include "gnc-engine-util.h"
 
@@ -615,8 +616,11 @@
 	{
 	  /* If a transaction's amount doesn't match within the
 	     threshold, it's very unlikely to be the same transaction
-	     so we give it an extra -5 penality */
-	  prob = prob-5;
+	     so we give it an extra -5 penalty. Changed 2004-11-27:
+	     The penalty is so high that we can forget about this
+	     split anyway and skip the rest of the tests. */
+	  return;
+	  /* prob = prob-5; */
 	  /* DEBUG("heuristics:  probability - 1 (amount)"); */
 	}
       
@@ -643,9 +647,12 @@
 	}
       else if (datediff_day > MATCH_DATE_NOT_THRESHOLD)
 	{
-	  /* Extra penalty if that split lies awfully far away
-	     from the given one. */
-	  prob = prob-5;
+	  /* Extra penalty if that split lies awfully far away from
+	     the given one. Changed 2004-11-27: The penalty is so high
+	     that we can forget about this split anyway and skip the
+	     rest of the tests. */
+	  return;
+	  /* prob = prob-5; */
 	  /*DEBUG("heuristics:  probability - 5 (date)"); */
 	}
       
@@ -764,13 +771,42 @@
 				   double fuzzy_amount_difference)
 {
   GList * list_element;
+  Query *query = xaccMallocQuery();
   g_assert (trans_info);
   
   /* Get list of splits of the originating account. */
-  list_element = 
-    g_list_first
-    (xaccAccountGetSplitList
-     (xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (trans_info))));
+  {
+    /* We used to traverse *all* splits of the account by using
+       xaccAccountGetSplitList, which is a bad idea because 90% of these
+       splits are outside the date range that is interesting. We should
+       rather use a query according to the date region, which is
+       implemented here. 
+    */
+    Account *importaccount = 
+      xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (trans_info));
+    time_t download_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (trans_info));
+
+    xaccQuerySetBook (query, gnc_get_current_book());
+    xaccQueryAddSingleAccountMatch (query, importaccount,			    
+				    QOF_QUERY_AND);
+    xaccQueryAddDateMatchTT (query,
+			     TRUE, download_time - MATCH_DATE_NOT_THRESHOLD*86400/2,
+			     TRUE, download_time + MATCH_DATE_NOT_THRESHOLD*86400/2,
+			     QOF_QUERY_AND);
+    list_element = xaccQueryGetSplits (query);
+    /* Sigh. Doesn't help too much. We still create and run one query
+       for each imported transaction. Maybe it would improve
+       performance further if there is one single (master-)query at
+       the beginning, matching the full date range and all accounts in
+       question. However, this doesn't quite work because this function
+       here is called from each gnc_gen_trans_list_add_trans(), which
+       is called one at a time. Therefore the whole importer would
+       have to change its behaviour: Accept the imported txns via
+       gnc_gen_trans_list_add_trans(), and only when
+       gnc_gen_trans_list_run() is called, then calculate all the
+       different match candidates. That's too much work for now.
+    */
+  }
 
   /* Traverse that list, calling split_find_match on each one. Note
      that xaccAccountForEachSplit is declared in Account.h but
@@ -781,6 +817,8 @@
 			process_threshold, fuzzy_amount_difference);
       list_element = g_list_next (list_element);
     }
+
+  xaccFreeQuery (query);
 }
 
 


More information about the gnucash-changes mailing list