[Gnucash-changes] r14361 - gnucash/trunk - Strip all invalid utf8 characters from imported QIF and OFX/QFX

David Hampton hampton at cvs.gnucash.org
Tue Jun 13 23:42:07 EDT 2006


Author: hampton
Date: 2006-06-13 23:42:07 -0400 (Tue, 13 Jun 2006)
New Revision: 14361
Trac: http://svn.gnucash.org/trac/changeset/14361

Modified:
   gnucash/trunk/ChangeLog
   gnucash/trunk/src/core-utils/gnc-glib-utils.c
   gnucash/trunk/src/core-utils/gnc-glib-utils.h
   gnucash/trunk/src/core-utils/gw-core-utils-spec.scm
   gnucash/trunk/src/import-export/ofx/gnc-ofx-import.c
   gnucash/trunk/src/import-export/qif-import/qif-file.scm
Log:
Strip all invalid utf8 characters from imported QIF and OFX/QFX
strings.  This fixes bugs #106203 #338296 #344170 and #344219.


Modified: gnucash/trunk/ChangeLog
===================================================================
--- gnucash/trunk/ChangeLog	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/ChangeLog	2006-06-14 03:42:07 UTC (rev 14361)
@@ -1,3 +1,14 @@
+2006-06-13  David Hampton  <hampton at employees.org>
+
+	* src/core-utils/gnc-glib-utils.[ch]:
+	* src/core-utils/gw-core-utils-spec.scm:
+	* src/import-export/qif-import/qif-file.scm:
+	* src/import-export/ofx/gnc-ofx-import.c: Strip all invalid utf8
+	characters from imported QIF and OFX/QFX strings.  This fixes bugs
+	#106203 #338296 #344170 and #344219.  Long term gnucash should be
+	enhanced to learn/remember the input encoding and automatically
+	convert to utf8.
+
 2006-06-13  Derek Atkins  <derek at ihtfp.com>
 
 	* configure.in: force-enable hbci if the user enables mt940

Modified: gnucash/trunk/src/core-utils/gnc-glib-utils.c
===================================================================
--- gnucash/trunk/src/core-utils/gnc-glib-utils.c	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/src/core-utils/gnc-glib-utils.c	2006-06-14 03:42:07 UTC (rev 14361)
@@ -22,6 +22,8 @@
 \********************************************************************/
 
 #include "config.h"
+#include <stdio.h>
+#include <string.h>
 
 #include "gnc-glib-utils.h"
 
@@ -41,3 +43,25 @@
     return -1;
   return 0;
 }
+
+gboolean
+gnc_utf8_validate (const gchar *str)
+{
+  return g_utf8_validate(str, -1, NULL);
+}
+
+void
+gnc_utf8_strip_invalid (gchar *str)
+{
+  gchar *end;
+  gint len;
+
+  if (g_utf8_validate(str, -1, (const gchar **)&end))
+    return;
+
+  g_warning("Invalid utf8 string: %s", str);
+  do {
+    len = strlen(end);
+    memmove(end, end+1, len);	/* shuffle the remainder one byte */
+  } while (!g_utf8_validate(str, -1, (const gchar **)&end));
+}

Modified: gnucash/trunk/src/core-utils/gnc-glib-utils.h
===================================================================
--- gnucash/trunk/src/core-utils/gnc-glib-utils.h	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/src/core-utils/gnc-glib-utils.h	2006-06-14 03:42:07 UTC (rev 14361)
@@ -43,8 +43,38 @@
  @{ 
 */
 
-int safe_utf8_collate (const char * da, const char * db);
+/** Collate two utf8 strings.  This function performs basic argument
+ *  checking before calling g_utf8_collate.
+ *
+ *  @param str1 The first string.
+ *
+ *  @param str2 The second string.
+ *
+ *  @return Same return value as g_utf8_collate. The values are: < 0
+ *  if str1 compares before str2, 0 if they compare equal, > 0 if str1
+ *  compares after str2. */
+int safe_utf8_collate (const char *str1, const char *str2);
 
+
+/** This is a helper function for guile. C code should call
+ *  g_utf8_validate directly.
+ *
+ *  @param str The string to be validated.
+ *
+ *  @return TRUE if this string is valid utf8. */
+gboolean gnc_utf8_validate (const gchar *str);
+
+
+/** Strip any non-utf8 characters from a string.  This function
+ *  rewrites the string "in place" instead of allocating and returning
+ *  a new string.  This allows it to operate on strings that are
+ *  defined as character arrays in a larger data structure.
+ *
+ *  @param str A pointer to the string to strip of invalid
+ *  characters. */
+void gnc_utf8_strip_invalid (gchar *str);
+
+
 /** @} */
 
 #endif /* GNC_GLIB_UTILS_H */

Modified: gnucash/trunk/src/core-utils/gw-core-utils-spec.scm
===================================================================
--- gnucash/trunk/src/core-utils/gw-core-utils-spec.scm	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/src/core-utils/gw-core-utils-spec.scm	2006-06-14 03:42:07 UTC (rev 14361)
@@ -25,6 +25,7 @@
    (lambda (wrapset client-wrapset)
      (list
       "#include <gnc-gconf-utils.h>\n"
+      "#include <gnc-glib-utils.h>\n"
       "#include <gnc-main.h>\n")))
 
   (gw:wrap-function
@@ -52,4 +53,20 @@
    '(((<gw:mchars> caller-owned) program))
    "Get a boolean value from gconf.")
 
+  (gw:wrap-function
+   ws
+   'gnc:utf8-validate
+   '<gw:bool>
+   "gnc_utf8_validate"
+   '(((<gw:mchars> caller-owned) program))
+   "Validate UTF8 encoded text.")
+
+  (gw:wrap-function
+   ws
+   'gnc:utf8-strip-invalid
+   '<gw:void>
+   "gnc_utf8_strip_invalid"
+   '(((<gw:mchars> caller-owned) program))
+   "Strip string of non-utf8 characters.")
+
 )

Modified: gnucash/trunk/src/import-export/ofx/gnc-ofx-import.c
===================================================================
--- gnucash/trunk/src/import-export/ofx/gnc-ofx-import.c	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/src/import-export/ofx/gnc-ofx-import.c	2006-06-14 03:42:07 UTC (rev 14361)
@@ -47,6 +47,7 @@
 #include "gnc-book.h"
 #include "gnc-ui-util.h"
 #include "gnc-gconf-utils.h"
+#include "gnc-glib-utils.h"
 
 #define GCONF_SECTION "dialogs/import/ofx"
 
@@ -124,6 +125,12 @@
 					data.account_id, 0, NULL, NULL, NO_TYPE, NULL, NULL);
     if(account!=NULL)
       {
+	/********** Validate the input strings to ensure utf8 ********************/
+	if (data.name_valid)
+	  gnc_utf8_strip_invalid(data.name);
+	if (data.memo_valid)
+	  gnc_utf8_strip_invalid(data.memo);
+
 	/********** Create the transaction and setup transaction data ************/
 	book = xaccAccountGetBook(account);
 	transaction = xaccMallocTransaction(book);
@@ -595,6 +602,7 @@
       }
     }
 
+    gnc_utf8_strip_invalid(data.account_name);
     account_description = g_strdup_printf( /* This string is a default account
 					      name. It MUST NOT contain the
 					      character ':' anywhere in it or

Modified: gnucash/trunk/src/import-export/qif-import/qif-file.scm
===================================================================
--- gnucash/trunk/src/import-export/qif-import/qif-file.scm	2006-06-14 00:34:16 UTC (rev 14360)
+++ gnucash/trunk/src/import-export/qif-import/qif-file.scm	2006-06-14 03:42:07 UTC (rev 14361)
@@ -11,6 +11,8 @@
 ;;  just store the fields "raw".
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(use-modules (g-wrapped gw-core-utils))
+
 (cond
  ((or (string=? "1.3.4" (version))
       (string=? "1.4" (substring (version) 0 3))) #f)
@@ -72,6 +74,7 @@
                  ;; pick the 1-char tag off from the remainder of the line 
                  (set! tag (string-ref line 0))
                  (set! value (substring line 1))
+		 (gnc:utf8-strip-invalid value)
                  
                  ;; now do something with the line 
                  (if



More information about the gnucash-changes mailing list