gnucash maint: CsvImport - Fix mishandling of quoted empty fields

Geert Janssens gjanssens at code.gnucash.org
Mon May 27 16:42:48 EDT 2019


Updated	 via  https://github.com/Gnucash/gnucash/commit/e557b021 (commit)
	from  https://github.com/Gnucash/gnucash/commit/ed42f8ac (commit)



commit e557b021d3bf1840f97222fa059d70179e42b330
Author: Geert Janssens <geert at kobaltwit.be>
Date:   Mon May 27 17:24:23 2019 +0200

    CsvImport - Fix mishandling of quoted empty fields

diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
index bb4f8c89f..d1a87f5ac 100644
--- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
+++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
@@ -82,8 +82,20 @@ int GncCsvTokenizer::tokenize()
             bs_pos = line.find ("\"\"");
             while (bs_pos != std::string::npos)
             {
-                line.replace (bs_pos, 2, "\\\"");
-                bs_pos = line.find ("\"\"");
+                // Only make changes in case the double quotes are part of a larger field.
+                // In other words, a field which only contains two double quotes represents an
+                // empty field. We don't need to touch those.
+                // The way to determine whether the double quotes represent an empty string
+                // is by checking whether the characters in front and after are each either
+                // a field separator or the beginning or end of the string.
+                if (!(((bs_pos == 0) ||                                          // quotes are at start of line
+                       (m_sep_str.find (line[bs_pos-1]) != std::string::npos))    // quotes preceded by field separator
+                      &&
+                      ((bs_pos + 2 >= line.length()) ||                          // quotes are at end of line
+                       (m_sep_str.find (line[bs_pos+2]) != std::string::npos))))   // quotes followed by field separator
+                    // Only make changes in case the double quotes are not an empty field
+                    line.replace (bs_pos, 2, "\\\"");
+                bs_pos = line.find ("\"\"", bs_pos + 2);
             }
 
             Tokenizer tok(line, sep);
diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
index 2a861ae6b..b5b7fe0bd 100644
--- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
+++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
@@ -172,7 +172,7 @@ GncTokenizerTest::test_gnc_tokenize_helper (const std::string& separators, token
 
 static tokenize_csv_test_data comma_separated [] = {
         { "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
-        { "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
+        { "05/01/15,45,Typical csv import line - including quoted empty field,,Miscellaneous,\"\",\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
         { "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
         { "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
         { "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
@@ -188,7 +188,7 @@ TEST_F (GncTokenizerTest, tokenize_comma_sep)
 
 static tokenize_csv_test_data semicolon_separated [] = {
         { "Date;Num;Description;Notes;Account;Deposit;Withdrawal;Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
-        { "05/01/15;45;Acme Inc.;;Miscellaneous;;\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
+        { "05/01/15;45;Typical csv import line - including quoted empty field;;Miscellaneous;\"\";\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
         { "05/01/15;45;Acme Inc.;;Miscellaneous;", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
         { NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
 };



Summary of changes:
 gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp   | 16 ++++++++++++++--
 gnucash/import-export/csv-imp/test/test-tokenizer.cpp |  4 ++--
 2 files changed, 16 insertions(+), 4 deletions(-)



More information about the gnucash-changes mailing list