gnucash maint: Multiple changes pushed

Geert Janssens gjanssens at code.gnucash.org
Sat May 5 07:43:39 EDT 2018


Updated	 via  https://github.com/Gnucash/gnucash/commit/27c1df30 (commit)
	 via  https://github.com/Gnucash/gnucash/commit/682b5cf5 (commit)
	from  https://github.com/Gnucash/gnucash/commit/8b3a8744 (commit)



commit 27c1df30afda7998c8f1dccd3253fb53a95fbe8e
Author: Geert Janssens <geert at kobaltwit.be>
Date:   Sat May 5 13:42:13 2018 +0200

    Handle the common csv double quote escape variation (repeating the double quote)

diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
index b5ad206..bb4f8c8 100644
--- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
+++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
@@ -76,6 +76,16 @@ int GncCsvTokenizer::tokenize()
                 bs_pos = line.find ('\\', bs_pos);
             }
 
+            // Deal with repeated " ("") in strings.
+            // This is commonly used as escape mechanism for double quotes in csv files.
+            // However boost just eats them.
+            bs_pos = line.find ("\"\"");
+            while (bs_pos != std::string::npos)
+            {
+                line.replace (bs_pos, 2, "\\\"");
+                bs_pos = line.find ("\"\"");
+            }
+
             Tokenizer tok(line, sep);
             vec.assign(tok.begin(),tok.end());
             m_tokenized_contents.push_back(vec);
diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
index fdbd1e1..961ac65 100644
--- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
+++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
@@ -176,6 +176,7 @@ static tokenize_csv_test_data comma_separated [] = {
         { "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
         { "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
         { "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
+        { "Test with \"\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
         { NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
 };
 

commit 682b5cf581895a5da3296cfbfc24acc9ece0185e
Author: Geert Janssens <geert at kobaltwit.be>
Date:   Sat May 5 12:42:17 2018 +0200

    Bug 795666 - Backslash "\" in Description field spoils CSV Import without helpful error message
    
    We've configured boost::tokenizer to take the backslash as the escape character.
    However boost::tokenizer will throw if it encounters a sole backslash that's
    not an escape (it would expect two if a pure backslash is to be inserted).
    Avoid this by replacing lone backslashes (not part of escapes) with double
    backslashes before passing control to the tokenizer.

diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
index 402900a..b5ad206 100644
--- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
+++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
@@ -63,6 +63,19 @@ int GncCsvTokenizer::tokenize()
             }
             // ---
 
+            // Deal with backslashes that are not meant to be escapes
+            // The boost::tokenizer with escaped_list_separator as we use
+            // it would choke on this.
+            auto bs_pos = line.find ('\\');
+            while (bs_pos != std::string::npos)
+            {
+                if ((bs_pos == line.size()) ||                                 // got trailing single backslash
+                    (line.find_first_of ("\"\\n", bs_pos + 1) != bs_pos + 1))  // backslash is not part of known escapes \\, \" or \n
+                    line = line.substr(0, bs_pos) + "\\\\" + line.substr(bs_pos + 1);
+                bs_pos += 2;
+                bs_pos = line.find ('\\', bs_pos);
+            }
+
             Tokenizer tok(line, sep);
             vec.assign(tok.begin(),tok.end());
             m_tokenized_contents.push_back(vec);
diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
index c97902a..fdbd1e1 100644
--- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
+++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
@@ -138,17 +138,6 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file)
  * independently.
  */
 
-/* First test whether we're properly catching boost::tokenizer throws
- * This happens when the input data has invalid escape sequences */
-TEST_F (GncTokenizerTest, tokenize_binary_data)
-{
-    GncCsvTokenizer *csvtok = dynamic_cast<GncCsvTokenizer*>(csv_tok.get());
-    csvtok->set_separators (",");
-
-    set_utf8_contents (csv_tok, R"(\764Test,Something)");
-    EXPECT_THROW (csv_tok->tokenize(), std::range_error);
-}
-
 /* This helper function will run the parse step on the given data
  * with the parser as configured by the calling test function.
  * This allows the same code to be used with different csv test strings
@@ -185,6 +174,8 @@ static tokenize_csv_test_data comma_separated [] = {
         { "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
         { "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
         { "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
+        { "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
+        { "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
         { NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
 };
 



Summary of changes:
 .../import-export/csv-imp/gnc-tokenizer-csv.cpp    | 23 ++++++++++++++++++++++
 .../import-export/csv-imp/test/test-tokenizer.cpp  | 14 +++----------
 2 files changed, 26 insertions(+), 11 deletions(-)



More information about the gnucash-changes mailing list