gnucash maint: CsvImport - Fix mishandling of quoted empty fields
Geert Janssens
gjanssens at code.gnucash.org
Mon May 27 16:42:48 EDT 2019
Updated via https://github.com/Gnucash/gnucash/commit/e557b021 (commit)
from https://github.com/Gnucash/gnucash/commit/ed42f8ac (commit)
commit e557b021d3bf1840f97222fa059d70179e42b330
Author: Geert Janssens <geert at kobaltwit.be>
Date: Mon May 27 17:24:23 2019 +0200
CsvImport - Fix mishandling of quoted empty fields
diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
index bb4f8c89f..d1a87f5ac 100644
--- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
+++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp
@@ -82,8 +82,20 @@ int GncCsvTokenizer::tokenize()
bs_pos = line.find ("\"\"");
while (bs_pos != std::string::npos)
{
- line.replace (bs_pos, 2, "\\\"");
- bs_pos = line.find ("\"\"");
+ // Only make changes in case the double quotes are part of a larger field.
+ // In other words, a field which only contains two double quotes represents an
+ // empty field. We don't need to touch those.
+ // The way to determine whether the double quotes represent an empty string
+ // is by checking whether the characters before and after them are each either
+ // a field separator or the beginning or end of the string.
+ if (!(((bs_pos == 0) || // quotes are at start of line
(m_sep_str.find (line[bs_pos-1]) != std::string::npos)) // quotes preceded by field separator
+ &&
+ ((bs_pos + 2 >= line.length()) || // quotes are at end of line
+ (m_sep_str.find (line[bs_pos+2]) != std::string::npos)))) // quotes followed by field separator
+ // Only make changes in case the double quotes are not an empty field
+ line.replace (bs_pos, 2, "\\\"");
+ bs_pos = line.find ("\"\"", bs_pos + 2);
}
Tokenizer tok(line, sep);
diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
index 2a861ae6b..b5b7fe0bd 100644
--- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
+++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp
@@ -172,7 +172,7 @@ GncTokenizerTest::test_gnc_tokenize_helper (const std::string& separators, token
static tokenize_csv_test_data comma_separated [] = {
{ "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
- { "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
+ { "05/01/15,45,Typical csv import line - including quoted empty field,,Miscellaneous,\"\",\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
{ "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
{ "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } },
@@ -188,7 +188,7 @@ TEST_F (GncTokenizerTest, tokenize_comma_sep)
static tokenize_csv_test_data semicolon_separated [] = {
{ "Date;Num;Description;Notes;Account;Deposit;Withdrawal;Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } },
- { "05/01/15;45;Acme Inc.;;Miscellaneous;;\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
+ { "05/01/15;45;Typical csv import line - including quoted empty field;;Miscellaneous;\"\";\"1,100.00\";", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } },
{ "05/01/15;45;Acme Inc.;;Miscellaneous;", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } },
{ NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } },
};
Summary of changes:
gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp | 16 ++++++++++++++--
gnucash/import-export/csv-imp/test/test-tokenizer.cpp | 4 ++--
2 files changed, 16 insertions(+), 4 deletions(-)
More information about the gnucash-changes
mailing list