36 #include <boost/tokenizer.hpp> 37 #include <boost/locale.hpp> 38 #include <boost/algorithm/string.hpp> 41 GncFwTokenizer::columns(
const std::vector<uint32_t>& cols)
47 GncFwTokenizer::get_columns()
// Predicate: may column col_num be deleted?
// The visible test compares col_num with the index of the last column
// (size() - 1) and branches when col_num is at or beyond it. The statements
// on either branch are missing from this listing (presumably the last column
// is refused -- TODO confirm).
// NOTE(review): assuming m_col_vec is a std::vector, size() - 1 is unsigned and
// wraps to a huge value when the vector is empty -- confirm this is never
// called before any column exists.
53 bool GncFwTokenizer::col_can_delete (uint32_t col_num)
55 auto last_col = m_col_vec.size() - 1;
56 if (col_num >= last_col)
// Delete column col_num by folding its width into the column to its right.
// col_num is used as an index into m_col_vec, not as a width.
62 void GncFwTokenizer::col_delete (uint32_t col_num)
// Guard on col_can_delete(); what happens when the guard fails is not shown
// in this listing (presumably an early return -- TODO confirm).
64 if (!col_can_delete (col_num))
// Give this column's width to its right-hand neighbour, then drop the entry,
// so the sum of all widths is unchanged.
67 m_col_vec[col_num + 1] += m_col_vec[col_num];
68 m_col_vec.erase (m_col_vec.begin() + col_num);
// Predicate: may column col_num be narrowed?
// The visible test is the same as in col_can_delete: it branches when col_num
// is at or beyond the index of the last column. The branch bodies are not
// present in this listing.
// NOTE(review): size() - 1 wraps if m_col_vec is empty (unsigned arithmetic,
// assuming a std::vector) -- confirm callers guarantee at least one column.
71 bool GncFwTokenizer::col_can_narrow (uint32_t col_num)
74 auto last_col = m_col_vec.size() - 1;
75 if (col_num >= last_col)
// Narrow column col_num, giving the freed position to the column on its right.
81 void GncFwTokenizer::col_narrow (uint32_t col_num)
// Guard on col_can_narrow(); the failure action is not shown in this listing.
83 if (!col_can_narrow (col_num))
// The right-hand neighbour grows by one. The matching decrement of
// m_col_vec[col_num] is not visible here (a line is missing from the listing
// just before this one) -- presumably it precedes this statement; TODO confirm.
87 m_col_vec[col_num + 1]++;
// A column whose width has reached zero is removed from the vector.
90 if (m_col_vec[col_num] == 0)
91 m_col_vec.erase (m_col_vec.begin() + col_num);
// Predicate: may column col_num be widened?
// The visible test branches when col_num is at or beyond the index of the last
// column; widening takes width from the column to the right (see col_widen),
// which the last column does not have. The branch bodies are not present in
// this listing.
// NOTE(review): size() - 1 wraps if m_col_vec is empty (unsigned arithmetic,
// assuming a std::vector) -- confirm callers guarantee at least one column.
94 bool GncFwTokenizer::col_can_widen (uint32_t col_num)
97 auto last_col = m_col_vec.size() - 1;
98 if (col_num >= last_col)
// Widen column col_num by one position, taken from the column on its right.
104 void GncFwTokenizer::col_widen (uint32_t col_num)
// Guard on col_can_widen(); the failure action is not shown in this listing.
106 if (!col_can_widen (col_num))
// Move one position from the right-hand neighbour to this column; the sum of
// all widths is unchanged.
109 m_col_vec[col_num]++;
110 m_col_vec[col_num + 1]--;
// If the neighbour has been consumed entirely, remove it.
113 if (m_col_vec[col_num + 1] == 0)
114 m_col_vec.erase (m_col_vec.begin() + col_num + 1);
// Predicate: may column col_num be split at offset `position`?
// `position` is compared against the column's own width, so it is an offset
// relative to the start of that column.
117 bool GncFwTokenizer::col_can_split (uint32_t col_num, uint32_t position)
119 auto last_col = m_col_vec.size() - 1;
// Unlike the delete/narrow/widen predicates this uses '>' rather than '>=',
// so the last column itself passes this first test.
120 if (col_num > last_col)
// Despite its name, col_end holds the width of the column.
123 uint32_t col_end = m_col_vec[col_num];
// The split point must fall strictly inside the column (1 .. width - 1);
// otherwise one of the two halves would be empty. The branch bodies are not
// present in this listing.
124 if (position < 1 || position >= col_end)
// Split column col_num in two at offset `position` (relative to the column
// start). Does nothing visible when col_can_split() rejects the request.
130 void GncFwTokenizer::col_split (uint32_t col_num, uint32_t position)
132 if (col_can_split (col_num, position))
// Insert a new column of width `position` in front of the original one...
134 m_col_vec.insert (m_col_vec.begin() + col_num, position);
// ...and shrink the original (now at index col_num + 1) by the same amount,
// so the sum of all widths is unchanged.
135 m_col_vec[col_num + 1] -= position;
// Load the file through the base class, then make the column widths in
// m_col_vec add up to the length of the longest line.
140 void GncFwTokenizer::load_file(
const std::string& path)
142 GncTokenizer::load_file(path);
// Scan the loaded contents line by line to find the longest line.
// The declaration of `line` and any reset of m_longest_line are not visible
// in this listing.
// NOTE(review): the length is taken from a narrow string read out of
// m_utf8_contents, i.e. it counts bytes, while tokenize() applies the column
// widths to wide characters -- for non-ASCII input the two differ; confirm
// this is intended.
146 std::istringstream in_stream(m_utf8_contents);
147 while (std::getline (in_stream, line))
149 if (line.size() > m_longest_line)
150 m_longest_line = line.size();
// No columns defined yet: start with one column spanning the longest line.
155 if (m_col_vec.empty())
158 m_col_vec.push_back(m_longest_line);
// Columns already exist: compare their combined width with the longest line.
167 uint32_t total_width = 0;
168 for (
auto col_width : m_col_vec)
169 total_width += col_width;
// Columns too narrow in total: widen the last column to cover the difference.
171 if (m_longest_line > total_width)
172 m_col_vec.back() += m_longest_line - total_width;
// Columns too wide in total: shrink from the right.
173 else if (m_longest_line < total_width)
// Loop while the columns before the last one are, on their own, already wider
// than the longest line.
// NOTE(review): col_delete() takes a column *index*, but the argument here is
// m_col_vec[size() - 2], i.e. the *width* of the second-to-last column.
// This looks like it was meant to be `m_col_vec.size() - 2` -- confirm.
// total_width is not updated in this loop, and if col_delete() refuses the
// request nothing changes the loop condition, so the loop may not terminate.
175 while (total_width - m_col_vec.back() > m_longest_line)
176 col_delete (m_col_vec[m_col_vec.size() - 2]);
// Trim the remaining excess off the last column.
177 m_col_vec.back() -= total_width - m_longest_line;
// Split every line of the loaded contents into fixed-width fields according
// to m_col_vec and store the trimmed fields in m_tokenized_contents, one
// vector of strings per line. The returned int is not visible in this listing.
188 int GncFwTokenizer::tokenize()
190 using boost::locale::conv::utf_to_utf;
191 using Tokenizer = boost::tokenizer< boost::offset_separator,
192 std::wstring::const_iterator, std::wstring > ;
// Build the separator from the column widths. The `false` argument is
// presumably Boost's wrap_offsets flag, so the widths are not re-applied once
// the last column has been consumed -- confirm against the Boost.Tokenizer docs.
194 boost::offset_separator sep(m_col_vec.begin(), m_col_vec.end(),
false);
// Work on a wide-character copy of the contents, presumably so that column
// widths count characters rather than UTF-8 bytes -- TODO confirm.
196 std::wstring wchar_contents = utf_to_utf<wchar_t>(m_utf8_contents.c_str(),
197 m_utf8_contents.c_str() + m_utf8_contents.size());
// Discard the results of any previous run before refilling.
202 m_tokenized_contents.clear();
203 std::wistringstream in_stream(wchar_contents);
// Process one input line per iteration (declarations of `line` and `vec` are
// not visible in this listing).
205 while (std::getline (in_stream, line))
207 Tokenizer tok(line, sep);
209 for (
auto token : tok)
// Strip surrounding whitespace from the field, then convert it back to a
// narrow (UTF-8) string. The conversion expression is truncated in this listing.
211 auto stripped = boost::trim_copy(token);
212 auto narrow = utf_to_utf<char>(stripped.c_str(), stripped.c_str()
214 vec.push_back (narrow);
// Append the finished row of fields for this line.
216 m_tokenized_contents.push_back(vec);
Class to convert a file with fixed-width delimited contents into a vector of string vectors.