Eclipse SUMO - Simulation of Urban MObility
StringTokenizer.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3 // Copyright (C) 2001-2024 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
20 // A java-style StringTokenizer for c++ (stl)
21 /****************************************************************************/
22 #include <config.h>
23 
24 #include <string>
25 #include <vector>
26 #include <iostream> // !!! debug only
27 
28 #include "UtilExceptions.h"
29 #include "StringTokenizer.h"
30 
31 
32 // ===========================================================================
33 // variable definitions
34 // ===========================================================================
35 const int StringTokenizer::NEWLINE = -256;
36 const int StringTokenizer::WHITECHARS = -257;
37 const int StringTokenizer::SPACE = 32;
38 const int StringTokenizer::TAB = 9;
39 
40 
41 // ===========================================================================
42 // method definitions
43 // ===========================================================================
44 
46  myPos(0) {
47 }
48 
49 
50 StringTokenizer::StringTokenizer(std::string tosplit) :
51  myTosplit(tosplit), myPos(0) {
52  prepareWhitechar(tosplit);
53 }
54 
55 
56 StringTokenizer::StringTokenizer(std::string tosplit, std::string token, bool splitAtAllChars) :
57  myTosplit(tosplit), myPos(0) {
58  prepare(tosplit, token, splitAtAllChars);
59 }
60 
61 
62 StringTokenizer::StringTokenizer(std::string tosplit, int special) :
63  myTosplit(tosplit), myPos(0) {
64  switch (special) {
65  case NEWLINE:
66  prepare(tosplit, "\r\n", true);
67  break;
68  case TAB:
69  prepare(tosplit, "\t", true);
70  break;
71  case WHITECHARS:
72  prepareWhitechar(tosplit);
73  break;
74  default:
75  char* buf = new char[2];
76  buf[0] = (char) special;
77  buf[1] = 0;
78  prepare(tosplit, buf, false);
79  delete[] buf;
80  break;
81  }
82 }
83 
84 
86 
87 
89  myPos = 0;
90 }
91 
92 
94  return myPos != (int)myStarts.size();
95 }
96 
97 
98 std::string StringTokenizer::next() {
99  if (myPos >= (int)myStarts.size()) {
100  throw OutOfBoundsException();
101  }
102  if (myLengths[myPos] == 0) {
103  myPos++;
104  return "";
105  }
106  int start = myStarts[myPos];
107  int length = myLengths[myPos++];
108  return myTosplit.substr(start, length);
109 }
110 
111 
112 std::string StringTokenizer::front() {
113  if (myStarts.size() == 0) {
114  throw OutOfBoundsException();
115  }
116  if (myLengths[0] == 0) {
117  return "";
118  }
119  return myTosplit.substr(myStarts[0], myLengths[0]);
120 }
121 
122 
123 std::string StringTokenizer::get(int pos) const {
124  if (pos >= (int)myStarts.size()) {
125  throw OutOfBoundsException();
126  }
127  if (myLengths[pos] == 0) {
128  return "";
129  }
130  int start = myStarts[pos];
131  int length = myLengths[pos];
132  return myTosplit.substr(start, length);
133 }
134 
135 
137  return (int)myStarts.size();
138 }
139 
140 
141 void StringTokenizer::prepare(const std::string& tosplit, const std::string& token, bool splitAtAllChars) {
142  int beg = 0;
143  int len = (int)token.length();
144  if (splitAtAllChars) {
145  len = 1;
146  }
147  while (beg < (int)tosplit.length()) {
148  std::string::size_type end;
149  if (splitAtAllChars) {
150  end = tosplit.find_first_of(token, beg);
151  } else {
152  end = tosplit.find(token, beg);
153  }
154  if (end == std::string::npos) {
155  end = tosplit.length();
156  }
157  myStarts.push_back(beg);
158  myLengths.push_back((int)end - beg);
159  beg = (int)end + len;
160  if (beg == (int)tosplit.length()) {
161  myStarts.push_back(beg - 1);
162  myLengths.push_back(0);
163  }
164  }
165 }
166 
167 
168 void StringTokenizer::prepareWhitechar(const std::string& tosplit) {
169  std::string::size_type len = tosplit.length();
170  std::string::size_type beg = 0;
171  while (beg < len && tosplit[beg] <= SPACE) {
172  beg++;
173  }
174  while (beg != std::string::npos && beg < len) {
175  std::string::size_type end = beg;
176  while (end < len && tosplit[end] > SPACE) {
177  end++;
178  }
179  myStarts.push_back((int)beg);
180  myLengths.push_back((int)end - (int)beg);
181  beg = end;
182  while (beg < len && tosplit[beg] <= SPACE) {
183  beg++;
184  }
185  }
186 }
187 
188 
189 std::vector<std::string>
191  std::vector<std::string> ret;
192  ret.reserve(size());
193  while (hasNext()) {
194  ret.push_back(next());
195  }
196  reinit();
197  return ret;
198 }
199 
200 
201 std::set<std::string>
203  std::vector<std::string> v = getVector();
204  return std::set<std::string>(v.begin(), v.end());
205 }
206 
207 
208 /****************************************************************************/
static const int NEWLINE
identifier for splitting the given string at all newline characters
std::string front()
returns the first substring without moving the iterator
std::set< std::string > getSet()
return set of strings
~StringTokenizer()
destructor
int size() const
returns the number of existing substrings
SizeVector myStarts
the list of substring starts
void reinit()
reinitialises the internal iterator
static const int WHITECHARS
identifier for splitting the given string at all whitespace characters
std::string get(int pos) const
returns the item at the given position
std::vector< std::string > getVector()
return vector of strings
bool hasNext()
returns the information whether further substrings exist
static const int TAB
the ascii index of the tab character
static const int SPACE
the ascii index of the highest whitespace character
SizeVector myLengths
the list of substring lengths
StringTokenizer()
default constructor
int myPos
the current position in the list of substrings
void prepare(const std::string &tosplit, const std::string &token, bool splitAtAllChars)
splits the first string at all occurrences of the second. If the third parameter is true, splits at every individual character of the second string instead.
std::string myTosplit
the string to split
void prepareWhitechar(const std::string &tosplit)
splits the first string at all occurrences of whitechars
std::string next()
returns the next substring when it exists. Otherwise an OutOfBoundsException is thrown