Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3 // Copyright (C) 2001-2024 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
21 // Some static methods for string processing
22 /****************************************************************************/
23 #include <config.h>
24 
25 #include <string>
26 #include <iostream>
27 #include <cstdio>
28 #include <cstring>
29 #include <regex>
30 #ifdef WIN32
31 #define NOMINMAX
32 #include <windows.h>
33 #undef NOMINMAX
34 #else
35 #include <unistd.h>
36 #endif
37 #include <xercesc/util/TransService.hpp>
38 #include <xercesc/util/TranscodingException.hpp>
40 #include <utils/common/ToString.h>
42 #include "StringUtils.h"
43 
44 
45 // ===========================================================================
46 // static member definitions
47 // ===========================================================================
// Shared std::string instance; default-constructed, hence empty.
48 std::string StringUtils::emptyString;
// Cached local-code-page transcoder. Starts out null and is created on first
// use by transcodeFromLocal() / transcodeToLocal().
49 XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
50 
51 
52 // ===========================================================================
53 // method definitions
54 // ===========================================================================
55 std::string
56 StringUtils::prune(const std::string& str) {
57  const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
58  if (std::string::npos != endpos) {
59  const int startpos = (int)str.find_first_not_of(" \t\n\r");
60  return str.substr(startpos, endpos - startpos + 1);
61  }
62  return "";
63 }
64 
65 
66 std::string
67 StringUtils::pruneZeros(const std::string& str, int max) {
68  const std::string::size_type endpos = str.find_last_not_of("0");
69  if (endpos != std::string::npos && str.back() == '0') {
70  std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
71  return res;
72  }
73  return str;
74 }
75 
76 std::string
77 StringUtils::to_lower_case(const std::string& str) {
78  std::string s = str;
79  std::transform(s.begin(), s.end(), s.begin(), [](char c) {
80  return (char)::tolower(c);
81  });
82  return s;
83 }
84 
85 
86 std::string
87 StringUtils::latin1_to_utf8(std::string str) {
88  // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
89  std::string result;
90  for (const auto& c : str) {
91  const unsigned char uc = (unsigned char)c;
92  if (uc < 128) {
93  result += uc;
94  } else {
95  result += (char)(0xc2 + (uc > 0xbf));
96  result += (char)((uc & 0x3f) + 0x80);
97  }
98  }
99  return result;
100 }
101 
102 
103 std::string
104 StringUtils::convertUmlaute(std::string str) {
105  str = replace(str, "\xE4", "ae");
106  str = replace(str, "\xC4", "Ae");
107  str = replace(str, "\xF6", "oe");
108  str = replace(str, "\xD6", "Oe");
109  str = replace(str, "\xFC", "ue");
110  str = replace(str, "\xDC", "Ue");
111  str = replace(str, "\xDF", "ss");
112  str = replace(str, "\xC9", "E");
113  str = replace(str, "\xE9", "e");
114  str = replace(str, "\xC8", "E");
115  str = replace(str, "\xE8", "e");
116  return str;
117 }
118 
119 
120 std::string
121 StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
122  std::string::size_type idx = str.find(what);
123  const int what_len = (int)what.length();
124  if (what_len > 0) {
125  const int by_len = (int)by.length();
126  while (idx != std::string::npos) {
127  str = str.replace(idx, what_len, by);
128  idx = str.find(what, idx + by_len);
129  }
130  }
131  return str;
132 }
133 
134 
135 std::string
136 StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
137  std::string s = str;
138  if (timeRef != nullptr) {
139  const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
140  const std::string::size_type utcIndex = str.find("${UTC}");
141  const bool isUTC = utcIndex != std::string::npos;
142  if (localTimeIndex != std::string::npos || isUTC) {
143  const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
144  char buffer [80];
145  struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
146  strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
147  auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
148  auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
149  const std::string micro = buffer + toString(microseconds.count());
150  if (isUTC) {
151  s.replace(utcIndex, 6, micro);
152  } else {
153  s.replace(localTimeIndex, 12, micro);
154  }
155  }
156  }
157  const std::string::size_type pidIndex = str.find("${PID}");
158  if (pidIndex != std::string::npos) {
159 #ifdef WIN32
160  s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
161 #else
162  s.replace(pidIndex, 6, toString(::getpid()));
163 #endif
164  }
165  if (std::getenv("SUMO_LOGO") == nullptr) {
166  s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
167  }
168  const std::string::size_type tildeIndex = str.find("~");
169  if (tildeIndex == 0) {
170  s.replace(0, 1, "${HOME}");
171  }
172  s = replace(s, ",~", ",${HOME}");
173 #ifdef WIN32
174  if (std::getenv("HOME") == nullptr) {
175  s = replace(s, "${HOME}", "${USERPROFILE}");
176  }
177 #endif
178 
179  // Expression for an environment variables, e.g. ${NAME}
180  // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
181  // - .+? looks for the shortest match (non-greedy)
182  // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
183  std::regex envVarExpr(R"(\$\{(.+?)\})");
184 
185  // Are there any variables in this string?
186  std::smatch match;
187  std::string strIter = s;
188 
189  // Loop over the entire value string and look for variable names
190  while (std::regex_search(strIter, match, envVarExpr)) {
191  std::string varName = match[1];
192 
193  // Find the variable in the environment and its value
194  std::string varValue;
195  if (std::getenv(varName.c_str()) != nullptr) {
196  varValue = std::getenv(varName.c_str());
197  }
198 
199  // Replace the variable placeholder with its value in the original string
200  s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
201 
202  // Continue the loop with the remainder of the string
203  strIter = match.suffix();
204  }
205  return s;
206 }
207 
208 
209 bool
210 StringUtils::startsWith(const std::string& str, const std::string prefix) {
211  return str.compare(0, prefix.length(), prefix) == 0;
212 }
213 
214 
215 bool
216 StringUtils::endsWith(const std::string& str, const std::string suffix) {
217  if (str.length() >= suffix.length()) {
218  return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
219  } else {
220  return false;
221  }
222 }
223 
224 
225 std::string
226 StringUtils::padFront(const std::string& str, int length, char padding) {
227  return std::string(MAX2(0, length - (int)str.size()), padding) + str;
228 }
229 
230 
231 std::string
232 StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
233  std::string result = replace(orig, "&", "&amp;");
234  result = replace(result, ">", "&gt;");
235  result = replace(result, "<", "&lt;");
236  result = replace(result, "\"", "&quot;");
237  if (maskDoubleHyphen) {
238  result = replace(result, "--", "&#45;&#45;");
239  }
240  for (char invalid = '\1'; invalid < ' '; invalid++) {
241  result = replace(result, std::string(1, invalid).c_str(), "");
242  }
243  return replace(result, "'", "&apos;");
244 }
245 
246 
247 std::string
248 StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
249  std::ostringstream out;
250 
251  for (int i = 0; i < (int)toEncode.length(); ++i) {
252  const char t = toEncode.at(i);
253 
254  if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
255  (encodeWhich == "" &&
256  ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
257  (t >= 65 && t <= 90) || // A-Z
258  t == 95 || // underscore
259  (t >= 97 && t <= 122) || // a-z
260  t == 126)) // tilde
261  ) {
262  out << toEncode.at(i);
263  } else {
264  out << charToHex(toEncode.at(i));
265  }
266  }
267 
268  return out.str();
269 }
270 
271 
272 std::string
273 StringUtils::urlDecode(const std::string& toDecode) {
274  std::ostringstream out;
275 
276  for (int i = 0; i < (int)toDecode.length(); ++i) {
277  if (toDecode.at(i) == '%') {
278  std::string str(toDecode.substr(i + 1, 2));
279  out << hexToChar(str);
280  i += 2;
281  } else {
282  out << toDecode.at(i);
283  }
284  }
285 
286  return out.str();
287 }
288 
289 std::string
290 StringUtils::charToHex(unsigned char c) {
291  short i = c;
292 
293  std::stringstream s;
294 
295  s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
296 
297  return s.str();
298 }
299 
300 
301 unsigned char
302 StringUtils::hexToChar(const std::string& str) {
303  short c = 0;
304  if (!str.empty()) {
305  std::istringstream in(str);
306  in >> std::hex >> c;
307  if (in.fail()) {
308  throw NumberFormatException(str + " could not be interpreted as hex");
309  }
310  }
311  return static_cast<unsigned char>(c);
312 }
313 
314 
315 int
316 StringUtils::toInt(const std::string& sData) {
317  long long int result = toLong(sData);
318  if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
319  throw NumberFormatException(toString(result) + " int overflow");
320  }
321  return (int)result;
322 }
323 
324 
325 int
326 StringUtils::toIntSecure(const std::string& sData, int def) {
327  if (sData.length() == 0) {
328  return def;
329  }
330  return toInt(sData);
331 }
332 
333 
334 long long int
335 StringUtils::toLong(const std::string& sData) {
336  const char* const data = sData.c_str();
337  if (data == 0 || data[0] == 0) {
338  throw EmptyData();
339  }
340  char* end;
341  errno = 0;
342 #ifdef WIN32
343  long long int ret = _strtoi64(data, &end, 10);
344 #else
345  long long int ret = strtoll(data, &end, 10);
346 #endif
347  if (errno == ERANGE) {
348  errno = 0;
349  throw NumberFormatException("(long long integer range) " + sData);
350  }
351  if ((int)(end - data) != (int)strlen(data)) {
352  throw NumberFormatException("(long long integer format) " + sData);
353  }
354  return ret;
355 }
356 
357 
358 int
359 StringUtils::hexToInt(const std::string& sData) {
360  if (sData.length() == 0) {
361  throw EmptyData();
362  }
363  size_t idx = 0;
364  int result;
365  try {
366  if (sData[0] == '#') { // for html color codes
367  result = std::stoi(sData.substr(1), &idx, 16);
368  idx++;
369  } else {
370  result = std::stoi(sData, &idx, 16);
371  }
372  } catch (...) {
373  throw NumberFormatException("(hex integer format) " + sData);
374  }
375  if (idx != sData.length()) {
376  throw NumberFormatException("(hex integer format) " + sData);
377  }
378  return result;
379 }
380 
381 
382 double
383 StringUtils::toDouble(const std::string& sData) {
384  if (sData.size() == 0) {
385  throw EmptyData();
386  }
387  try {
388  size_t idx = 0;
389  const double result = std::stod(sData, &idx);
390  if (idx != sData.size()) {
391  throw NumberFormatException("(double format) " + sData);
392  } else {
393  return result;
394  }
395  } catch (...) {
396  // invalid_argument or out_of_range
397  throw NumberFormatException("(double) " + sData);
398  }
399 }
400 
401 
402 double
403 StringUtils::toDoubleSecure(const std::string& sData, const double def) {
404  if (sData.length() == 0) {
405  return def;
406  }
407  return toDouble(sData);
408 }
409 
410 
411 bool
412 StringUtils::toBool(const std::string& sData) {
413  if (sData.length() == 0) {
414  throw EmptyData();
415  }
416  const std::string s = to_lower_case(sData);
417  if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
418  return true;
419  }
420  if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
421  return false;
422  }
423  throw BoolFormatException(s);
424 }
425 
426 MMVersion
427 StringUtils::toVersion(const std::string& sData) {
428  std::vector<std::string> parts = StringTokenizer(sData, ".").getVector();
429  return MMVersion(toInt(parts.front()), toDouble(parts.back()));
430 }
431 
432 std::string
433 StringUtils::transcode(const XMLCh* const data, int length) {
434  if (data == 0) {
435  throw EmptyData();
436  }
437  if (length == 0) {
438  return "";
439  }
440 #if _XERCES_VERSION < 30100
441  char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
442  std::string result(t);
443  XERCES_CPP_NAMESPACE::XMLString::release(&t);
444  return result;
445 #else
446  try {
447  XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
448  return reinterpret_cast<const char*>(utf8.str());
449  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
450  return "?";
451  }
452 #endif
453 }
454 
455 
456 std::string
457 StringUtils::transcodeFromLocal(const std::string& localString) {
458 #if _XERCES_VERSION > 30100
459  try {
460  if (myLCPTranscoder == nullptr) {
461  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
462  }
463  if (myLCPTranscoder != nullptr) {
464  return transcode(myLCPTranscoder->transcode(localString.c_str()));
465  }
466  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
467 #endif
468  return localString;
469 }
470 
471 
472 std::string
473 StringUtils::transcodeToLocal(const std::string& utf8String) {
474 #if _XERCES_VERSION > 30100
475  try {
476  if (myLCPTranscoder == nullptr) {
477  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
478  }
479  if (myLCPTranscoder != nullptr) {
480  XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
481  return myLCPTranscoder->transcode(utf8.str());
482  }
483  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
484 #endif
485  return utf8String;
486 }
487 
488 
489 std::string
490 StringUtils::trim_left(const std::string s, const std::string& t) {
491  std::string result = s;
492  result.erase(0, s.find_first_not_of(t));
493  return result;
494 }
495 
496 std::string
497 StringUtils::trim_right(const std::string s, const std::string& t) {
498  std::string result = s;
499  result.erase(s.find_last_not_of(t) + 1);
500  return result;
501 }
502 
503 std::string
504 StringUtils::trim(const std::string s, const std::string& t) {
505  return trim_right(trim_left(s, t), t);
506 }
507 
508 void
510  myLCPTranscoder = nullptr;
511 }
512 
513 /****************************************************************************/
std::pair< int, double > MMVersion
(M)ajor/(M)inor version for written networks and default version for loading
Definition: StdDefs.h:67
T MAX2(T a, T b)
Definition: StdDefs.h:82
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:46
std::vector< std::string > getVector()
return vector of strings
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
Definition: StringUtils.cpp:67
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
encodes a URL (adapted from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static MMVersion toVersion(const std::string &sData)
to version
static std::string charToHex(unsigned char c)
char to hexadecimal
static std::string urlDecode(const std::string &encoded)
decodes a URL (adapted from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it; returns the given default if the string is empty
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
Definition: StringUtils.cpp:77
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
Definition: StringUtils.h:202
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string replace(std::string str, const std::string &what, const std::string &by)
Replaces all occurrences of the second string by the third string within the first string.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:87
static std::string prune(const std::string &str)
Removes leading and trailing whitespace characters.
Definition: StringUtils.cpp:56
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts German umlauts (and a few accented letters) to their plain Latin transliteration.
static unsigned char hexToChar(const std::string &str)
hexadecimal to char
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string emptyString
An empty string.
Definition: StringUtils.h:86
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
Replaces an environment variable with its value (similar to bash); syntax for a variable is ${NAME}.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:146
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it; returns the given default if the string is empty
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter