Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3 // Copyright (C) 2001-2024 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
21 // Some static methods for string processing
22 /****************************************************************************/
23 #include <config.h>
24 
25 #include <string>
26 #include <iostream>
27 #include <cstdio>
28 #include <cstring>
29 #include <regex>
30 #ifdef WIN32
31 #define NOMINMAX
32 #include <windows.h>
33 #undef NOMINMAX
34 #else
35 #include <unistd.h>
36 #endif
37 #include <xercesc/util/TransService.hpp>
38 #include <xercesc/util/TranscodingException.hpp>
40 #include <utils/common/ToString.h>
42 #include "StringUtils.h"
43 
44 #define KM_PER_MILE 1.609344
45 
46 
47 // ===========================================================================
48 // static member definitions
49 // ===========================================================================
// Definition of the shared empty string member declared in StringUtils.h.
50 std::string StringUtils::emptyString;
// Transcoder for the local code page; starts out unset and is created on first use
// by transcodeFromLocal() / transcodeToLocal().
51 XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
52 
53 
54 // ===========================================================================
55 // method definitions
56 // ===========================================================================
57 std::string
58 StringUtils::prune(const std::string& str) {
59  const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
60  if (std::string::npos != endpos) {
61  const int startpos = (int)str.find_first_not_of(" \t\n\r");
62  return str.substr(startpos, endpos - startpos + 1);
63  }
64  return "";
65 }
66 
67 
68 std::string
69 StringUtils::pruneZeros(const std::string& str, int max) {
70  const std::string::size_type endpos = str.find_last_not_of("0");
71  if (endpos != std::string::npos && str.back() == '0') {
72  std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
73  return res;
74  }
75  return str;
76 }
77 
78 std::string
79 StringUtils::to_lower_case(const std::string& str) {
80  std::string s = str;
81  std::transform(s.begin(), s.end(), s.begin(), [](char c) {
82  return (char)::tolower(c);
83  });
84  return s;
85 }
86 
87 
88 std::string
89 StringUtils::latin1_to_utf8(std::string str) {
90  // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
91  std::string result;
92  for (const auto& c : str) {
93  const unsigned char uc = (unsigned char)c;
94  if (uc < 128) {
95  result += uc;
96  } else {
97  result += (char)(0xc2 + (uc > 0xbf));
98  result += (char)((uc & 0x3f) + 0x80);
99  }
100  }
101  return result;
102 }
103 
104 
105 std::string
106 StringUtils::convertUmlaute(std::string str) {
107  str = replace(str, "\xE4", "ae");
108  str = replace(str, "\xC4", "Ae");
109  str = replace(str, "\xF6", "oe");
110  str = replace(str, "\xD6", "Oe");
111  str = replace(str, "\xFC", "ue");
112  str = replace(str, "\xDC", "Ue");
113  str = replace(str, "\xDF", "ss");
114  str = replace(str, "\xC9", "E");
115  str = replace(str, "\xE9", "e");
116  str = replace(str, "\xC8", "E");
117  str = replace(str, "\xE8", "e");
118  return str;
119 }
120 
121 
122 std::string
123 StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
124  std::string::size_type idx = str.find(what);
125  const int what_len = (int)what.length();
126  if (what_len > 0) {
127  const int by_len = (int)by.length();
128  while (idx != std::string::npos) {
129  str = str.replace(idx, what_len, by);
130  idx = str.find(what, idx + by_len);
131  }
132  }
133  return str;
134 }
135 
136 
137 std::string
138 StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
139  std::string s = str;
140  if (timeRef != nullptr) {
141  const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
142  const std::string::size_type utcIndex = str.find("${UTC}");
143  const bool isUTC = utcIndex != std::string::npos;
144  if (localTimeIndex != std::string::npos || isUTC) {
145  const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
146  char buffer [80];
147  struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
148  strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
149  auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
150  auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
151  const std::string micro = buffer + toString(microseconds.count());
152  if (isUTC) {
153  s.replace(utcIndex, 6, micro);
154  } else {
155  s.replace(localTimeIndex, 12, micro);
156  }
157  }
158  }
159  const std::string::size_type pidIndex = str.find("${PID}");
160  if (pidIndex != std::string::npos) {
161 #ifdef WIN32
162  s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
163 #else
164  s.replace(pidIndex, 6, toString(::getpid()));
165 #endif
166  }
167  if (std::getenv("SUMO_LOGO") == nullptr) {
168  s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
169  }
170  const std::string::size_type tildeIndex = str.find("~");
171  if (tildeIndex == 0) {
172  s.replace(0, 1, "${HOME}");
173  }
174  s = replace(s, ",~", ",${HOME}");
175 #ifdef WIN32
176  if (std::getenv("HOME") == nullptr) {
177  s = replace(s, "${HOME}", "${USERPROFILE}");
178  }
179 #endif
180 
181  // Expression for an environment variables, e.g. ${NAME}
182  // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
183  // - .+? looks for the shortest match (non-greedy)
184  // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
185  std::regex envVarExpr(R"(\$\{(.+?)\})");
186 
187  // Are there any variables in this string?
188  std::smatch match;
189  std::string strIter = s;
190 
191  // Loop over the entire value string and look for variable names
192  while (std::regex_search(strIter, match, envVarExpr)) {
193  std::string varName = match[1];
194 
195  // Find the variable in the environment and its value
196  std::string varValue;
197  if (std::getenv(varName.c_str()) != nullptr) {
198  varValue = std::getenv(varName.c_str());
199  }
200 
201  // Replace the variable placeholder with its value in the original string
202  s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
203 
204  // Continue the loop with the remainder of the string
205  strIter = match.suffix();
206  }
207  return s;
208 }
209 
210 
211 bool
212 StringUtils::startsWith(const std::string& str, const std::string prefix) {
213  return str.compare(0, prefix.length(), prefix) == 0;
214 }
215 
216 
217 bool
218 StringUtils::endsWith(const std::string& str, const std::string suffix) {
219  if (str.length() >= suffix.length()) {
220  return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
221  } else {
222  return false;
223  }
224 }
225 
226 
227 std::string
228 StringUtils::padFront(const std::string& str, int length, char padding) {
229  return std::string(MAX2(0, length - (int)str.size()), padding) + str;
230 }
231 
232 
233 std::string
234 StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
235  std::string result = replace(orig, "&", "&amp;");
236  result = replace(result, ">", "&gt;");
237  result = replace(result, "<", "&lt;");
238  result = replace(result, "\"", "&quot;");
239  if (maskDoubleHyphen) {
240  result = replace(result, "--", "&#45;&#45;");
241  }
242  for (char invalid = '\1'; invalid < ' '; invalid++) {
243  result = replace(result, std::string(1, invalid).c_str(), "");
244  }
245  return replace(result, "'", "&apos;");
246 }
247 
248 
249 std::string
250 StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
251  std::ostringstream out;
252 
253  for (int i = 0; i < (int)toEncode.length(); ++i) {
254  const char t = toEncode.at(i);
255 
256  if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
257  (encodeWhich == "" &&
258  ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
259  (t >= 65 && t <= 90) || // A-Z
260  t == 95 || // underscore
261  (t >= 97 && t <= 122) || // a-z
262  t == 126)) // tilde
263  ) {
264  out << toEncode.at(i);
265  } else {
266  out << charToHex(toEncode.at(i));
267  }
268  }
269 
270  return out.str();
271 }
272 
273 
274 std::string
275 StringUtils::urlDecode(const std::string& toDecode) {
276  std::ostringstream out;
277 
278  for (int i = 0; i < (int)toDecode.length(); ++i) {
279  if (toDecode.at(i) == '%') {
280  std::string str(toDecode.substr(i + 1, 2));
281  out << hexToChar(str);
282  i += 2;
283  } else {
284  out << toDecode.at(i);
285  }
286  }
287 
288  return out.str();
289 }
290 
291 std::string
292 StringUtils::charToHex(unsigned char c) {
293  short i = c;
294 
295  std::stringstream s;
296 
297  s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
298 
299  return s.str();
300 }
301 
302 
303 unsigned char
304 StringUtils::hexToChar(const std::string& str) {
305  short c = 0;
306  if (!str.empty()) {
307  std::istringstream in(str);
308  in >> std::hex >> c;
309  if (in.fail()) {
310  throw NumberFormatException(str + " could not be interpreted as hex");
311  }
312  }
313  return static_cast<unsigned char>(c);
314 }
315 
316 
317 int
318 StringUtils::toInt(const std::string& sData) {
319  long long int result = toLong(sData);
320  if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
321  throw NumberFormatException(toString(result) + " int overflow");
322  }
323  return (int)result;
324 }
325 
326 
327 int
328 StringUtils::toIntSecure(const std::string& sData, int def) {
329  if (sData.length() == 0) {
330  return def;
331  }
332  return toInt(sData);
333 }
334 
335 
336 long long int
337 StringUtils::toLong(const std::string& sData) {
338  const char* const data = sData.c_str();
339  if (data == 0 || data[0] == 0) {
340  throw EmptyData();
341  }
342  char* end;
343  errno = 0;
344 #ifdef WIN32
345  long long int ret = _strtoi64(data, &end, 10);
346 #else
347  long long int ret = strtoll(data, &end, 10);
348 #endif
349  if (errno == ERANGE) {
350  errno = 0;
351  throw NumberFormatException("(long long integer range) " + sData);
352  }
353  if ((int)(end - data) != (int)strlen(data)) {
354  throw NumberFormatException("(long long integer format) " + sData);
355  }
356  return ret;
357 }
358 
359 
360 int
361 StringUtils::hexToInt(const std::string& sData) {
362  if (sData.length() == 0) {
363  throw EmptyData();
364  }
365  size_t idx = 0;
366  int result;
367  try {
368  if (sData[0] == '#') { // for html color codes
369  result = std::stoi(sData.substr(1), &idx, 16);
370  idx++;
371  } else {
372  result = std::stoi(sData, &idx, 16);
373  }
374  } catch (...) {
375  throw NumberFormatException("(hex integer format) " + sData);
376  }
377  if (idx != sData.length()) {
378  throw NumberFormatException("(hex integer format) " + sData);
379  }
380  return result;
381 }
382 
383 
384 double
385 StringUtils::toDouble(const std::string& sData) {
386  if (sData.size() == 0) {
387  throw EmptyData();
388  }
389  try {
390  size_t idx = 0;
391  const double result = std::stod(sData, &idx);
392  if (idx != sData.size()) {
393  throw NumberFormatException("(double format) " + sData);
394  } else {
395  return result;
396  }
397  } catch (...) {
398  // invalid_argument or out_of_range
399  throw NumberFormatException("(double) " + sData);
400  }
401 }
402 
403 
404 double
405 StringUtils::toDoubleSecure(const std::string& sData, const double def) {
406  if (sData.length() == 0) {
407  return def;
408  }
409  return toDouble(sData);
410 }
411 
412 
413 bool
414 StringUtils::toBool(const std::string& sData) {
415  if (sData.length() == 0) {
416  throw EmptyData();
417  }
418  const std::string s = to_lower_case(sData);
419  if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
420  return true;
421  }
422  if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
423  return false;
424  }
425  throw BoolFormatException(s);
426 }
427 
428 MMVersion
429 StringUtils::toVersion(const std::string& sData) {
430  std::vector<std::string> parts = StringTokenizer(sData, ".").getVector();
431  return MMVersion(toInt(parts.front()), toDouble(parts.back()));
432 }
433 
434 
435 double
436 StringUtils::parseDist(const std::string& sData) {
437  if (sData.size() == 0) {
438  throw EmptyData();
439  }
440  try {
441  size_t idx = 0;
442  const double result = std::stod(sData, &idx);
443  if (idx != sData.size()) {
444  const std::string unit = prune(sData.substr(idx));
445  if (unit == "m" || unit == "metre" || unit == "meter" || unit == "metres" || unit == "meters") {
446  return result;
447  }
448  if (unit == "km" || unit == "kilometre" || unit == "kilometer" || unit == "kilometres" || unit == "kilometers") {
449  return result * 1000.;
450  }
451  if (unit == "mi" || unit == "mile" || unit == "miles") {
452  return result * 1000. * KM_PER_MILE;
453  }
454  if (unit == "nmi") {
455  return result * 1852.;
456  }
457  if (unit == "ft" || unit == "foot" || unit == "feet") {
458  return result * 12. * 0.0254;
459  }
460  if (unit == "\"" || unit == "in" || unit == "inch" || unit == "inches") {
461  return result * 0.0254;
462  }
463  if (unit[0] == '\'') {
464  double inches = 12 * result;
465  if (unit.length() > 1) {
466  inches += std::stod(unit.substr(1), &idx);
467  if (unit.substr(idx) == "\"") {
468  return inches * 0.0254;
469  }
470  }
471  }
472  throw NumberFormatException("(distance format) " + sData);
473  } else {
474  return result;
475  }
476  } catch (...) {
477  // invalid_argument or out_of_range
478  throw NumberFormatException("(double) " + sData);
479  }
480 }
481 
482 
483 double
484 StringUtils::parseSpeed(const std::string& sData, const bool defaultKmph) {
485  if (sData.size() == 0) {
486  throw EmptyData();
487  }
488  try {
489  size_t idx = 0;
490  const double result = std::stod(sData, &idx);
491  if (idx != sData.size()) {
492  const std::string unit = prune(sData.substr(idx));
493  if (unit == "km/h" || unit == "kph" || unit == "kmh" || unit == "kmph") {
494  return result / 3.6;
495  }
496  if (unit == "m/s") {
497  return result;
498  }
499  if (unit == "mph") {
500  return result * KM_PER_MILE / 3.6;
501  }
502  if (unit == "knots") {
503  return result * 1.852 / 3.6;
504  }
505  throw NumberFormatException("(speed format) " + sData);
506  } else {
507  return defaultKmph ? result / 3.6 : result;
508  }
509  } catch (...) {
510  // invalid_argument or out_of_range
511  throw NumberFormatException("(double) " + sData);
512  }
513 }
514 
515 
516 std::string
517 StringUtils::transcode(const XMLCh* const data, int length) {
518  if (data == 0) {
519  throw EmptyData();
520  }
521  if (length == 0) {
522  return "";
523  }
524 #if _XERCES_VERSION < 30100
525  char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
526  std::string result(t);
527  XERCES_CPP_NAMESPACE::XMLString::release(&t);
528  return result;
529 #else
530  try {
531  XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
532  return reinterpret_cast<const char*>(utf8.str());
533  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
534  return "?";
535  }
536 #endif
537 }
538 
539 
540 std::string
541 StringUtils::transcodeFromLocal(const std::string& localString) {
542 #if _XERCES_VERSION > 30100
543  try {
544  if (myLCPTranscoder == nullptr) {
545  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
546  }
547  if (myLCPTranscoder != nullptr) {
548  return transcode(myLCPTranscoder->transcode(localString.c_str()));
549  }
550  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
551 #endif
552  return localString;
553 }
554 
555 
556 std::string
557 StringUtils::transcodeToLocal(const std::string& utf8String) {
558 #if _XERCES_VERSION > 30100
559  try {
560  if (myLCPTranscoder == nullptr) {
561  myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
562  }
563  if (myLCPTranscoder != nullptr) {
564  XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
565  return myLCPTranscoder->transcode(utf8.str());
566  }
567  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
568 #endif
569  return utf8String;
570 }
571 
572 
573 std::string
574 StringUtils::trim_left(const std::string s, const std::string& t) {
575  std::string result = s;
576  result.erase(0, s.find_first_not_of(t));
577  return result;
578 }
579 
580 std::string
581 StringUtils::trim_right(const std::string s, const std::string& t) {
582  std::string result = s;
583  result.erase(s.find_last_not_of(t) + 1);
584  return result;
585 }
586 
587 std::string
588 StringUtils::trim(const std::string s, const std::string& t) {
589  return trim_right(trim_left(s, t), t);
590 }
591 
592 
593 std::string
594 StringUtils::wrapText(const std::string s, int width) {
595  std::vector<std::string> parts = StringTokenizer(s).getVector();
596  std::string result;
597  std::string line;
598  bool firstLine = true;
599  bool firstWord = true;
600  for (std::string p : parts) {
601  if ((int)(line.size() + p.size()) < width || firstWord) {
602  if (firstWord) {
603  firstWord = false;
604  } else {
605  line += " ";
606  }
607  line = line + p;
608  } else {
609  if (firstLine) {
610  firstLine = false;
611  } else {
612  result += "\n";
613  }
614  result = result + line;
615  line.clear();
616  firstWord = true;
617  }
618  }
619  if (line.size() > 0) {
620  if (firstLine) {
621  firstLine = false;
622  } else {
623  result += "\n";
624  }
625  result = result + line;
626  }
627  return result;
628 }
629 
630 
631 void
633  myLCPTranscoder = nullptr;
634 }
635 
636 /****************************************************************************/
std::pair< int, double > MMVersion
(M)ajor/(M)inor version for written networks and default version for loading
Definition: StdDefs.h:67
T MAX2(T a, T b)
Definition: StdDefs.h:82
#define KM_PER_MILE
Definition: StringUtils.cpp:44
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:46
std::vector< std::string > getVector()
return vector of strings
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
Definition: StringUtils.cpp:69
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
encode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static MMVersion toVersion(const std::string &sData)
parse a (network) version string
static std::string charToHex(unsigned char c)
char to hexadecimal
static std::string urlDecode(const std::string &encoded)
decode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it; returns the given default for an empty string
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
Definition: StringUtils.cpp:79
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
Definition: StringUtils.h:211
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string replace(std::string str, const std::string &what, const std::string &by)
Replaces all occurrences of the second string by the third string within the first string.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:89
static std::string prune(const std::string &str)
Removes trailing and leading whitechars.
Definition: StringUtils.cpp:58
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts german "Umlaute" to their latin-version.
static double parseDist(const std::string &sData)
parse a distance, length or width value with a unit
static unsigned char hexToChar(const std::string &str)
hexadecimal to char
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string wrapText(const std::string s, int width)
breaks the text into several lines by inserting line breaks between words, aiming at the given line width
static double parseSpeed(const std::string &sData, const bool defaultKmph=true)
parse a speed value with a unit
static std::string emptyString
An empty string.
Definition: StringUtils.h:86
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
Replaces an environment variable with its value (similar to bash); syntax for a variable is ${NAME}.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:152
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter