Eclipse SUMO - Simulation of Urban MObility
Loading...
Searching...
No Matches
StringUtils.cpp
Go to the documentation of this file.
1/****************************************************************************/
2// Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3// Copyright (C) 2001-2024 German Aerospace Center (DLR) and others.
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// https://www.eclipse.org/legal/epl-2.0/
7// This Source Code may also be made available under the following Secondary
8// Licenses when the conditions for such availability set forth in the Eclipse
9// Public License 2.0 are satisfied: GNU General Public License, version 2
10// or later which is available at
11// https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13/****************************************************************************/
21// Some static methods for string processing
22/****************************************************************************/
23#include <config.h>
24
25#include <string>
26#include <iostream>
27#include <cstdio>
28#include <cstring>
29#include <regex>
30#ifdef WIN32
31#define NOMINMAX
32#include <windows.h>
33#undef NOMINMAX
34#else
35#include <unistd.h>
36#endif
37#include <xercesc/util/TransService.hpp>
38#include <xercesc/util/TranscodingException.hpp>
42#include "StringUtils.h"
43
44#define KM_PER_MILE 1.609344
45
46
47// ===========================================================================
48// static member definitions
49// ===========================================================================
// Shared empty string instance
50std::string StringUtils::emptyString;
// Transcoder for the local code page; starts out as nullptr and is created on
// first use by transcodeFromLocal() / transcodeToLocal()
51XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
52
53
54// ===========================================================================
55// method definitions
56// ===========================================================================
57std::string
58StringUtils::prune(const std::string& str) {
59 const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
60 if (std::string::npos != endpos) {
61 const int startpos = (int)str.find_first_not_of(" \t\n\r");
62 return str.substr(startpos, endpos - startpos + 1);
63 }
64 return "";
65}
66
67
68std::string
69StringUtils::pruneZeros(const std::string& str, int max) {
70 const std::string::size_type endpos = str.find_last_not_of("0");
71 if (endpos != std::string::npos && str.back() == '0') {
72 std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
73 return res;
74 }
75 return str;
76}
77
78std::string
79StringUtils::to_lower_case(const std::string& str) {
80 std::string s = str;
81 std::transform(s.begin(), s.end(), s.begin(), [](char c) {
82 return (char)::tolower(c);
83 });
84 return s;
85}
86
87
88std::string
90 // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
91 std::string result;
92 for (const auto& c : str) {
93 const unsigned char uc = (unsigned char)c;
94 if (uc < 128) {
95 result += uc;
96 } else {
97 result += (char)(0xc2 + (uc > 0xbf));
98 result += (char)((uc & 0x3f) + 0x80);
99 }
100 }
101 return result;
102}
103
104
105std::string
107 str = replace(str, "\xE4", "ae");
108 str = replace(str, "\xC4", "Ae");
109 str = replace(str, "\xF6", "oe");
110 str = replace(str, "\xD6", "Oe");
111 str = replace(str, "\xFC", "ue");
112 str = replace(str, "\xDC", "Ue");
113 str = replace(str, "\xDF", "ss");
114 str = replace(str, "\xC9", "E");
115 str = replace(str, "\xE9", "e");
116 str = replace(str, "\xC8", "E");
117 str = replace(str, "\xE8", "e");
118 return str;
119}
120
121
122std::string
123StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
124 std::string::size_type idx = str.find(what);
125 const int what_len = (int)what.length();
126 if (what_len > 0) {
127 const int by_len = (int)by.length();
128 while (idx != std::string::npos) {
129 str = str.replace(idx, what_len, by);
130 idx = str.find(what, idx + by_len);
131 }
132 }
133 return str;
134}
135
136
137std::string
138StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
139 std::string s = str;
140 if (timeRef != nullptr) {
141 const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
142 const std::string::size_type utcIndex = str.find("${UTC}");
143 const bool isUTC = utcIndex != std::string::npos;
144 if (localTimeIndex != std::string::npos || isUTC) {
145 const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
146 char buffer [80];
147 struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
148 strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
149 auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
150 auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
151 const std::string micro = buffer + toString(microseconds.count());
152 if (isUTC) {
153 s.replace(utcIndex, 6, micro);
154 } else {
155 s.replace(localTimeIndex, 12, micro);
156 }
157 }
158 }
159 const std::string::size_type pidIndex = str.find("${PID}");
160 if (pidIndex != std::string::npos) {
161#ifdef WIN32
162 s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
163#else
164 s.replace(pidIndex, 6, toString(::getpid()));
165#endif
166 }
167 if (std::getenv("SUMO_LOGO") == nullptr) {
168 s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
169 }
170 const std::string::size_type tildeIndex = str.find("~");
171 if (tildeIndex == 0) {
172 s.replace(0, 1, "${HOME}");
173 }
174 s = replace(s, ",~", ",${HOME}");
175#ifdef WIN32
176 if (std::getenv("HOME") == nullptr) {
177 s = replace(s, "${HOME}", "${USERPROFILE}");
178 }
179#endif
180
181 // Expression for an environment variables, e.g. ${NAME}
182 // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
183 // - .+? looks for the shortest match (non-greedy)
184 // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
185 std::regex envVarExpr(R"(\$\{(.+?)\})");
186
187 // Are there any variables in this string?
188 std::smatch match;
189 std::string strIter = s;
190
191 // Loop over the entire value string and look for variable names
192 while (std::regex_search(strIter, match, envVarExpr)) {
193 std::string varName = match[1];
194
195 // Find the variable in the environment and its value
196 std::string varValue;
197 if (std::getenv(varName.c_str()) != nullptr) {
198 varValue = std::getenv(varName.c_str());
199 }
200
201 // Replace the variable placeholder with its value in the original string
202 s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
203
204 // Continue the loop with the remainder of the string
205 strIter = match.suffix();
206 }
207 return s;
208}
209
210
211bool
212StringUtils::startsWith(const std::string& str, const std::string prefix) {
213 return str.compare(0, prefix.length(), prefix) == 0;
214}
215
216
217bool
218StringUtils::endsWith(const std::string& str, const std::string suffix) {
219 if (str.length() >= suffix.length()) {
220 return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
221 } else {
222 return false;
223 }
224}
225
226
227std::string
228StringUtils::padFront(const std::string& str, int length, char padding) {
229 return std::string(MAX2(0, length - (int)str.size()), padding) + str;
230}
231
232
233std::string
234StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
235 std::string result = replace(orig, "&", "&amp;");
236 result = replace(result, ">", "&gt;");
237 result = replace(result, "<", "&lt;");
238 result = replace(result, "\"", "&quot;");
239 if (maskDoubleHyphen) {
240 result = replace(result, "--", "&#45;&#45;");
241 }
242 for (char invalid = '\1'; invalid < ' '; invalid++) {
243 result = replace(result, std::string(1, invalid).c_str(), "");
244 }
245 return replace(result, "'", "&apos;");
246}
247
248
249std::string
250StringUtils::escapeShell(const std::string& orig) {
251 std::string result = replace(orig, "\"", "\\\"");
252 return result;
253}
254
255
256std::string
257StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
258 std::ostringstream out;
259
260 for (int i = 0; i < (int)toEncode.length(); ++i) {
261 const char t = toEncode.at(i);
262
263 if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
264 (encodeWhich == "" &&
265 ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
266 (t >= 65 && t <= 90) || // A-Z
267 t == 95 || // underscore
268 (t >= 97 && t <= 122) || // a-z
269 t == 126)) // tilde
270 ) {
271 out << toEncode.at(i);
272 } else {
273 out << charToHex(toEncode.at(i));
274 }
275 }
276
277 return out.str();
278}
279
280
281std::string
282StringUtils::urlDecode(const std::string& toDecode) {
283 std::ostringstream out;
284
285 for (int i = 0; i < (int)toDecode.length(); ++i) {
286 if (toDecode.at(i) == '%') {
287 std::string str(toDecode.substr(i + 1, 2));
288 out << hexToChar(str);
289 i += 2;
290 } else {
291 out << toDecode.at(i);
292 }
293 }
294
295 return out.str();
296}
297
298std::string
299StringUtils::charToHex(unsigned char c) {
300 short i = c;
301
302 std::stringstream s;
303
304 s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
305
306 return s.str();
307}
308
309
310unsigned char
311StringUtils::hexToChar(const std::string& str) {
312 short c = 0;
313 if (!str.empty()) {
314 std::istringstream in(str);
315 in >> std::hex >> c;
316 if (in.fail()) {
317 throw NumberFormatException(str + " could not be interpreted as hex");
318 }
319 }
320 return static_cast<unsigned char>(c);
321}
322
323
324int
325StringUtils::toInt(const std::string& sData) {
326 long long int result = toLong(sData);
327 if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
328 throw NumberFormatException(toString(result) + " int overflow");
329 }
330 return (int)result;
331}
332
333
334int
335StringUtils::toIntSecure(const std::string& sData, int def) {
336 if (sData.length() == 0) {
337 return def;
338 }
339 return toInt(sData);
340}
341
342
343long long int
344StringUtils::toLong(const std::string& sData) {
345 const char* const data = sData.c_str();
346 if (data == 0 || data[0] == 0) {
347 throw EmptyData();
348 }
349 char* end;
350 errno = 0;
351#ifdef WIN32
352 long long int ret = _strtoi64(data, &end, 10);
353#else
354 long long int ret = strtoll(data, &end, 10);
355#endif
356 if (errno == ERANGE) {
357 errno = 0;
358 throw NumberFormatException("(long long integer range) " + sData);
359 }
360 if ((int)(end - data) != (int)strlen(data)) {
361 throw NumberFormatException("(long long integer format) " + sData);
362 }
363 return ret;
364}
365
366
367int
368StringUtils::hexToInt(const std::string& sData) {
369 if (sData.length() == 0) {
370 throw EmptyData();
371 }
372 size_t idx = 0;
373 int result;
374 try {
375 if (sData[0] == '#') { // for html color codes
376 result = std::stoi(sData.substr(1), &idx, 16);
377 idx++;
378 } else {
379 result = std::stoi(sData, &idx, 16);
380 }
381 } catch (...) {
382 throw NumberFormatException("(hex integer format) " + sData);
383 }
384 if (idx != sData.length()) {
385 throw NumberFormatException("(hex integer format) " + sData);
386 }
387 return result;
388}
389
390
391double
392StringUtils::toDouble(const std::string& sData) {
393 if (sData.size() == 0) {
394 throw EmptyData();
395 }
396 try {
397 size_t idx = 0;
398 const double result = std::stod(sData, &idx);
399 if (idx != sData.size()) {
400 throw NumberFormatException("(double format) " + sData);
401 } else {
402 return result;
403 }
404 } catch (...) {
405 // invalid_argument or out_of_range
406 throw NumberFormatException("(double) " + sData);
407 }
408}
409
410
411double
412StringUtils::toDoubleSecure(const std::string& sData, const double def) {
413 if (sData.length() == 0) {
414 return def;
415 }
416 return toDouble(sData);
417}
418
419
420bool
421StringUtils::toBool(const std::string& sData) {
422 if (sData.length() == 0) {
423 throw EmptyData();
424 }
425 const std::string s = to_lower_case(sData);
426 if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
427 return true;
428 }
429 if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
430 return false;
431 }
432 throw BoolFormatException(s);
433}
434
436StringUtils::toVersion(const std::string& sData) {
437 std::vector<std::string> parts = StringTokenizer(sData, ".").getVector();
438 return MMVersion(toInt(parts.front()), toDouble(parts.back()));
439}
440
441
442double
443StringUtils::parseDist(const std::string& sData) {
444 if (sData.size() == 0) {
445 throw EmptyData();
446 }
447 try {
448 size_t idx = 0;
449 const double result = std::stod(sData, &idx);
450 if (idx != sData.size()) {
451 const std::string unit = prune(sData.substr(idx));
452 if (unit == "m" || unit == "metre" || unit == "meter" || unit == "metres" || unit == "meters") {
453 return result;
454 }
455 if (unit == "km" || unit == "kilometre" || unit == "kilometer" || unit == "kilometres" || unit == "kilometers") {
456 return result * 1000.;
457 }
458 if (unit == "mi" || unit == "mile" || unit == "miles") {
459 return result * 1000. * KM_PER_MILE;
460 }
461 if (unit == "nmi") {
462 return result * 1852.;
463 }
464 if (unit == "ft" || unit == "foot" || unit == "feet") {
465 return result * 12. * 0.0254;
466 }
467 if (unit == "\"" || unit == "in" || unit == "inch" || unit == "inches") {
468 return result * 0.0254;
469 }
470 if (unit[0] == '\'') {
471 double inches = 12 * result;
472 if (unit.length() > 1) {
473 inches += std::stod(unit.substr(1), &idx);
474 if (unit.substr(idx) == "\"") {
475 return inches * 0.0254;
476 }
477 }
478 }
479 throw NumberFormatException("(distance format) " + sData);
480 } else {
481 return result;
482 }
483 } catch (...) {
484 // invalid_argument or out_of_range
485 throw NumberFormatException("(double) " + sData);
486 }
487}
488
489
490double
491StringUtils::parseSpeed(const std::string& sData, const bool defaultKmph) {
492 if (sData.size() == 0) {
493 throw EmptyData();
494 }
495 try {
496 size_t idx = 0;
497 const double result = std::stod(sData, &idx);
498 if (idx != sData.size()) {
499 const std::string unit = prune(sData.substr(idx));
500 if (unit == "km/h" || unit == "kph" || unit == "kmh" || unit == "kmph") {
501 return result / 3.6;
502 }
503 if (unit == "m/s") {
504 return result;
505 }
506 if (unit == "mph") {
507 return result * KM_PER_MILE / 3.6;
508 }
509 if (unit == "knots") {
510 return result * 1.852 / 3.6;
511 }
512 throw NumberFormatException("(speed format) " + sData);
513 } else {
514 return defaultKmph ? result / 3.6 : result;
515 }
516 } catch (...) {
517 // invalid_argument or out_of_range
518 throw NumberFormatException("(double) " + sData);
519 }
520}
521
522
523std::string
524StringUtils::transcode(const XMLCh* const data, int length) {
525 if (data == 0) {
526 throw EmptyData();
527 }
528 if (length == 0) {
529 return "";
530 }
531#if _XERCES_VERSION < 30100
532 char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
533 std::string result(t);
534 XERCES_CPP_NAMESPACE::XMLString::release(&t);
535 return result;
536#else
537 try {
538 XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
539 return reinterpret_cast<const char*>(utf8.str());
540 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
541 return "?";
542 }
543#endif
544}
545
546
547std::string
548StringUtils::transcodeFromLocal(const std::string& localString) {
549#if _XERCES_VERSION > 30100
550 try {
551 if (myLCPTranscoder == nullptr) {
552 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
553 }
554 if (myLCPTranscoder != nullptr) {
555 return transcode(myLCPTranscoder->transcode(localString.c_str()));
556 }
557 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
558#endif
559 return localString;
560}
561
562
563std::string
564StringUtils::transcodeToLocal(const std::string& utf8String) {
565#if _XERCES_VERSION > 30100
566 try {
567 if (myLCPTranscoder == nullptr) {
568 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
569 }
570 if (myLCPTranscoder != nullptr) {
571 XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
572 return myLCPTranscoder->transcode(utf8.str());
573 }
574 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
575#endif
576 return utf8String;
577}
578
579
580std::string
581StringUtils::trim_left(const std::string s, const std::string& t) {
582 std::string result = s;
583 result.erase(0, s.find_first_not_of(t));
584 return result;
585}
586
587std::string
588StringUtils::trim_right(const std::string s, const std::string& t) {
589 std::string result = s;
590 result.erase(s.find_last_not_of(t) + 1);
591 return result;
592}
593
594std::string
595StringUtils::trim(const std::string s, const std::string& t) {
596 return trim_right(trim_left(s, t), t);
597}
598
599
600std::string
601StringUtils::wrapText(const std::string s, int width) {
602 std::vector<std::string> parts = StringTokenizer(s).getVector();
603 std::string result;
604 std::string line;
605 bool firstLine = true;
606 bool firstWord = true;
607 for (std::string p : parts) {
608 if ((int)(line.size() + p.size()) < width || firstWord) {
609 if (firstWord) {
610 firstWord = false;
611 } else {
612 line += " ";
613 }
614 line = line + p;
615 } else {
616 if (firstLine) {
617 firstLine = false;
618 } else {
619 result += "\n";
620 }
621 result = result + line;
622 line.clear();
623 firstWord = true;
624 }
625 }
626 if (line.size() > 0) {
627 if (firstLine) {
628 firstLine = false;
629 } else {
630 result += "\n";
631 }
632 result = result + line;
633 }
634 return result;
635}
636
637
638void
642
643/****************************************************************************/
std::pair< int, double > MMVersion
(M)ajor/(M)inor version for written networks and default version for loading
Definition StdDefs.h:67
T MAX2(T a, T b)
Definition StdDefs.h:82
#define KM_PER_MILE
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition ToString.h:46
std::vector< std::string > getVector()
return vector of strings
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
encode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static MMVersion toVersion(const std::string &sData)
parse a (network) version string
static std::string charToHex(unsigned char c)
char to hexadecimal
static std::string urlDecode(const std::string &encoded)
decode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it; returns the given default for an empty string
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string escapeShell(const std::string &orig)
Escape special characters with backslash.
static std::string replace(std::string str, const std::string &what, const std::string &by)
Replaces all occurrences of the second string by the third string within the first string.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
static std::string prune(const std::string &str)
Removes trailing and leading whitechars.
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts German "Umlaute" to their Latin (ASCII) transcriptions.
static double parseDist(const std::string &sData)
parse a distance, length or width value with a unit
static unsigned char hexToChar(const std::string &str)
hexadecimal to char
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string wrapText(const std::string s, int width)
breaks the text at whitespace into lines of roughly the given width
static double parseSpeed(const std::string &sData, const bool defaultKmph=true)
parse a speed value with a unit
static std::string emptyString
An empty string.
Definition StringUtils.h:91
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
Replaces an environment variable with its value (similar to bash); syntax for a variable is ${NAME}.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter