Eclipse SUMO - Simulation of Urban MObility
Loading...
Searching...
No Matches
StringUtils.cpp
Go to the documentation of this file.
1/****************************************************************************/
2// Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3// Copyright (C) 2001-2024 German Aerospace Center (DLR) and others.
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// https://www.eclipse.org/legal/epl-2.0/
7// This Source Code may also be made available under the following Secondary
8// Licenses when the conditions for such availability set forth in the Eclipse
9// Public License 2.0 are satisfied: GNU General Public License, version 2
10// or later which is available at
11// https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13/****************************************************************************/
21// Some static methods for string processing
22/****************************************************************************/
23#include <config.h>
24
25#include <string>
26#include <iostream>
27#include <cstdio>
28#include <cstring>
29#include <regex>
30#ifdef WIN32
31#define NOMINMAX
32#include <windows.h>
33#undef NOMINMAX
34#else
35#include <unistd.h>
36#endif
37#include <xercesc/util/TransService.hpp>
38#include <xercesc/util/TranscodingException.hpp>
42#include "StringUtils.h"
43
44#define KM_PER_MILE 1.609344
45
46
47// ===========================================================================
48// static member definitions
49// ===========================================================================
50std::string StringUtils::emptyString;
51XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
52
53
54// ===========================================================================
55// method definitions
56// ===========================================================================
57std::string
58StringUtils::prune(const std::string& str) {
59 const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
60 if (std::string::npos != endpos) {
61 const int startpos = (int)str.find_first_not_of(" \t\n\r");
62 return str.substr(startpos, endpos - startpos + 1);
63 }
64 return "";
65}
66
67
68std::string
69StringUtils::pruneZeros(const std::string& str, int max) {
70 const std::string::size_type endpos = str.find_last_not_of("0");
71 if (endpos != std::string::npos && str.back() == '0') {
72 std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
73 return res;
74 }
75 return str;
76}
77
78std::string
79StringUtils::to_lower_case(const std::string& str) {
80 std::string s = str;
81 std::transform(s.begin(), s.end(), s.begin(), [](char c) {
82 return (char)::tolower(c);
83 });
84 return s;
85}
86
87
88std::string
90 // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
91 std::string result;
92 for (const auto& c : str) {
93 const unsigned char uc = (unsigned char)c;
94 if (uc < 128) {
95 result += uc;
96 } else {
97 result += (char)(0xc2 + (uc > 0xbf));
98 result += (char)((uc & 0x3f) + 0x80);
99 }
100 }
101 return result;
102}
103
104
105std::string
107 str = replace(str, "\xE4", "ae");
108 str = replace(str, "\xC4", "Ae");
109 str = replace(str, "\xF6", "oe");
110 str = replace(str, "\xD6", "Oe");
111 str = replace(str, "\xFC", "ue");
112 str = replace(str, "\xDC", "Ue");
113 str = replace(str, "\xDF", "ss");
114 str = replace(str, "\xC9", "E");
115 str = replace(str, "\xE9", "e");
116 str = replace(str, "\xC8", "E");
117 str = replace(str, "\xE8", "e");
118 return str;
119}
120
121
122std::string
123StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
124 std::string::size_type idx = str.find(what);
125 const int what_len = (int)what.length();
126 if (what_len > 0) {
127 const int by_len = (int)by.length();
128 while (idx != std::string::npos) {
129 str = str.replace(idx, what_len, by);
130 idx = str.find(what, idx + by_len);
131 }
132 }
133 return str;
134}
135
136
137std::string
138StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
139 std::string s = str;
140 if (timeRef != nullptr) {
141 const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
142 const std::string::size_type utcIndex = str.find("${UTC}");
143 const bool isUTC = utcIndex != std::string::npos;
144 if (localTimeIndex != std::string::npos || isUTC) {
145 const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
146 char buffer [80];
147 struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
148 strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
149 auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
150 auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
151 const std::string micro = buffer + toString(microseconds.count());
152 if (isUTC) {
153 s.replace(utcIndex, 6, micro);
154 } else {
155 s.replace(localTimeIndex, 12, micro);
156 }
157 }
158 }
159 const std::string::size_type pidIndex = str.find("${PID}");
160 if (pidIndex != std::string::npos) {
161#ifdef WIN32
162 s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
163#else
164 s.replace(pidIndex, 6, toString(::getpid()));
165#endif
166 }
167 if (std::getenv("SUMO_LOGO") == nullptr) {
168 s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
169 }
170 const std::string::size_type tildeIndex = str.find("~");
171 if (tildeIndex == 0) {
172 s.replace(0, 1, "${HOME}");
173 }
174 s = replace(s, ",~", ",${HOME}");
175#ifdef WIN32
176 if (std::getenv("HOME") == nullptr) {
177 s = replace(s, "${HOME}", "${USERPROFILE}");
178 }
179#endif
180
181 // Expression for an environment variables, e.g. ${NAME}
182 // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
183 // - .+? looks for the shortest match (non-greedy)
184 // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
185 std::regex envVarExpr(R"(\$\{(.+?)\})");
186
187 // Are there any variables in this string?
188 std::smatch match;
189 std::string strIter = s;
190
191 // Loop over the entire value string and look for variable names
192 while (std::regex_search(strIter, match, envVarExpr)) {
193 std::string varName = match[1];
194
195 // Find the variable in the environment and its value
196 std::string varValue;
197 if (std::getenv(varName.c_str()) != nullptr) {
198 varValue = std::getenv(varName.c_str());
199 }
200
201 // Replace the variable placeholder with its value in the original string
202 s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
203
204 // Continue the loop with the remainder of the string
205 strIter = match.suffix();
206 }
207 return s;
208}
209
210
211bool
212StringUtils::startsWith(const std::string& str, const std::string prefix) {
213 return str.compare(0, prefix.length(), prefix) == 0;
214}
215
216
217bool
218StringUtils::endsWith(const std::string& str, const std::string suffix) {
219 if (str.length() >= suffix.length()) {
220 return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
221 } else {
222 return false;
223 }
224}
225
226
227std::string
228StringUtils::padFront(const std::string& str, int length, char padding) {
229 return std::string(MAX2(0, length - (int)str.size()), padding) + str;
230}
231
232
233std::string
234StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
235 std::string result = replace(orig, "&", "&amp;");
236 result = replace(result, ">", "&gt;");
237 result = replace(result, "<", "&lt;");
238 result = replace(result, "\"", "&quot;");
239 if (maskDoubleHyphen) {
240 result = replace(result, "--", "&#45;&#45;");
241 }
242 for (char invalid = '\1'; invalid < ' '; invalid++) {
243 result = replace(result, std::string(1, invalid).c_str(), "");
244 }
245 return replace(result, "'", "&apos;");
246}
247
248
249std::string
250StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
251 std::ostringstream out;
252
253 for (int i = 0; i < (int)toEncode.length(); ++i) {
254 const char t = toEncode.at(i);
255
256 if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
257 (encodeWhich == "" &&
258 ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
259 (t >= 65 && t <= 90) || // A-Z
260 t == 95 || // underscore
261 (t >= 97 && t <= 122) || // a-z
262 t == 126)) // tilde
263 ) {
264 out << toEncode.at(i);
265 } else {
266 out << charToHex(toEncode.at(i));
267 }
268 }
269
270 return out.str();
271}
272
273
274std::string
275StringUtils::urlDecode(const std::string& toDecode) {
276 std::ostringstream out;
277
278 for (int i = 0; i < (int)toDecode.length(); ++i) {
279 if (toDecode.at(i) == '%') {
280 std::string str(toDecode.substr(i + 1, 2));
281 out << hexToChar(str);
282 i += 2;
283 } else {
284 out << toDecode.at(i);
285 }
286 }
287
288 return out.str();
289}
290
291std::string
292StringUtils::charToHex(unsigned char c) {
293 short i = c;
294
295 std::stringstream s;
296
297 s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
298
299 return s.str();
300}
301
302
303unsigned char
304StringUtils::hexToChar(const std::string& str) {
305 short c = 0;
306 if (!str.empty()) {
307 std::istringstream in(str);
308 in >> std::hex >> c;
309 if (in.fail()) {
310 throw NumberFormatException(str + " could not be interpreted as hex");
311 }
312 }
313 return static_cast<unsigned char>(c);
314}
315
316
317int
318StringUtils::toInt(const std::string& sData) {
319 long long int result = toLong(sData);
320 if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
321 throw NumberFormatException(toString(result) + " int overflow");
322 }
323 return (int)result;
324}
325
326
327int
328StringUtils::toIntSecure(const std::string& sData, int def) {
329 if (sData.length() == 0) {
330 return def;
331 }
332 return toInt(sData);
333}
334
335
336long long int
337StringUtils::toLong(const std::string& sData) {
338 const char* const data = sData.c_str();
339 if (data == 0 || data[0] == 0) {
340 throw EmptyData();
341 }
342 char* end;
343 errno = 0;
344#ifdef WIN32
345 long long int ret = _strtoi64(data, &end, 10);
346#else
347 long long int ret = strtoll(data, &end, 10);
348#endif
349 if (errno == ERANGE) {
350 errno = 0;
351 throw NumberFormatException("(long long integer range) " + sData);
352 }
353 if ((int)(end - data) != (int)strlen(data)) {
354 throw NumberFormatException("(long long integer format) " + sData);
355 }
356 return ret;
357}
358
359
360int
361StringUtils::hexToInt(const std::string& sData) {
362 if (sData.length() == 0) {
363 throw EmptyData();
364 }
365 size_t idx = 0;
366 int result;
367 try {
368 if (sData[0] == '#') { // for html color codes
369 result = std::stoi(sData.substr(1), &idx, 16);
370 idx++;
371 } else {
372 result = std::stoi(sData, &idx, 16);
373 }
374 } catch (...) {
375 throw NumberFormatException("(hex integer format) " + sData);
376 }
377 if (idx != sData.length()) {
378 throw NumberFormatException("(hex integer format) " + sData);
379 }
380 return result;
381}
382
383
384double
385StringUtils::toDouble(const std::string& sData) {
386 if (sData.size() == 0) {
387 throw EmptyData();
388 }
389 try {
390 size_t idx = 0;
391 const double result = std::stod(sData, &idx);
392 if (idx != sData.size()) {
393 throw NumberFormatException("(double format) " + sData);
394 } else {
395 return result;
396 }
397 } catch (...) {
398 // invalid_argument or out_of_range
399 throw NumberFormatException("(double) " + sData);
400 }
401}
402
403
404double
405StringUtils::toDoubleSecure(const std::string& sData, const double def) {
406 if (sData.length() == 0) {
407 return def;
408 }
409 return toDouble(sData);
410}
411
412
413bool
414StringUtils::toBool(const std::string& sData) {
415 if (sData.length() == 0) {
416 throw EmptyData();
417 }
418 const std::string s = to_lower_case(sData);
419 if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
420 return true;
421 }
422 if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
423 return false;
424 }
425 throw BoolFormatException(s);
426}
427
429StringUtils::toVersion(const std::string& sData) {
430 std::vector<std::string> parts = StringTokenizer(sData, ".").getVector();
431 return MMVersion(toInt(parts.front()), toDouble(parts.back()));
432}
433
434
435double
436StringUtils::parseDist(const std::string& sData) {
437 if (sData.size() == 0) {
438 throw EmptyData();
439 }
440 try {
441 size_t idx = 0;
442 const double result = std::stod(sData, &idx);
443 if (idx != sData.size()) {
444 const std::string unit = prune(sData.substr(idx));
445 if (unit == "m" || unit == "metre" || unit == "meter" || unit == "metres" || unit == "meters") {
446 return result;
447 }
448 if (unit == "km" || unit == "kilometre" || unit == "kilometer" || unit == "kilometres" || unit == "kilometers") {
449 return result * 1000.;
450 }
451 if (unit == "mi" || unit == "mile" || unit == "miles") {
452 return result * 1000. * KM_PER_MILE;
453 }
454 if (unit == "nmi") {
455 return result * 1852.;
456 }
457 if (unit == "ft" || unit == "foot" || unit == "feet") {
458 return result * 12. * 0.0254;
459 }
460 if (unit == "\"" || unit == "in" || unit == "inch" || unit == "inches") {
461 return result * 0.0254;
462 }
463 if (unit[0] == '\'') {
464 double inches = 12 * result;
465 if (unit.length() > 1) {
466 inches += std::stod(unit.substr(1), &idx);
467 if (unit.substr(idx) == "\"") {
468 return inches * 0.0254;
469 }
470 }
471 }
472 throw NumberFormatException("(distance format) " + sData);
473 } else {
474 return result;
475 }
476 } catch (...) {
477 // invalid_argument or out_of_range
478 throw NumberFormatException("(double) " + sData);
479 }
480}
481
482
483double
484StringUtils::parseSpeed(const std::string& sData, const bool defaultKmph) {
485 if (sData.size() == 0) {
486 throw EmptyData();
487 }
488 try {
489 size_t idx = 0;
490 const double result = std::stod(sData, &idx);
491 if (idx != sData.size()) {
492 const std::string unit = prune(sData.substr(idx));
493 if (unit == "km/h" || unit == "kph" || unit == "kmh" || unit == "kmph") {
494 return result / 3.6;
495 }
496 if (unit == "m/s") {
497 return result;
498 }
499 if (unit == "mph") {
500 return result * KM_PER_MILE / 3.6;
501 }
502 if (unit == "knots") {
503 return result * 1.852 / 3.6;
504 }
505 throw NumberFormatException("(speed format) " + sData);
506 } else {
507 return defaultKmph ? result / 3.6 : result;
508 }
509 } catch (...) {
510 // invalid_argument or out_of_range
511 throw NumberFormatException("(double) " + sData);
512 }
513}
514
515
516std::string
517StringUtils::transcode(const XMLCh* const data, int length) {
518 if (data == 0) {
519 throw EmptyData();
520 }
521 if (length == 0) {
522 return "";
523 }
524#if _XERCES_VERSION < 30100
525 char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
526 std::string result(t);
527 XERCES_CPP_NAMESPACE::XMLString::release(&t);
528 return result;
529#else
530 try {
531 XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
532 return reinterpret_cast<const char*>(utf8.str());
533 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
534 return "?";
535 }
536#endif
537}
538
539
540std::string
541StringUtils::transcodeFromLocal(const std::string& localString) {
542#if _XERCES_VERSION > 30100
543 try {
544 if (myLCPTranscoder == nullptr) {
545 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
546 }
547 if (myLCPTranscoder != nullptr) {
548 return transcode(myLCPTranscoder->transcode(localString.c_str()));
549 }
550 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
551#endif
552 return localString;
553}
554
555
556std::string
557StringUtils::transcodeToLocal(const std::string& utf8String) {
558#if _XERCES_VERSION > 30100
559 try {
560 if (myLCPTranscoder == nullptr) {
561 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
562 }
563 if (myLCPTranscoder != nullptr) {
564 XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
565 return myLCPTranscoder->transcode(utf8.str());
566 }
567 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
568#endif
569 return utf8String;
570}
571
572
573std::string
574StringUtils::trim_left(const std::string s, const std::string& t) {
575 std::string result = s;
576 result.erase(0, s.find_first_not_of(t));
577 return result;
578}
579
580std::string
581StringUtils::trim_right(const std::string s, const std::string& t) {
582 std::string result = s;
583 result.erase(s.find_last_not_of(t) + 1);
584 return result;
585}
586
587std::string
588StringUtils::trim(const std::string s, const std::string& t) {
589 return trim_right(trim_left(s, t), t);
590}
591
592
593std::string
594StringUtils::wrapText(const std::string s, int width) {
595 std::vector<std::string> parts = StringTokenizer(s).getVector();
596 std::string result;
597 std::string line;
598 bool firstLine = true;
599 bool firstWord = true;
600 for (std::string p : parts) {
601 if ((int)(line.size() + p.size()) < width || firstWord) {
602 if (firstWord) {
603 firstWord = false;
604 } else {
605 line += " ";
606 }
607 line = line + p;
608 } else {
609 if (firstLine) {
610 firstLine = false;
611 } else {
612 result += "\n";
613 }
614 result = result + line;
615 line.clear();
616 firstWord = true;
617 }
618 }
619 if (line.size() > 0) {
620 if (firstLine) {
621 firstLine = false;
622 } else {
623 result += "\n";
624 }
625 result = result + line;
626 }
627 return result;
628}
629
630
631void
635
636/****************************************************************************/
std::pair< int, double > MMVersion
(M)ajor/(M)inor version for written networks and default version for loading
Definition StdDefs.h:67
T MAX2(T a, T b)
Definition StdDefs.h:82
#define KM_PER_MILE
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition ToString.h:46
std::vector< std::string > getVector()
return vector of strings
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
encode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static MMVersion toVersion(const std::string &sData)
parse a (network) version string
static std::string charToHex(unsigned char c)
char to hexadecimal
static std::string urlDecode(const std::string &encoded)
decode url (stem from http://bogomip.net/blog/cpp-url-encoding-and-decoding/)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it, returning the given default if the string is empty
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string replace(std::string str, const std::string &what, const std::string &by)
Replaces all occurrences of the second string by the third string within the first string.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
static std::string prune(const std::string &str)
Removes trailing and leading whitechars.
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts german "Umlaute" to their latin-version.
static double parseDist(const std::string &sData)
parse a distance, length or width value with a unit
static unsigned char hexToChar(const std::string &str)
hexadecimal to char
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string wrapText(const std::string s, int width)
splits the text into words and re-joins them into newline-separated lines of roughly the given width
static double parseSpeed(const std::string &sData, const bool defaultKmph=true)
parse a speed value with a unit
static std::string emptyString
An empty string.
Definition StringUtils.h:86
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
Replaces an environment variable with its value (similar to bash); syntax for a variable is ${NAME}.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter