Eclipse SUMO - Simulation of Urban MObility
SUMOSAXReader.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo
3 // Copyright (C) 2012-2024 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
20 // SAX-reader encapsulation
21 /****************************************************************************/
22 #include <config.h>
23 
24 #include <string>
25 #include <memory>
26 #include <iostream>
27 #include <xercesc/sax2/XMLReaderFactory.hpp>
28 #include <xercesc/framework/LocalFileInputSource.hpp>
29 #include <xercesc/framework/MemBufInputSource.hpp>
30 
33 #include <utils/common/ToString.h>
35 #include "GenericSAXHandler.h"
36 #ifdef HAVE_ZLIB
37 #include <foreign/zstr/zstr.hpp>
38 #endif
39 #include "IStreamInputSource.h"
40 #include "SUMOSAXReader.h"
41 
42 using XERCES_CPP_NAMESPACE::SAX2XMLReader;
43 using XERCES_CPP_NAMESPACE::XMLUni;
44 
45 
46 // ===========================================================================
47 // method definitions
48 // ===========================================================================
49 
50 SUMOSAXReader::SUMOSAXReader(GenericSAXHandler& handler, const std::string& validationScheme, XERCES_CPP_NAMESPACE::XMLGrammarPool* grammarPool) :
51  myHandler(nullptr),
52  myValidationScheme(validationScheme),
53  myGrammarPool(grammarPool),
54  myXMLReader(nullptr),
55  myIStream(nullptr),
56  myInputStream(nullptr),
57  mySchemaResolver(true, false),
58  myLocalResolver(false, false),
59  myNoOpResolver(false, true),
60  myNextSection(-1, nullptr) {
61  setHandler(handler);
62 }
63 
64 
// NOTE(review): the signature line of this definition is missing from the listing
// (embedded line 65 is skipped). The declaration index at the end of the file lists
// ~SUMOSAXReader(), so this is presumably the destructor body - confirm against the repository.
// Releases the XML reader; deleting is harmless if it was never built (still nullptr).
66  delete myXMLReader;
// Releases the attribute object held in the myNextSection pair, if any.
67  delete myNextSection.second;
68 }
69 
70 
// NOTE(review): the signature line is missing from the listing (embedded line 72 is skipped).
// The declaration index lists "void setHandler(GenericSAXHandler &handler)", which matches the
// use of `handler` below - confirm against the repository.
// Remembers the handler and, if a reader already exists, registers the handler with it.
71 void
73  myHandler = &handler;
// Without a reader there is nothing to register yet; the stored pointer is picked up later.
74  if (myXMLReader != nullptr) {
75  myXMLReader->setContentHandler(&handler);
76  myXMLReader->setErrorHandler(&handler);
77  }
78 }
79 
80 
81 void
82 SUMOSAXReader::setValidation(std::string validationScheme) {
83  // The settings ensure that by default (validationScheme "local" or "never") no network access occurs
84  // this is achieved by either resolving no entities at all (myNoOpResolver) or resolving only
85  // to local files (myLocalResolver). Thus we can safely disable the Sonar warnings in the parse methods below.
86  if (myXMLReader != nullptr && validationScheme != myValidationScheme) {
87  if (validationScheme == "") {
88  validationScheme = myValidationScheme;
89  }
90  // see here https://svn.apache.org/repos/asf/xerces/c/trunk/samples/src/SAX2Count/SAX2Count.cpp for the way to set features
91  if (validationScheme == "never") {
92  myXMLReader->setEntityResolver(&myNoOpResolver);
93  myXMLReader->setProperty(XMLUni::fgXercesScannerName, (void*)XMLUni::fgWFXMLScanner);
94  } else {
95  myXMLReader->setEntityResolver(validationScheme == "local" ? &myLocalResolver : &mySchemaResolver);
96  myXMLReader->setProperty(XMLUni::fgXercesScannerName, (void*)XMLUni::fgIGXMLScanner);
97  myXMLReader->setFeature(XMLUni::fgXercesSchema, true);
98  myXMLReader->setFeature(XMLUni::fgSAX2CoreValidation, true);
99  myXMLReader->setFeature(XMLUni::fgXercesDynamic, validationScheme == "local" || validationScheme == "auto");
100  myXMLReader->setFeature(XMLUni::fgXercesUseCachedGrammarInParse, myValidationScheme == "always");
101  }
102  }
103  myValidationScheme = validationScheme;
104 }
105 
106 
107 void
108 SUMOSAXReader::parse(std::string systemID) {
109  if (!FileHelpers::isReadable(systemID)) {
110  throw IOError(TLF("Cannot read file '%'!", systemID));
111  }
112  if (FileHelpers::isDirectory(systemID)) {
113  throw IOError(TLF("File '%' is a directory!", systemID));
114  }
115  ensureSAXReader();
116 #ifdef HAVE_ZLIB
117  zstr::ifstream istream(StringUtils::transcodeToLocal(systemID).c_str(), std::fstream::in | std::fstream::binary);
118  myXMLReader->parse(IStreamInputSource(istream)); // NOSONAR
119 #else
120  myXMLReader->parse(StringUtils::transcodeToLocal(systemID).c_str()); // NOSONAR
121 #endif
122 }
123 
124 
125 void
126 SUMOSAXReader::parseString(std::string content) {
127  ensureSAXReader();
128  XERCES_CPP_NAMESPACE::MemBufInputSource memBufIS((const XMLByte*)content.c_str(), content.size(), "registrySettings");
129  myXMLReader->parse(memBufIS); // NOSONAR
130 }
131 
132 
133 bool
134 SUMOSAXReader::parseFirst(std::string systemID) {
135  if (!FileHelpers::isReadable(systemID)) {
136  throw IOError(TLF("Cannot read file '%'!", systemID));
137  }
138  if (FileHelpers::isDirectory(systemID)) {
139  throw IOError(TLF("File '%' is a directory!", systemID));
140  }
141  ensureSAXReader();
142  myToken = XERCES_CPP_NAMESPACE::XMLPScanToken();
143 #ifdef HAVE_ZLIB
144  myIStream = std::unique_ptr<zstr::ifstream>(new zstr::ifstream(StringUtils::transcodeToLocal(systemID).c_str(), std::fstream::in | std::fstream::binary));
145  myInputStream = std::unique_ptr<IStreamInputSource>(new IStreamInputSource(*myIStream));
146  return myXMLReader->parseFirst(*myInputStream, myToken); // NOSONAR
147 #else
148  return myXMLReader->parseFirst(StringUtils::transcodeToLocal(systemID).c_str(), myToken); // NOSONAR
149 #endif
150 }
151 
152 
// NOTE(review): the signature line is missing from the listing (embedded line 154 is skipped).
// The declaration index lists "bool parseNext()" - presumably this is its body, confirm.
// Advances the progressive parse by one step using the stored scan token and returns
// the reader's result. Throws ProcessError if no reader has been built yet.
153 bool
155  if (myXMLReader == nullptr) {
156  throw ProcessError(TL("The XML-parser was not initialized."));
157  }
158  return myXMLReader->parseNext(myToken);
159 }
160 
161 
// NOTE(review): this listing is incomplete. The signature line (embedded line 163) and the
// embedded lines 170 and 181 are skipped, so statements may be missing at those places.
// The declaration index lists "bool parseSection(int element)", which matches the use of
// `element` below - confirm the full body against the repository.
// Visible behaviour: drives the progressive parse until the handler reports the section
// as finished. Returns false as soon as the reader's parseNext returns false, true otherwise.
// Throws ProcessError if no reader has been built yet.
162 bool
164  if (myXMLReader == nullptr) {
165  throw ProcessError(TL("The XML-parser was not initialized."));
166  }
167  bool started = false;
// A first value other than -1 marks a stored section start.
168  if (myNextSection.first != -1) {
// The requested section counts as already started if the stored element is the requested one.
169  started = myNextSection.first == element;
// NOTE(review): embedded line 170 is not visible here.
// The stored attributes are released and the pair is reset to its "empty" state (-1, nullptr).
171  delete myNextSection.second;
172  myNextSection.first = -1;
173  myNextSection.second = nullptr;
174  }
175  myHandler->setSection(element, started);
// Keep feeding the parser until the handler signals the end of the section.
176  while (!myHandler->sectionFinished()) {
177  if (!myXMLReader->parseNext(myToken)) {
178  return false;
179  }
180  }
// NOTE(review): embedded line 181 is not visible here; nothing visible in this body assigns
// a new value to myNextSection, so that statement presumably lives on the missing line.
182  return true;
183 }
184 
185 
// NOTE(review): the signature line is missing from the listing (embedded line 187 is skipped).
// The declaration index lists "void ensureSAXReader()" - presumably this is its body, confirm.
// Builds the XML reader on first use; does nothing if a reader already exists.
// Throws ProcessError if the factory returns no reader.
186 void
188  if (myXMLReader == nullptr) {
// The reader is created with the platform memory manager and the grammar pool given at construction.
189  myXMLReader = XERCES_CPP_NAMESPACE::XMLReaderFactory::createXMLReader(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager, myGrammarPool);
190  if (myXMLReader == nullptr) {
191  throw ProcessError(TL("The XML-parser could not be build."));
192  }
// Called without argument, i.e. with the default "": applies the stored validation scheme.
193  setValidation();
// Register the stored handler for both content and error callbacks.
194  myXMLReader->setContentHandler(myHandler);
195  myXMLReader->setErrorHandler(myHandler);
196  }
197 }
198 
199 
200 SUMOSAXReader::LocalSchemaResolver::LocalSchemaResolver(const bool haveFallback, const bool noOp) :
201  myHaveFallback(haveFallback),
202  myNoOp(noOp) {
203 }
204 
205 
206 XERCES_CPP_NAMESPACE::InputSource*
207 SUMOSAXReader::LocalSchemaResolver::resolveEntity(const XMLCh* const /* publicId */, const XMLCh* const systemId) {
208  if (myNoOp) {
209  return new XERCES_CPP_NAMESPACE::MemBufInputSource((const XMLByte*)"", 0, "");
210  }
211  const std::string url = StringUtils::transcode(systemId);
212  const std::string::size_type pos = url.find("/xsd/");
213  if (pos != std::string::npos) {
214  const char* sumoPath = std::getenv("SUMO_HOME");
215  // no need for a warning if SUMO_HOME is not set, global preparsing should have done it.
216  if (sumoPath != nullptr) {
217  const std::string file = sumoPath + std::string("/data") + url.substr(pos);
218  if (FileHelpers::isReadable(file)) {
219  XMLCh* t = XERCES_CPP_NAMESPACE::XMLString::transcode(file.c_str());
220  XERCES_CPP_NAMESPACE::InputSource* const result = new XERCES_CPP_NAMESPACE::LocalFileInputSource(t);
221  XERCES_CPP_NAMESPACE::XMLString::release(&t);
222  return result;
223  } else {
224  WRITE_WARNING("Cannot read local schema '" + file + (myHaveFallback ? "', will try website lookup." : "', XML validation will fail."));
225  }
226  }
227  }
228  if (myHaveFallback || (!StringUtils::startsWith(url, "http:") && !StringUtils::startsWith(url, "https:") && !StringUtils::startsWith(url, "ftp:"))) {
229  return nullptr;
230  }
231  return new XERCES_CPP_NAMESPACE::MemBufInputSource((const XMLByte*)"", 0, "");
232 }
233 
234 
235 /****************************************************************************/
#define WRITE_WARNING(msg)
Definition: MsgHandler.h:295
#define TL(string)
Definition: MsgHandler.h:315
#define TLF(string,...)
Definition: MsgHandler.h:317
static bool isReadable(std::string path)
Checks whether the given file is readable.
Definition: FileHelpers.cpp:51
static bool isDirectory(std::string path)
Checks whether the given file is a directory.
Definition: FileHelpers.cpp:65
A handler which converts occurring elements and attributes into enums.
bool sectionFinished() const
void setSection(const int element, const bool seen)
std::pair< int, SUMOSAXAttributes * > retrieveNextSectionStart()
virtual void myStartElement(int element, const SUMOSAXAttributes &attrs)
Callback method for an opening tag to implement by derived classes.
Xerces InputSource reading from arbitrary std::istream.
LocalSchemaResolver(const bool haveFallback, const bool noOp)
constructor
XERCES_CPP_NAMESPACE::InputSource * resolveEntity(const XMLCh *const publicId, const XMLCh *const systemId)
resolve entity
bool parseSection(int element)
Continue a progressive parse started by parseFirst until the given element is encountered.
std::unique_ptr< IStreamInputSource > myInputStream
input stream
std::string myValidationScheme
Information whether built reader/parser shall validate XML-documents against schemata.
SUMOSAXReader(GenericSAXHandler &handler, const std::string &validationScheme, XERCES_CPP_NAMESPACE::XMLGrammarPool *grammarPool)
Constructor.
std::pair< int, SUMOSAXAttributes * > myNextSection
next section
void setHandler(GenericSAXHandler &handler)
Sets the given handler as content and error handler for the reader.
void parseString(std::string content)
Parse XML from the given string.
LocalSchemaResolver myLocalResolver
local resolver
bool parseFirst(std::string systemID)
Start parsing the given file using parseFirst of myXMLReader.
std::unique_ptr< std::istream > myIStream
istream
LocalSchemaResolver mySchemaResolver
schema resolver
void setValidation(std::string validationScheme="")
Sets a new validation scheme and applies the validation settings to the XML reader.
~SUMOSAXReader()
Destructor.
GenericSAXHandler * myHandler
generic SAX Handler
void parse(std::string systemID)
Parse the given file completely by calling parse of myXMLReader.
XERCES_CPP_NAMESPACE::SAX2XMLReader * myXMLReader
XML reader.
LocalSchemaResolver myNoOpResolver
no operation resolver
XERCES_CPP_NAMESPACE::XMLGrammarPool * myGrammarPool
Schema cache to be used for grammars which are not declared.
XERCES_CPP_NAMESPACE::XMLPScanToken myToken
token
void ensureSAXReader()
Builds a reader, if needed.
bool parseNext()
Continue a progressive parse started by parseFirst.
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:146
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage