Line data Source code
1 : /****************************************************************************/ 2 : // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.dev/sumo 3 : // Copyright (C) 2002-2024 German Aerospace Center (DLR) and others. 4 : // This program and the accompanying materials are made available under the 5 : // terms of the Eclipse Public License 2.0 which is available at 6 : // https://www.eclipse.org/legal/epl-2.0/ 7 : // This Source Code may also be made available under the following Secondary 8 : // Licenses when the conditions for such availability set forth in the Eclipse 9 : // Public License 2.0 are satisfied: GNU General Public License, version 2 10 : // or later which is available at 11 : // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html 12 : // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later 13 : /****************************************************************************/ 14 : /// @file GenericSAXHandler.h 15 : /// @author Daniel Krajzewicz 16 : /// @author Jakob Erdmann 17 : /// @author Michael Behrisch 18 : /// @date Sept 2002 19 : /// 20 : // A handler which converts occurring elements and attributes into enums 21 : /****************************************************************************/ 22 : #pragma once 23 : #include <config.h> 24 : 25 : #include <string> 26 : #include <map> 27 : #include <stack> 28 : #include <sstream> 29 : #include <vector> 30 : #include <xercesc/sax2/Attributes.hpp> 31 : #include <xercesc/sax2/DefaultHandler.hpp> 32 : #include <utils/common/UtilExceptions.h> 33 : #include <utils/common/StringBijection.h> 34 : #include "SUMOSAXAttributes.h" 35 : 36 : 37 : // =========================================================================== 38 : // class definitions 39 : // =========================================================================== 40 : /** 41 : * @class GenericSAXHandler 42 : * @brief A handler which converts occurring elements and attributes into enums 43 : * 44 : * Normally, when using a standard SAX-handler, we would have to compare 45 : * the incoming XMLCh*-element names with the ones we can parse. The same 46 : * applies to parsing the attributes. This was assumed to be very time consuming, 47 : * that's why we derive our handlers from this class. 48 : * 49 : * The idea behind this second handler layer was avoid repeated conversion 50 : * from strings/whatever to XMLCh* and back again. The usage is quite straight 51 : * forward, the only overhead is the need to define the enums - both elements 52 : * and attributes within "SUMOXMLDefinitions". Still, it maybe helps to avoid typos. 53 : * 54 : * This class implements the SAX-callback and offers a new set of callbacks 55 : * which must be implemented by derived classes. Instead of XMLCh*-values, 56 : * element names are supplied to the derived classes as enums (int). 57 : * 58 : * Also, this class allows to retrieve attributes using enums (int) within 59 : * the implemented "myStartElement" method. 60 : * 61 : * Basically, GenericSAXHandler is not derived within SUMO directly, but via SUMOSAXHandler 62 : * which knows all tags/attributes used by SUMO. It is still kept separate for 63 : * an easier maintainability and later extensions. 64 : */ 65 : class GenericSAXHandler : public XERCES_CPP_NAMESPACE::DefaultHandler { 66 : 67 : public: 68 : /** 69 : * @brief Constructor 70 : * 71 : * This constructor gets the lists of known tag and attribute names with 72 : * their enums (sumotags and sumoattrs in most cases). The end of the list 73 : * is signaled by terminatorTag/terminatorAttr respectively. 74 : * 75 : * The attribute names are converted into XMLCh* and stored within an 76 : * internal container. This container is cleared within the destructor. 77 : * 78 : * @param[in] tags The list of known tags 79 : * @param[in] terminatorTag The tag which indicates the end of tags (usually the last entry) 80 : * @param[in] attrs The list of known attributes 81 : * @param[in] terminatorAttr The attr which indicates the end of attrs (usually the last entry) 82 : * @param[in] file The name of the processed file 83 : * @param[in] expectedRoot The expected root element, empty string disables the check 84 : * 85 : * @todo Why are both lists non-const and given as pointers? 86 : */ 87 : GenericSAXHandler( 88 : StringBijection<int>::Entry* tags, int terminatorTag, 89 : StringBijection<int>::Entry* attrs, int terminatorAttr, 90 : const std::string& file, const std::string& expectedRoot = ""); 91 : 92 : 93 : /** @brief Destructor */ 94 : virtual ~GenericSAXHandler(); 95 : 96 : 97 : /** 98 : * @brief The inherited method called when a new tag opens 99 : * 100 : * The method parses the supplied XMLCh*-qname using the internal name/enum-map 101 : * to obtain the enum representation of the attribute name. 102 : * 103 : * Then, "myStartElement" is called supplying the enumeration value, the 104 : * string-representation of the name and the attributes. 105 : * 106 : * @todo recheck/describe encoding of the string-representation 107 : * @todo do not generate and report the string-representation 108 : */ 109 : void startElement(const XMLCh* const uri, const XMLCh* const localname, 110 : const XMLCh* const qname, const XERCES_CPP_NAMESPACE::Attributes& attrs); 111 : 112 : 113 : /** 114 : * @brief The inherited method called when characters occurred 115 : * 116 : * The retrieved characters are converted into a string and appended into a 117 : * private buffer. They are reported as soon as the element ends. 118 : * 119 : * @todo recheck/describe what happens with characters when a new element is opened 120 : * @todo describe characters processing in the class' head 121 : */ 122 : void characters(const XMLCh* const chars, const XERCES3_SIZE_t length); 123 : 124 : 125 : /** 126 : * @brief The inherited method called when a tag is being closed 127 : * 128 : * This method calls the user-implemented methods myCharacters with the previously 129 : * collected and converted characters. 130 : * 131 : * Then, myEndElement is called, supplying it the qname converted to its enum- 132 : * and string-representations. 133 : * 134 : * @todo recheck/describe encoding of the string-representation 135 : * @todo do not generate and report the string-representation 136 : */ 137 : void endElement(const XMLCh* const uri, const XMLCh* const localname, 138 : const XMLCh* const qname); 139 : 140 : 141 : /** 142 : * @brief Assigning a parent handler which is enabled when the specified tag is closed 143 : */ 144 : void registerParent(const int tag, GenericSAXHandler* handler); 145 : 146 : 147 : /** 148 : * @brief Sets the current file name 149 : * 150 : * @param[in] name The name of the currently processed file 151 : * 152 : * @todo Hmmm - this is as unsafe as having a direct access to the variable; recheck 153 : */ 154 : void setFileName(const std::string& name); 155 : 156 : 157 : /** 158 : * @brief returns the current file name 159 : * 160 : * @return The name of the currently processed file 161 : */ 162 : const std::string& getFileName() const; 163 : 164 : 165 : /// @name SAX ErrorHandler callbacks 166 : //@{ 167 : 168 : /** 169 : * @brief Handler for XML-warnings 170 : * 171 : * The message is built using buildErrorMessage and reported 172 : * to the warning-instance of the MsgHandler. 173 : * 174 : * @param[in] exception The occurred exception to process 175 : */ 176 : void warning(const XERCES_CPP_NAMESPACE::SAXParseException& exception); 177 : 178 : 179 : /** 180 : * @brief Handler for XML-errors 181 : * 182 : * The message is built using buildErrorMessage and thrown within a ProcessError. 183 : * 184 : * @param[in] exception The occurred exception to process 185 : * @exception ProcessError On any call 186 : */ 187 : void error(const XERCES_CPP_NAMESPACE::SAXParseException& exception); 188 : 189 : 190 : /** 191 : * @brief Handler for XML-errors 192 : * 193 : * The message is built using buildErrorMessage and thrown within a ProcessError. 194 : * 195 : * @exception ProcessError On any call 196 : * @param[in] exception The occurred exception to process 197 : */ 198 : void fatalError(const XERCES_CPP_NAMESPACE::SAXParseException& exception); 199 : //@} 200 : 201 : 202 : void setSection(const int element, const bool seen) { 203 474 : mySection = element; 204 474 : mySectionSeen = seen; 205 474 : mySectionOpen = seen; 206 474 : mySectionEnded = false; 207 474 : } 208 : 209 : bool sectionFinished() const { 210 5207156 : return mySectionEnded; 211 : } 212 : 213 : std::pair<int, SUMOSAXAttributes*> retrieveNextSectionStart() { 214 297 : std::pair<int, SUMOSAXAttributes*> ret = myNextSectionStart; 215 297 : myNextSectionStart.first = -1; 216 297 : myNextSectionStart.second = nullptr; 217 : return ret; 218 : } 219 : 220 : void needsCharacterData(const bool value = true) { 221 24 : myCollectCharacterData = value; 222 3 : } 223 : 224 : // Reader needs access to myStartElement, myEndElement 225 : friend class SUMOSAXReader; 226 : 227 : 228 : protected: 229 : /** 230 : * @brief Builds an error message 231 : * 232 : * The error message includes the file name and the line/column information 233 : * as supported by the given SAXParseException 234 : * 235 : * @param[in] exception The name of the currently processed file 236 : * @return A string describing the given exception 237 : */ 238 : std::string buildErrorMessage(const XERCES_CPP_NAMESPACE::SAXParseException& exception); 239 : 240 : 241 : /** 242 : * @brief Callback method for an opening tag to implement by derived classes 243 : * 244 : * Called by "startElement" (see there). 245 : * @param[in] element The element that contains the characters, given as a int 246 : * @param[in] attrs The SAX-attributes, wrapped as SUMOSAXAttributes 247 : * @exceptions ProcessError These method may throw a ProcessError if something fails 248 : */ 249 : virtual void myStartElement(int element, 250 : const SUMOSAXAttributes& attrs); 251 : 252 : 253 : /** 254 : * @brief Callback method for characters to implement by derived classes 255 : * 256 : * Called by "endElement" (see there). 257 : * @param[in] element The opened element, given as a int 258 : * @param[in] chars The complete embedded character string 259 : * @exceptions ProcessError These method may throw a ProcessError if something fails 260 : */ 261 : virtual void myCharacters(int element, 262 : const std::string& chars); 263 : 264 : 265 : /** @brief Callback method for a closing tag to implement by derived classes 266 : * 267 : * Called by "endElement" (see there). 268 : * @param[in] element The closed element, given as a int 269 : * @exceptions ProcessError These method may throw a ProcessError if something fails 270 : */ 271 : virtual void myEndElement(int element); 272 : 273 : /// @brief signal endElement to the parent handler (special case for MSCalibrator) 274 : void callParentEnd(int element); 275 : 276 : private: 277 : /** 278 : * @brief converts from c++-string into unicode 279 : * 280 : * @todo recheck encoding 281 : * @param[in] name The string to convert 282 : * @return The string converted into a XMLCh-string 283 : */ 284 : XMLCh* convert(const std::string& name) const; 285 : 286 : 287 : /** 288 : * @brief Converts a tag from its string into its numerical representation 289 : * 290 : * Returns the enum-representation stored for the given tag. If the tag is not 291 : * known, SUMO_TAG_NOTHING is returned. 292 : * @param[in] tag The string to convert 293 : * @return The int-value that represents the string, SUMO_TAG_NOTHING if the named attribute is not known 294 : */ 295 : int convertTag(const std::string& tag) const; 296 : 297 : 298 : private: 299 : /// @name attributes parsing 300 : //@{ 301 : 302 : // the type of the map from ids to their unicode-string representation 303 : typedef std::vector<XMLCh*> AttrMap; 304 : 305 : // the map from ids to their unicode-string representation 306 : AttrMap myPredefinedTags; 307 : 308 : /// the map from ids to their string representation 309 : std::vector<std::string> myPredefinedTagsMML; 310 : //@} 311 : 312 : 313 : /// @name elements parsing 314 : //@{ 315 : 316 : // the type of the map that maps tag names to ints 317 : typedef std::map<std::string, int> TagMap; 318 : 319 : // the map of tag names to their internal numerical representation 320 : TagMap myTagMap; 321 : //@} 322 : 323 : /// A list of character strings obtained so far to build the complete characters string at the end 324 : std::vector<std::string> myCharactersVector; 325 : 326 : /// @brief The handler to give control back to 327 : GenericSAXHandler* myParentHandler; 328 : 329 : /// @brief The tag indicating that control should be given back 330 : int myParentIndicator; 331 : 332 : /// @brief The name of the currently parsed file 333 : std::string myFileName; 334 : 335 : /// @brief The root element to expect, empty string disables the check 336 : std::string myExpectedRoot; 337 : 338 : /// @brief whether the reader should collect character data 339 : bool myCollectCharacterData = false; 340 : 341 : /// @brief whether the reader has already seen the root element 342 : bool myRootSeen = false; 343 : 344 : /// @brief The tag indicating the current section to parse 345 : int mySection = -1; 346 : 347 : /// @brief whether the reader has already seen the begin of the section 348 : bool mySectionSeen = false; 349 : 350 : /// @brief whether the reader has already seen the end of the section 351 : bool mySectionEnded = false; 352 : 353 : /// @brief whether an element of the current section is open 354 : bool mySectionOpen = false; 355 : 356 : std::pair<int, SUMOSAXAttributes*> myNextSectionStart; 357 : 358 : private: 359 : /// @brief invalidated copy constructor 360 : GenericSAXHandler(const GenericSAXHandler& s); 361 : 362 : /// @brief invalidated assignment operator 363 : const GenericSAXHandler& operator=(const GenericSAXHandler& s); 364 : 365 : };