00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "Teuchos_XMLParser.hpp"
00038 #include "Teuchos_TreeBuildingXMLHandler.hpp"
00039 #include "Teuchos_TestForException.hpp"
00040
00041 using namespace Teuchos;
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112 XMLObject XMLParser::parse()
00113 {
00114
00115 RCP<TreeBuildingXMLHandler> handler = rcp(new TreeBuildingXMLHandler());
00116
00117 _entities.clear();
00118 _entities["apos"] = "'";
00119 _entities["quot"] = "\"";
00120 _entities["lt"] = "<";
00121 _entities["gt"] = ">";
00122 _entities["amp"] = "&";
00123
00124 bool done = false;
00125 int curopen = 0;
00126 bool gotRoot = false;
00127
00128 while (!done) {
00129
00130 std::string tag, cdata;
00131 unsigned char c1, c2;
00132 Teuchos::map<std::string,string> attrs;
00133
00134
00135 if (curopen == 0) {
00136
00137 if ( getSpace(c1) ) {
00138 done = true;
00139 break;
00140 }
00141 }
00142 else {
00143
00144 if (_is->readBytes(&c1,1) < 1) {
00145 done = true;
00146 break;
00147 }
00148 }
00149
00150 if (c1 == '<') {
00151
00152
00153 TEST_FOR_EXCEPTION( _is->readBytes(&c2,1) < 1 , std::runtime_error, "XMLParser::parse(): stream ended in tag begin/end");
00154
00155 if (c2 == '/') {
00156
00157
00158 getETag(tag);
00159 TEST_FOR_EXCEPTION( handler->endElement(tag) , std::runtime_error , "XMLParser::getETag(): document not well-formed: end element tag did not match start element");
00160 curopen--;
00161 }
00162 else if (isLetter(c2) || c2==':' || c2=='_') {
00163
00164 bool emptytag;
00165 getSTag(c2, tag, attrs, emptytag);
00166 handler->startElement(tag,attrs);
00167 if (curopen == 0) {
00168 TEST_FOR_EXCEPTION(gotRoot == true, std::runtime_error , "XMLParser::getETag(): document not well-formed: more than one root element specified");
00169 gotRoot = true;
00170 }
00171 curopen++;
00172 if (emptytag) {
00173 TEST_FOR_EXCEPTION( handler->endElement(tag) , std::runtime_error , "XMLParser::getETag(): document not well-formed: end element tag did not match start element");
00174 curopen--;
00175 }
00176 }
00177 else if (c2 == '!') {
00178
00179
00180
00181
00182
00183 TEST_FOR_EXCEPTION( assertChar('-') , std::runtime_error , "XMLParser::parse(): element not well-formed or exploits unsupported feature" );
00184 TEST_FOR_EXCEPTION( assertChar('-') , std::runtime_error , "XMLParser::parse(): element not well-formed or exploits unsupported feature" );
00185 getComment();
00186 }
00187 else {
00188 TEST_FOR_EXCEPTION(1, std::runtime_error, "XMLParser::parse(): element not well-formed or exploits unsupported feature" );
00189 }
00190 }
00191 else if ( (curopen > 0) && (c1 == '&') ) {
00192 std::string chars = "";
00193 getReference(chars);
00194 handler->characters(chars);
00195 }
00196 else if ( (curopen > 0) ) {
00197 std::string chars = "";
00198 chars.push_back(c1);
00199 handler->characters(chars);
00200 }
00201 else {
00202 TEST_FOR_EXCEPTION(1,std::runtime_error,"XMLParser::parse(): document not well-formed");
00203 }
00204 }
00205
00206 TEST_FOR_EXCEPTION( curopen != 0 , std::runtime_error, "XMLParser::parse(): document not well-formed: elements not matched" );
00207
00208 return handler->getObject();
00209
00210 }
00211
00212
00213 void XMLParser::getETag(std::string &tag)
00214 {
00215
00216
00217
00218
00219
00220
00221
00222 bool tagover = false;
00223 unsigned char c;
00224
00225 tag = "";
00226 TEST_FOR_EXCEPTION( _is->readBytes(&c,1) < 1 , std::runtime_error , "XMLParser::getETag(): EOF before end element was terminated");
00227 TEST_FOR_EXCEPTION( !isLetter(c) && c!='_' && c!=':' , std::runtime_error , "XMLParser::getETag(): tag not well-formed");
00228 tag.push_back(c);
00229 while (1) {
00230 TEST_FOR_EXCEPTION( _is->readBytes(&c,1) < 1 , std::runtime_error , "XMLParser::getETag(): EOF before end element was terminated");
00231 if ( isNameChar(c) ) {
00232 if (tagover) {
00233 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getETag(): end element not well-formed: expected '>'");
00234 }
00235 tag.push_back(c);
00236 }
00237 else if (isSpace(c)) {
00238
00239 tagover = true;
00240 }
00241 else if (c == '>') {
00242 break;
00243 }
00244 else {
00245 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getETag(): end element not well-formed");
00246 }
00247 }
00248 }
00249
00250
00251 void XMLParser::getSTag(unsigned char lookahead, std::string &tag, Teuchos::map<std::string,string> &attrs, bool &emptytag)
00252 {
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273 unsigned char c;
00274 attrs.clear();
00275
00276 tag = lookahead;
00277
00278 while (1) {
00279 TEST_FOR_EXCEPTION( _is->readBytes(&c,1) < 1 , std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00280 if (isNameChar(c)) {
00281 tag.push_back(c);
00282 }
00283 else {
00284 break;
00285 }
00286 }
00287
00288
00289
00290 do {
00291
00292 bool hadspace = false;
00293
00294
00295 if ( isSpace(c) ) {
00296 hadspace = true;
00297 TEST_FOR_EXCEPTION( getSpace(c) , std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00298 }
00299
00300
00301 if ( (isLetter(c) || c=='_' || c==':') && hadspace ) {
00302
00303
00304
00305 std::string attname, attval;
00306 attname = c;
00307 do {
00308 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00309 if ( isNameChar(c) ) {
00310 attname.push_back(c);
00311 }
00312 else if ( isSpace(c) || c=='=' ) {
00313 break;
00314 }
00315 else {
00316 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getSTag(): attribute not well-formed: expected whitespace or '='");
00317 }
00318 } while (1);
00319
00320
00321 if (isSpace(c)) {
00322 getSpace(c);
00323 }
00324
00325 if (c != '=') {
00326 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getSTag(): attribute not well-formed: expected '='");
00327 }
00328
00329
00330 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00331 if (isSpace(c)) {
00332 getSpace(c);
00333 }
00334
00335
00336 bool apost;
00337 attval = "";
00338 if (c == '\'') {
00339 apost = true;
00340 }
00341 else if (c == '\"') {
00342 apost = false;
00343 }
00344 else {
00345 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getSTag(): attribute value must be quoted with either ''' or '\"'");
00346 }
00347 do {
00348 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00349 if (apost && c=='\'') {
00350
00351 break;
00352 }
00353 else if (!apost && c=='\"') {
00354
00355 break;
00356 }
00357 else if ( c == '&' ) {
00358
00359 std::string refstr;
00360 getReference(refstr);
00361 attval += refstr;
00362 }
00363 else if ( c!='<' ) {
00364
00365 attval.push_back(c);
00366 }
00367 else {
00368 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getSTag(): invalid character in attribute value");
00369 }
00370 } while(1);
00371
00372
00373 TEST_FOR_EXCEPTION( attrs.find(attname) != attrs.end() , std::runtime_error , "XMLParser::getSTag(): cannot have two attributes with the same name");
00374 attrs[attname] = attval;
00375 }
00376 else if (c == '>') {
00377 emptytag = false;
00378 break;
00379 }
00380 else if (c == '/') {
00381 TEST_FOR_EXCEPTION(assertChar('>'), std::runtime_error , "XMLParser::getSTag(): empty element tag not well-formed: expected '>'");
00382 emptytag = true;
00383 break;
00384 }
00385 else {
00386 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getSTag(): start element not well-formed: invalid character");
00387 }
00388
00389
00390 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getSTag(): EOF before start element was terminated");
00391
00392 } while(1);
00393 }
00394
00395
00396 void XMLParser::getComment()
00397 {
00398
00399
00400
00401
00402
00403
00404
00405 unsigned char c;
00406 while (1) {
00407 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getComment(): EOF before comment was terminated");
00408
00409 if (c=='-') {
00410
00411 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getComment(): EOF before comment was terminated");
00412 if (c=='-') {
00413
00414 TEST_FOR_EXCEPTION( assertChar('>') , std::runtime_error , "XMLParser::getComment(): comment not well-formed: expected '>'");
00415 break;
00416 }
00417 else if (!isChar(c)) {
00418 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getComment(): comment not well-formed: invalid character");
00419 }
00420 }
00421 else if (!isChar(c)) {
00422 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getComment(): comment not well-formed: invalid character");
00423 }
00424 }
00425 }
00426
00427
00428 void XMLParser::getReference(std::string &refstr) {
00429
00430 unsigned char c;
00431 unsigned int num, base;
00432 refstr = "";
00433 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getReference(): EOF before reference was terminated");
00434 if (c == '#') {
00435
00436
00437
00438
00439 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getReference(): EOF before reference was terminated");
00440 if (c == 'x') {
00441 base = 16;
00442 num = 0;
00443 }
00444 else if ('0' <= c && c <= '9') {
00445 base = 10;
00446 num = c - '0';
00447 }
00448 else {
00449 TEST_FOR_EXCEPTION(1, std::runtime_error, "XMLParser::getReference(): invalid character in character reference: expected 'x' or [0-9]");
00450 }
00451
00452 do {
00453 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getReference(): EOF before reference was terminated");
00454 TEST_FOR_EXCEPTION( c != ';' && !('0' <= c && c <= '9') , std::runtime_error , "XMLParser::getReference(): invalid character in character reference: expected [0-9] or ';'");
00455 if (c == ';') {
00456 break;
00457 }
00458 num = num*base + (c-'0');
00459 } while (1);
00460 TEST_FOR_EXCEPTION(num > 0xFF, std::runtime_error , "XMLParser::getReference(): character reference value out of range");
00461 refstr.push_back( (unsigned char)num );
00462 }
00463 else if (isLetter(c) || c=='_' || c==':') {
00464
00465
00466 std::string entname = "";
00467 entname.push_back(c);
00468 do {
00469 TEST_FOR_EXCEPTION(_is->readBytes(&c,1) < 1, std::runtime_error , "XMLParser::getReference(): EOF before reference was terminated");
00470 if (c==';') {
00471 break;
00472 }
00473 else if ( isLetter(c) || ('0' <= c && c <= '9')
00474 || c=='.' || c=='-' || c=='_' || c==':'
00475 || c==0xB7 ) {
00476 entname.push_back(c);
00477 }
00478 else {
00479 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getReference(): entity reference not well-formed: invalid character");
00480 }
00481 } while (1);
00482 TEST_FOR_EXCEPTION( _entities.find(entname) == _entities.end(), std::runtime_error , "XMLParser::getReference(): entity reference not well-formed: undefined entity");
00483 refstr = _entities[entname];
00484 }
00485 else {
00486 TEST_FOR_EXCEPTION(1, std::runtime_error , "XMLParser::getReference(): reference not well-formed: expected name or '#'");
00487 }
00488 }
00489
00490
00491 int XMLParser::getSpace(unsigned char &lookahead) {
00492
00493 do {
00494 if (_is->readBytes(&lookahead,1) < 1) {
00495 return 1;
00496 }
00497 }
00498 while (isSpace(lookahead));
00499 return 0;
00500 }
00501
00502
00503 bool XMLParser::isLetter(unsigned char c) {
00504 if ( (0x41 <= c && c <= 0x5A) || (0x61 <= c && c <= 0x7A) ||
00505 (0xC0 <= c && c <= 0xD6) || (0xD8 <= c && c <= 0xF6) ||
00506 (0xF8 <= c) )
00507 {
00508 return true;
00509 }
00510 return false;
00511 }
00512
00513
00514 bool XMLParser::isNameChar(unsigned char c) {
00515 if ( isLetter(c) || ('0' <= c && c <= '9') ||
00516 c=='.' || c=='-' || c=='_' || c==':' || c==0xB7 )
00517 {
00518 return true;
00519 }
00520 return false;
00521 }
00522
00523
00524 bool XMLParser::isSpace(unsigned char c) {
00525 if ( c==0x20 || c==0x9 || c==0xD || c==0xA )
00526 {
00527 return true;
00528 }
00529 return false;
00530 }
00531
00532
00533 bool XMLParser::isChar(unsigned char c) {
00534 if ( c==0x9 || c==0xA || c==0xD || 0x20 <= c) {
00535 return true;
00536 }
00537 return false;
00538 }
00539
00540
00541 int XMLParser::assertChar(unsigned char cexp)
00542 {
00543
00544
00545 unsigned char c;
00546 if (_is->readBytes(&c,1) < 1) {
00547 return 1;
00548 }
00549 if (c != cexp) {
00550 return 2;
00551 }
00552 return 0;
00553 }
00554