TDME2 1.9.121
tinyxmlparser.cpp
Go to the documentation of this file.
1/*
2www.sourceforge.net/projects/tinyxml
3Original code by Lee Thomason (www.grinninglizard.com)
4
5This software is provided 'as-is', without any express or implied
6warranty. In no event will the authors be held liable for any
7damages arising from the use of this software.
8
9Permission is granted to anyone to use this software for any
10purpose, including commercial applications, and to alter it and
11redistribute it freely, subject to the following restrictions:
12
131. The origin of this software must not be misrepresented; you must
14not claim that you wrote the original software. If you use this
15software in a product, an acknowledgment in the product documentation
16would be appreciated but is not required.
17
182. Altered source versions must be plainly marked as such, and
19must not be misrepresented as being the original software.
20
213. This notice may not be removed or altered from any source
22distribution.
23*/
24
25#include <ctype.h>
26#include <stddef.h>
27
28#include "tinyxml.h"
29
30//#define DEBUG_PARSER
31#if defined( DEBUG_PARSER )
32 #define TIXML_LOG printf
33#endif
34
35using namespace tinyxml;
36
37// Note tha "PutString" hardcodes the same list. This
38// is less flexible than it appears. Changing the entries
39// or order will break putstring.
40TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] =
41{
42 { "&amp;", 5, '&' },
43 { "&lt;", 4, '<' },
44 { "&gt;", 4, '>' },
45 { "&quot;", 6, '\"' },
46 { "&apos;", 6, '\'' }
47};
48
// Unicode background reading: http://www.unicode.org/faq/utf_bom.html
// That FAQ is also the basis of the lead-byte table defined below, which
// gives the number of bytes in a UTF-8 sequence from its first byte. Invalid
// lead bytes are given a length of 1: the result will be junk, but the data
// is passed through as far as possible.
//
// Three-byte sequences starting with 0xEF that need special treatment:
//     ef bb bf   (the Microsoft "lead bytes", i.e. the UTF-8 byte order mark)
//     ef bf be   (non-character)
//     ef bf bf   (non-character)

// The three bytes of the UTF-8 byte order mark, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFU;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBU;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFU;
62
63const int TiXmlBase::utf8ByteTable[256] =
64{
65 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
78 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
79 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
80 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
81 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
82};
83
84
85void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
86{
87 const unsigned long BYTE_MASK = 0xBF;
88 const unsigned long BYTE_MARK = 0x80;
89 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
90
91 if (input < 0x80)
92 *length = 1;
93 else if ( input < 0x800 )
94 *length = 2;
95 else if ( input < 0x10000 )
96 *length = 3;
97 else if ( input < 0x200000 )
98 *length = 4;
99 else
100 { *length = 0; return; } // This code won't covert this correctly anyway.
101
102 output += *length;
103
104 // Scary scary fall throughs.
105 switch (*length)
106 {
107 case 4:
108 --output;
109 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
110 input >>= 6;
111 case 3:
112 --output;
113 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
114 input >>= 6;
115 case 2:
116 --output;
117 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
118 input >>= 6;
119 case 1:
120 --output;
121 *output = (char)(input | FIRST_BYTE_MARK[*length]);
122 }
123}
124
125
126/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
127{
128 // This will only work for low-ascii, everything else is assumed to be a valid
129 // letter. I'm not sure this is the best approach, but it is quite tricky trying
130 // to figure out alhabetical vs. not across encoding. So take a very
131 // conservative approach.
132
133// if ( encoding == TIXML_ENCODING_UTF8 )
134// {
135 if ( anyByte < 127 )
136 return isalpha( anyByte );
137 else
138 return 1; // What else to do? The unicode set is huge...get the english ones right.
139// }
140// else
141// {
142// return isalpha( anyByte );
143// }
144}
145
146
147/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
148{
149 // This will only work for low-ascii, everything else is assumed to be a valid
150 // letter. I'm not sure this is the best approach, but it is quite tricky trying
151 // to figure out alhabetical vs. not across encoding. So take a very
152 // conservative approach.
153
154// if ( encoding == TIXML_ENCODING_UTF8 )
155// {
156 if ( anyByte < 127 )
157 return isalnum( anyByte );
158 else
159 return 1; // What else to do? The unicode set is huge...get the english ones right.
160// }
161// else
162// {
163// return isalnum( anyByte );
164// }
165}
166
167namespace tinyxml {
168
170 {
171 friend class TiXmlDocument;
172 public:
173 void Stamp( const char* now, TiXmlEncoding encoding );
174
175 const TiXmlCursor& Cursor() const { return cursor; }
176
177 private:
178 // Only used by the document!
179 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
180 {
181 assert( start );
182 stamp = start;
183 tabsize = _tabsize;
184 cursor.row = row;
185 cursor.col = col;
186 }
187
189 const char* stamp;
191 };
192
193};
194
195
196void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197{
198 assert( now );
199
200 // Do nothing if the tabsize is 0.
201 if ( tabsize < 1 )
202 {
203 return;
204 }
205
206 // Get the current row, column.
207 int row = cursor.row;
208 int col = cursor.col;
209 const char* p = stamp;
210 assert( p );
211
212 while ( p < now )
213 {
214 // Treat p as unsigned, so we have a happy compiler.
215 const unsigned char* pU = (const unsigned char*)p;
216
217 // Code contributed by Fletcher Dunn: (modified by lee)
218 switch (*pU) {
219 case 0:
220 // We *should* never get here, but in case we do, don't
221 // advance past the terminating null character, ever
222 return;
223
224 case '\r':
225 // bump down to the next line
226 ++row;
227 col = 0;
228 // Eat the character
229 ++p;
230
231 // Check for \r\n sequence, and treat this as a single character
232 if (*p == '\n') {
233 ++p;
234 }
235 break;
236
237 case '\n':
238 // bump down to the next line
239 ++row;
240 col = 0;
241
242 // Eat the character
243 ++p;
244
245 // Check for \n\r sequence, and treat this as a single
246 // character. (Yes, this bizarre thing does occur still
247 // on some arcane platforms...)
248 if (*p == '\r') {
249 ++p;
250 }
251 break;
252
253 case '\t':
254 // Eat the character
255 ++p;
256
257 // Skip to next tab stop
258 col = (col / tabsize + 1) * tabsize;
259 break;
260
261 case TIXML_UTF_LEAD_0:
262 if ( encoding == TIXML_ENCODING_UTF8 )
263 {
264 if ( *(p+1) && *(p+2) )
265 {
266 // In these cases, don't advance the column. These are
267 // 0-width spaces.
268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
269 p += 3;
270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
271 p += 3;
272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
273 p += 3;
274 else
275 { p +=3; ++col; } // A normal character.
276 }
277 }
278 else
279 {
280 ++p;
281 ++col;
282 }
283 break;
284
285 default:
286 if ( encoding == TIXML_ENCODING_UTF8 )
287 {
288 // Eat the 1 to 4 byte utf8 character.
289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
290 if ( step == 0 )
291 step = 1; // Error case from bad encoding, but handle gracefully.
292 p += step;
293
294 // Just advance one column, of course.
295 ++col;
296 }
297 else
298 {
299 ++p;
300 ++col;
301 }
302 break;
303 }
304 }
305 cursor.row = row;
306 cursor.col = col;
307 assert( cursor.row >= -1 );
308 assert( cursor.col >= -1 );
309 stamp = p;
310 assert( stamp );
311}
312
313
314const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315{
316 if ( !p || !*p )
317 {
318 return 0;
319 }
320 if ( encoding == TIXML_ENCODING_UTF8 )
321 {
322 while ( *p )
323 {
324 const unsigned char* pU = (const unsigned char*)p;
325
326 // Skip the stupid Microsoft UTF-8 Byte order marks
327 if ( *(pU+0)==TIXML_UTF_LEAD_0
328 && *(pU+1)==TIXML_UTF_LEAD_1
329 && *(pU+2)==TIXML_UTF_LEAD_2 )
330 {
331 p += 3;
332 continue;
333 }
334 else if(*(pU+0)==TIXML_UTF_LEAD_0
335 && *(pU+1)==0xbfU
336 && *(pU+2)==0xbeU )
337 {
338 p += 3;
339 continue;
340 }
341 else if(*(pU+0)==TIXML_UTF_LEAD_0
342 && *(pU+1)==0xbfU
343 && *(pU+2)==0xbfU )
344 {
345 p += 3;
346 continue;
347 }
348
349 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
350 ++p;
351 else
352 break;
353 }
354 }
355 else
356 {
357 while ( *p && IsWhiteSpace( *p ) )
358 ++p;
359 }
360
361 return p;
362}
363
364/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
365{
366 for( ;; )
367 {
368 if ( !in->good() ) return false;
369
370 int c = in->peek();
371 // At this scope, we can't get to a document. So fail silently.
372 if ( !IsWhiteSpace( c ) || c <= 0 )
373 return true;
374
375 *tag += (char) in->get();
376 }
377}
378
379/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
380{
381 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
382 while ( in->good() )
383 {
384 int c = in->peek();
385 if ( c == character )
386 return true;
387 if ( c <= 0 ) // Silent failure: can't get document at this scope
388 return false;
389
390 in->get();
391 *tag += (char) c;
392 }
393 return false;
394}
395
396// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
397// "assign" optimization removes over 10% of the execution time.
398//
399const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
400{
401 // Oddly, not supported on some comilers,
402 //name->clear();
403 // So use this:
404 *name = "";
405 assert( p );
406
407 // Names start with letters or underscores.
408 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
409 // algorithm is generous.
410 //
411 // After that, they can be letters, underscores, numbers,
412 // hyphens, or colons. (Colons are valid ony for namespaces,
413 // but tinyxml can't tell namespaces from names.)
414 if ( p && *p
415 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
416 {
417 const char* start = p;
418 while( p && *p
419 && ( IsAlphaNum( (unsigned char ) *p, encoding )
420 || *p == '_'
421 || *p == '-'
422 || *p == '.'
423 || *p == ':' ) )
424 {
425 //(*name) += *p; // expensive
426 ++p;
427 }
428 if ( p-start > 0 ) {
429 name->assign( start, p-start );
430 }
431 return p;
432 }
433 return 0;
434}
435
436const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
437{
438 // Presume an entity, and pull it out.
439 TIXML_STRING ent;
440 int i;
441 *length = 0;
442
443 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
444 {
445 unsigned long ucs = 0;
446 ptrdiff_t delta = 0;
447 unsigned mult = 1;
448
449 if ( *(p+2) == 'x' )
450 {
451 // Hexadecimal.
452 if ( !*(p+3) ) return 0;
453
454 const char* q = p+3;
455 q = strchr( q, ';' );
456
457 if ( !q || !*q ) return 0;
458
459 delta = q-p;
460 --q;
461
462 while ( *q != 'x' )
463 {
464 if ( *q >= '0' && *q <= '9' )
465 ucs += mult * (*q - '0');
466 else if ( *q >= 'a' && *q <= 'f' )
467 ucs += mult * (*q - 'a' + 10);
468 else if ( *q >= 'A' && *q <= 'F' )
469 ucs += mult * (*q - 'A' + 10 );
470 else
471 return 0;
472 mult *= 16;
473 --q;
474 }
475 }
476 else
477 {
478 // Decimal.
479 if ( !*(p+2) ) return 0;
480
481 const char* q = p+2;
482 q = strchr( q, ';' );
483
484 if ( !q || !*q ) return 0;
485
486 delta = q-p;
487 --q;
488
489 while ( *q != '#' )
490 {
491 if ( *q >= '0' && *q <= '9' )
492 ucs += mult * (*q - '0');
493 else
494 return 0;
495 mult *= 10;
496 --q;
497 }
498 }
499 if ( encoding == TIXML_ENCODING_UTF8 )
500 {
501 // convert the UCS to UTF-8
502 ConvertUTF32ToUTF8( ucs, value, length );
503 }
504 else
505 {
506 *value = (char)ucs;
507 *length = 1;
508 }
509 return p + delta + 1;
510 }
511
512 // Now try to match it.
513 for( i=0; i<NUM_ENTITY; ++i )
514 {
515 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
516 {
517 assert( strlen( entity[i].str ) == entity[i].strLength );
518 *value = entity[i].chr;
519 *length = 1;
520 return ( p + entity[i].strLength );
521 }
522 }
523
524 // So it wasn't an entity, its unrecognized, or something like that.
525 *value = *p; // Don't put back the last one, since we return it!
526 //*length = 1; // Leave unrecognized entities - this doesn't really work.
527 // Just writes strange XML.
528 return p+1;
529}
530
531
532bool TiXmlBase::StringEqual( const char* p,
533 const char* tag,
534 bool ignoreCase,
535 TiXmlEncoding encoding )
536{
537 assert( p );
538 assert( tag );
539 if ( !p || !*p )
540 {
541 assert( 0 );
542 return false;
543 }
544
545 const char* q = p;
546
547 if ( ignoreCase )
548 {
549 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
550 {
551 ++q;
552 ++tag;
553 }
554
555 if ( *tag == 0 )
556 return true;
557 }
558 else
559 {
560 while ( *q && *tag && *q == *tag )
561 {
562 ++q;
563 ++tag;
564 }
565
566 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
567 return true;
568 }
569 return false;
570}
571
572const char* TiXmlBase::ReadText( const char* p,
573 TIXML_STRING * text,
574 bool trimWhiteSpace,
575 const char* endTag,
576 bool caseInsensitive,
577 TiXmlEncoding encoding )
578{
579 *text = "";
580 if ( !trimWhiteSpace // certain tags always keep whitespace
581 || !condenseWhiteSpace ) // if true, whitespace is always kept
582 {
583 // Keep all the white space.
584 while ( p && *p
585 && !StringEqual( p, endTag, caseInsensitive, encoding )
586 )
587 {
588 int len;
589 char cArr[4] = { 0, 0, 0, 0 };
590 p = GetChar( p, cArr, &len, encoding );
591 text->append( cArr, len );
592 }
593 }
594 else
595 {
596 bool whitespace = false;
597
598 // Remove leading white space:
599 p = SkipWhiteSpace( p, encoding );
600 while ( p && *p
601 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
602 {
603 if ( *p == '\r' || *p == '\n' )
604 {
605 whitespace = true;
606 ++p;
607 }
608 else if ( IsWhiteSpace( *p ) )
609 {
610 whitespace = true;
611 ++p;
612 }
613 else
614 {
615 // If we've found whitespace, add it before the
616 // new character. Any whitespace just becomes a space.
617 if ( whitespace )
618 {
619 (*text) += ' ';
620 whitespace = false;
621 }
622 int len;
623 char cArr[4] = { 0, 0, 0, 0 };
624 p = GetChar( p, cArr, &len, encoding );
625 if ( len == 1 )
626 (*text) += cArr[0]; // more efficient
627 else
628 text->append( cArr, len );
629 }
630 }
631 }
632 if ( p && *p )
633 p += strlen( endTag );
634 return ( p && *p ) ? p : 0;
635}
636
637
638void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
639{
640 // The basic issue with a document is that we don't know what we're
641 // streaming. Read something presumed to be a tag (and hope), then
642 // identify it, and call the appropriate stream method on the tag.
643 //
644 // This "pre-streaming" will never read the closing ">" so the
645 // sub-tag can orient itself.
646
647 if ( !StreamTo( in, '<', tag ) )
648 {
650 return;
651 }
652
653 while ( in->good() )
654 {
655 int tagIndex = (int) tag->length();
656 while ( in->good() && in->peek() != '>' )
657 {
658 int c = in->get();
659 if ( c <= 0 )
660 {
662 break;
663 }
664 (*tag) += (char) c;
665 }
666
667 if ( in->good() )
668 {
669 // We now have something we presume to be a node of
670 // some sort. Identify it, and call the node to
671 // continue streaming.
672 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
673
674 if ( node )
675 {
676 node->StreamIn( in, tag );
677 bool isElement = node->ToElement() != 0;
678 delete node;
679 node = 0;
680
681 // If this is the root element, we're done. Parsing will be
682 // done by the >> operator.
683 if ( isElement )
684 {
685 return;
686 }
687 }
688 else
689 {
691 return;
692 }
693 }
694 }
695 // We should have returned sooner.
697}
698
699
700const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
701{
702 ClearError();
703
704 // Parse away, at the document level. Since a document
705 // contains nothing but other tags, most of what happens
706 // here is skipping white space.
707 if ( !p || !*p )
708 {
710 return 0;
711 }
712
713 // Note that, for a document, this needs to come
714 // before the while space skip, so that parsing
715 // starts from the pointer we are given.
716 location.Clear();
717 if ( prevData )
718 {
719 location.row = prevData->cursor.row;
720 location.col = prevData->cursor.col;
721 }
722 else
723 {
724 location.row = 0;
725 location.col = 0;
726 }
728 location = data.Cursor();
729
730 if ( encoding == TIXML_ENCODING_UNKNOWN )
731 {
732 // Check for the Microsoft UTF-8 lead bytes.
733 const unsigned char* pU = (const unsigned char*)p;
734 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
735 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
736 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
737 {
738 encoding = TIXML_ENCODING_UTF8;
739 useMicrosoftBOM = true;
740 }
741 }
742
743 p = SkipWhiteSpace( p, encoding );
744 if ( !p )
745 {
747 return 0;
748 }
749
750 while ( p && *p )
751 {
752 TiXmlNode* node = Identify( p, encoding );
753 if ( node )
754 {
755 p = node->Parse( p, &data, encoding );
756 LinkEndChild( node );
757 }
758 else
759 {
760 break;
761 }
762
763 // Did we get encoding info?
764 if ( encoding == TIXML_ENCODING_UNKNOWN
765 && node->ToDeclaration() )
766 {
767 TiXmlDeclaration* dec = node->ToDeclaration();
768 const char* enc = dec->Encoding();
769 assert( enc );
770
771 if ( *enc == 0 )
772 encoding = TIXML_ENCODING_UTF8;
773 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
774 encoding = TIXML_ENCODING_UTF8;
775 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
776 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
777 else
778 encoding = TIXML_ENCODING_LEGACY;
779 }
780
781 p = SkipWhiteSpace( p, encoding );
782 }
783
784 // Was this empty?
785 if ( !firstChild ) {
786 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
787 return 0;
788 }
789
790 // All is well.
791 return p;
792}
793
794void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
795{
796 // The first error in a chain is more accurate - don't set again!
797 if ( error )
798 return;
799
800 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
801 error = true;
802 errorId = err;
804
806 if ( pError && data )
807 {
808 data->Stamp( pError, encoding );
809 errorLocation = data->Cursor();
810 }
811}
812
813
814TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
815{
816 TiXmlNode* returnNode = 0;
817
818 p = SkipWhiteSpace( p, encoding );
819 if( !p || !*p || *p != '<' )
820 {
821 return 0;
822 }
823
824 p = SkipWhiteSpace( p, encoding );
825
826 if ( !p || !*p )
827 {
828 return 0;
829 }
830
831 // What is this thing?
832 // - Elements start with a letter or underscore, but xml is reserved.
833 // - Comments: <!--
834 // - Decleration: <?xml
835 // - Everthing else is unknown to tinyxml.
836 //
837
838 const char* xmlHeader = { "<?xml" };
839 const char* commentHeader = { "<!--" };
840 const char* dtdHeader = { "<!" };
841 const char* cdataHeader = { "<![CDATA[" };
842
843 if ( StringEqual( p, xmlHeader, true, encoding ) )
844 {
845 #ifdef DEBUG_PARSER
846 TIXML_LOG( "XML parsing Declaration\n" );
847 #endif
848 returnNode = new TiXmlDeclaration();
849 }
850 else if ( StringEqual( p, commentHeader, false, encoding ) )
851 {
852 #ifdef DEBUG_PARSER
853 TIXML_LOG( "XML parsing Comment\n" );
854 #endif
855 returnNode = new TiXmlComment();
856 }
857 else if ( StringEqual( p, cdataHeader, false, encoding ) )
858 {
859 #ifdef DEBUG_PARSER
860 TIXML_LOG( "XML parsing CDATA\n" );
861 #endif
862 TiXmlText* text = new TiXmlText( "" );
863 text->SetCDATA( true );
864 returnNode = text;
865 }
866 else if ( StringEqual( p, dtdHeader, false, encoding ) )
867 {
868 #ifdef DEBUG_PARSER
869 TIXML_LOG( "XML parsing Unknown(1)\n" );
870 #endif
871 returnNode = new TiXmlUnknown();
872 }
873 else if ( IsAlpha( *(p+1), encoding )
874 || *(p+1) == '_' )
875 {
876 #ifdef DEBUG_PARSER
877 TIXML_LOG( "XML parsing Element\n" );
878 #endif
879 returnNode = new TiXmlElement( "" );
880 }
881 else
882 {
883 #ifdef DEBUG_PARSER
884 TIXML_LOG( "XML parsing Unknown(2)\n" );
885 #endif
886 returnNode = new TiXmlUnknown();
887 }
888
889 if ( returnNode )
890 {
891 // Set the parent, so it can report errors
892 returnNode->parent = this;
893 }
894 return returnNode;
895}
896
897
898void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
899{
900 // We're called with some amount of pre-parsing. That is, some of "this"
901 // element is in "tag". Go ahead and stream to the closing ">"
902 while( in->good() )
903 {
904 int c = in->get();
905 if ( c <= 0 )
906 {
907 TiXmlDocument* document = GetDocument();
908 if ( document )
910 return;
911 }
912 (*tag) += (char) c ;
913
914 if ( c == '>' )
915 break;
916 }
917
918 if ( tag->length() < 3 ) return;
919
920 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
921 // If not, identify and stream.
922
923 if ( tag->at( tag->length() - 1 ) == '>'
924 && tag->at( tag->length() - 2 ) == '/' )
925 {
926 // All good!
927 return;
928 }
929 else if ( tag->at( tag->length() - 1 ) == '>' )
930 {
931 // There is more. Could be:
932 // text
933 // cdata text (which looks like another node)
934 // closing tag
935 // another node.
936 for ( ;; )
937 {
938 StreamWhiteSpace( in, tag );
939
940 // Do we have text?
941 if ( in->good() && in->peek() != '<' )
942 {
943 // Yep, text.
944 TiXmlText text( "" );
945 text.StreamIn( in, tag );
946
947 // What follows text is a closing tag or another node.
948 // Go around again and figure it out.
949 continue;
950 }
951
952 // We now have either a closing tag...or another node.
953 // We should be at a "<", regardless.
954 if ( !in->good() ) return;
955 assert( in->peek() == '<' );
956 int tagIndex = (int) tag->length();
957
958 bool closingTag = false;
959 bool firstCharFound = false;
960
961 for( ;; )
962 {
963 if ( !in->good() )
964 return;
965
966 int c = in->peek();
967 if ( c <= 0 )
968 {
969 TiXmlDocument* document = GetDocument();
970 if ( document )
972 return;
973 }
974
975 if ( c == '>' )
976 break;
977
978 *tag += (char) c;
979 in->get();
980
981 // Early out if we find the CDATA id.
982 if ( c == '[' && tag->size() >= 9 )
983 {
984 size_t len = tag->size();
985 const char* start = tag->c_str() + len - 9;
986 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
987 assert( !closingTag );
988 break;
989 }
990 }
991
992 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
993 {
994 firstCharFound = true;
995 if ( c == '/' )
996 closingTag = true;
997 }
998 }
999 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1000 // If it was not, the streaming will be done by the tag.
1001 if ( closingTag )
1002 {
1003 if ( !in->good() )
1004 return;
1005
1006 int c = in->get();
1007 if ( c <= 0 )
1008 {
1009 TiXmlDocument* document = GetDocument();
1010 if ( document )
1012 return;
1013 }
1014 assert( c == '>' );
1015 *tag += (char) c;
1016
1017 // We are done, once we've found our closing tag.
1018 return;
1019 }
1020 else
1021 {
1022 // If not a closing tag, id it, and stream.
1023 const char* tagloc = tag->c_str() + tagIndex;
1024 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1025 if ( !node )
1026 return;
1027 node->StreamIn( in, tag );
1028 delete node;
1029 node = 0;
1030
1031 // No return: go around from the beginning: text, closing tag, or node.
1032 }
1033 }
1034 }
1035}
1036
1037const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1038{
1039 p = SkipWhiteSpace( p, encoding );
1040 TiXmlDocument* document = GetDocument();
1041
1042 if ( !p || !*p )
1043 {
1044 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1045 return 0;
1046 }
1047
1048 if ( data )
1049 {
1050 data->Stamp( p, encoding );
1051 location = data->Cursor();
1052 }
1053
1054 if ( *p != '<' )
1055 {
1056 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1057 return 0;
1058 }
1059
1060 p = SkipWhiteSpace( p+1, encoding );
1061
1062 // Read the name.
1063 const char* pErr = p;
1064
1065 p = ReadName( p, &value, encoding );
1066 if ( !p || !*p )
1067 {
1068 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1069 return 0;
1070 }
1071
1072 TIXML_STRING endTag ("</");
1073 endTag += value;
1074
1075 // Check for and read attributes. Also look for an empty
1076 // tag or an end tag.
1077 while ( p && *p )
1078 {
1079 pErr = p;
1080 p = SkipWhiteSpace( p, encoding );
1081 if ( !p || !*p )
1082 {
1083 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1084 return 0;
1085 }
1086 if ( *p == '/' )
1087 {
1088 ++p;
1089 // Empty tag.
1090 if ( *p != '>' )
1091 {
1092 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1093 return 0;
1094 }
1095 return (p+1);
1096 }
1097 else if ( *p == '>' )
1098 {
1099 // Done with attributes (if there were any.)
1100 // Read the value -- which can include other
1101 // elements -- read the end tag, and return.
1102 ++p;
1103 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
1104 if ( !p || !*p ) {
1105 // We were looking for the end tag, but found nothing.
1106 // Fix for [ 1663758 ] Failure to report error on bad XML
1107 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1108 return 0;
1109 }
1110
1111 // We should find the end tag now
1112 // note that:
1113 // </foo > and
1114 // </foo>
1115 // are both valid end tags.
1116 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1117 {
1118 p += endTag.length();
1119 p = SkipWhiteSpace( p, encoding );
1120 if ( p && *p && *p == '>' ) {
1121 ++p;
1122 return p;
1123 }
1124 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1125 return 0;
1126 }
1127 else
1128 {
1129 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1130 return 0;
1131 }
1132 }
1133 else
1134 {
1135 // Try to read an attribute:
1136 TiXmlAttribute* attrib = new TiXmlAttribute();
1137 if ( !attrib )
1138 {
1139 return 0;
1140 }
1141
1142 attrib->SetDocument( document );
1143 pErr = p;
1144 p = attrib->Parse( p, data, encoding );
1145
1146 if ( !p || !*p )
1147 {
1148 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1149 delete attrib;
1150 return 0;
1151 }
1152
1153 // Handle the strange case of double attributes:
1154 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1155
1156 if ( node )
1157 {
1158 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1159 delete attrib;
1160 return 0;
1161 }
1162
1163 attributeSet.Add( attrib );
1164 }
1165 }
1166 return p;
1167}
1168
1169
1170const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1171{
1172 TiXmlDocument* document = GetDocument();
1173
1174 // Read in text and elements in any order.
1175 const char* pWithWhiteSpace = p;
1176 p = SkipWhiteSpace( p, encoding );
1177
1178 while ( p && *p )
1179 {
1180 if ( *p != '<' )
1181 {
1182 // Take what we have, make a text element.
1183 TiXmlText* textNode = new TiXmlText( "" );
1184
1185 if ( !textNode )
1186 {
1187 return 0;
1188 }
1189
1191 {
1192 p = textNode->Parse( p, data, encoding );
1193 }
1194 else
1195 {
1196 // Special case: we want to keep the white space
1197 // so that leading spaces aren't removed.
1198 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1199 }
1200
1201 if ( !textNode->Blank() )
1202 LinkEndChild( textNode );
1203 else
1204 delete textNode;
1205 }
1206 else
1207 {
1208 // We hit a '<'
1209 // Have we hit a new element or an end tag? This could also be
1210 // a TiXmlText in the "CDATA" style.
1211 if ( StringEqual( p, "</", false, encoding ) )
1212 {
1213 return p;
1214 }
1215 else
1216 {
1217 TiXmlNode* node = Identify( p, encoding );
1218 if ( node )
1219 {
1220 p = node->Parse( p, data, encoding );
1221 LinkEndChild( node );
1222 }
1223 else
1224 {
1225 return 0;
1226 }
1227 }
1228 }
1229 pWithWhiteSpace = p;
1230 p = SkipWhiteSpace( p, encoding );
1231 }
1232
1233 if ( !p )
1234 {
1235 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1236 }
1237 return p;
1238}
1239
1240
1241void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1242{
1243 while ( in->good() )
1244 {
1245 int c = in->get();
1246 if ( c <= 0 )
1247 {
1248 TiXmlDocument* document = GetDocument();
1249 if ( document )
1251 return;
1252 }
1253 (*tag) += (char) c;
1254
1255 if ( c == '>' )
1256 {
1257 // All is well.
1258 return;
1259 }
1260 }
1261}
1262
1263
1264const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1265{
1266 TiXmlDocument* document = GetDocument();
1267 p = SkipWhiteSpace( p, encoding );
1268
1269 if ( data )
1270 {
1271 data->Stamp( p, encoding );
1272 location = data->Cursor();
1273 }
1274 if ( !p || !*p || *p != '<' )
1275 {
1276 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1277 return 0;
1278 }
1279 ++p;
1280 value = "";
1281
1282 while ( p && *p && *p != '>' )
1283 {
1284 value += *p;
1285 ++p;
1286 }
1287
1288 if ( !p )
1289 {
1290 if ( document )
1291 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1292 }
1293 if ( p && *p == '>' )
1294 return p+1;
1295 return p;
1296}
1297
1298void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1299{
1300 while ( in->good() )
1301 {
1302 int c = in->get();
1303 if ( c <= 0 )
1304 {
1305 TiXmlDocument* document = GetDocument();
1306 if ( document )
1308 return;
1309 }
1310
1311 (*tag) += (char) c;
1312
1313 if ( c == '>'
1314 && tag->at( tag->length() - 2 ) == '-'
1315 && tag->at( tag->length() - 3 ) == '-' )
1316 {
1317 // All is well.
1318 return;
1319 }
1320 }
1321}
1322
1323
1324const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1325{
1326 TiXmlDocument* document = GetDocument();
1327 value = "";
1328
1329 p = SkipWhiteSpace( p, encoding );
1330
1331 if ( data )
1332 {
1333 data->Stamp( p, encoding );
1334 location = data->Cursor();
1335 }
1336 const char* startTag = "<!--";
1337 const char* endTag = "-->";
1338
1339 if ( !StringEqual( p, startTag, false, encoding ) )
1340 {
1341 if ( document )
1342 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1343 return 0;
1344 }
1345 p += strlen( startTag );
1346
1347 // [ 1475201 ] TinyXML parses entities in comments
1348 // Oops - ReadText doesn't work, because we don't want to parse the entities.
1349 // p = ReadText( p, &value, false, endTag, false, encoding );
1350 //
1351 // from the XML spec:
1352 /*
1353 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
1354 they may appear within the document type declaration at places allowed by the grammar.
1355 They are not part of the document's character data; an XML processor MAY, but need not,
1356 make it possible for an application to retrieve the text of comments. For compatibility,
1357 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
1358 references MUST NOT be recognized within comments.
1359
1360 An example of a comment:
1361
1362 <!-- declarations for <head> & <body> -->
1363 */
1364
1365 value = "";
1366 // Keep all the white space.
1367 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
1368 {
1369 value.append( p, 1 );
1370 ++p;
1371 }
1372 if ( p && *p )
1373 p += strlen( endTag );
1374
1375 return p;
1376}
1377
1378
1379const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1380{
1381 p = SkipWhiteSpace( p, encoding );
1382 if ( !p || !*p ) return 0;
1383
1384 if ( data )
1385 {
1386 data->Stamp( p, encoding );
1387 location = data->Cursor();
1388 }
1389 // Read the name, the '=' and the value.
1390 const char* pErr = p;
1391 p = ReadName( p, &name, encoding );
1392 if ( !p || !*p )
1393 {
1394 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1395 return 0;
1396 }
1397 p = SkipWhiteSpace( p, encoding );
1398 if ( !p || !*p || *p != '=' )
1399 {
1400 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1401 return 0;
1402 }
1403
1404 ++p; // skip '='
1405 p = SkipWhiteSpace( p, encoding );
1406 if ( !p || !*p )
1407 {
1408 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1409 return 0;
1410 }
1411
1412 const char* end;
1413 const char SINGLE_QUOTE = '\'';
1414 const char DOUBLE_QUOTE = '\"';
1415
1416 if ( *p == SINGLE_QUOTE )
1417 {
1418 ++p;
1419 end = "\'"; // single quote in string
1420 p = ReadText( p, &value, false, end, false, encoding );
1421 }
1422 else if ( *p == DOUBLE_QUOTE )
1423 {
1424 ++p;
1425 end = "\""; // double quote in string
1426 p = ReadText( p, &value, false, end, false, encoding );
1427 }
1428 else
1429 {
1430 // All attribute values should be in single or double quotes.
1431 // But this is such a common error that the parser will try
1432 // its best, even without them.
1433 value = "";
1434 while ( p && *p // existence
1435 && !IsWhiteSpace( *p ) // whitespace
1436 && *p != '/' && *p != '>' ) // tag end
1437 {
1438 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1439 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1440 // We did not have an opening quote but seem to have a
1441 // closing one. Give up and throw an error.
1442 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1443 return 0;
1444 }
1445 value += *p;
1446 ++p;
1447 }
1448 }
1449 return p;
1450}
1451
1452
1453void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1454{
1455 while ( in->good() )
1456 {
1457 int c = in->peek();
1458 if ( !cdata && (c == '<' ) )
1459 {
1460 return;
1461 }
1462 if ( c <= 0 )
1463 {
1464 TiXmlDocument* document = GetDocument();
1465 if ( document )
1467 return;
1468 }
1469
1470 (*tag) += (char) c;
1471 in->get(); // "commits" the peek made above
1472
1473 if ( cdata && c == '>' && tag->size() >= 3 ) {
1474 size_t len = tag->size();
1475 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1476 // terminator of cdata.
1477 return;
1478 }
1479 }
1480 }
1481}
1482
1483
1484const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1485{
1486 value = "";
1487 TiXmlDocument* document = GetDocument();
1488
1489 if ( data )
1490 {
1491 data->Stamp( p, encoding );
1492 location = data->Cursor();
1493 }
1494
1495 const char* const startTag = "<![CDATA[";
1496 const char* const endTag = "]]>";
1497
1498 if ( cdata || StringEqual( p, startTag, false, encoding ) )
1499 {
1500 cdata = true;
1501
1502 if ( !StringEqual( p, startTag, false, encoding ) )
1503 {
1504 if ( document )
1505 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1506 return 0;
1507 }
1508 p += strlen( startTag );
1509
1510 // Keep all the white space, ignore the encoding, etc.
1511 while ( p && *p
1512 && !StringEqual( p, endTag, false, encoding )
1513 )
1514 {
1515 value += *p;
1516 ++p;
1517 }
1518
1519 TIXML_STRING dummy;
1520 p = ReadText( p, &dummy, false, endTag, false, encoding );
1521 return p;
1522 }
1523 else
1524 {
1525 bool ignoreWhite = true;
1526
1527 const char* end = "<";
1528 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1529 if ( p && *p )
1530 return p-1; // don't truncate the '<'
1531 return 0;
1532 }
1533}
1534
1535
1536void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1537{
1538 while ( in->good() )
1539 {
1540 int c = in->get();
1541 if ( c <= 0 )
1542 {
1543 TiXmlDocument* document = GetDocument();
1544 if ( document )
1546 return;
1547 }
1548 (*tag) += (char) c;
1549
1550 if ( c == '>' )
1551 {
1552 // All is well.
1553 return;
1554 }
1555 }
1556}
1557
1558
1559const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1560{
1561 p = SkipWhiteSpace( p, _encoding );
1562 // Find the beginning, find the end, and look for
1563 // the stuff in-between.
1564 TiXmlDocument* document = GetDocument();
1565 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1566 {
1567 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1568 return 0;
1569 }
1570 if ( data )
1571 {
1572 data->Stamp( p, _encoding );
1573 location = data->Cursor();
1574 }
1575 p += 5;
1576
1577 version = "";
1578 encoding = "";
1579 standalone = "";
1580
1581 while ( p && *p )
1582 {
1583 if ( *p == '>' )
1584 {
1585 ++p;
1586 return p;
1587 }
1588
1589 p = SkipWhiteSpace( p, _encoding );
1590 if ( StringEqual( p, "version", true, _encoding ) )
1591 {
1592 TiXmlAttribute attrib;
1593 p = attrib.Parse( p, data, _encoding );
1594 version = attrib.Value();
1595 }
1596 else if ( StringEqual( p, "encoding", true, _encoding ) )
1597 {
1598 TiXmlAttribute attrib;
1599 p = attrib.Parse( p, data, _encoding );
1600 encoding = attrib.Value();
1601 }
1602 else if ( StringEqual( p, "standalone", true, _encoding ) )
1603 {
1604 TiXmlAttribute attrib;
1605 p = attrib.Parse( p, data, _encoding );
1606 standalone = attrib.Value();
1607 }
1608 else
1609 {
1610 // Read over whatever it is.
1611 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1612 ++p;
1613 }
1614 }
1615 return 0;
1616}
1617
1619{
1620 for ( unsigned i=0; i<value.length(); i++ )
1621 if ( !IsWhiteSpace( value[i] ) )
1622 return false;
1623 return true;
1624}
TiXmlAttribute * Find(const char *_name) const
Definition: tinyxml.cpp:1532
void Add(TiXmlAttribute *attribute)
Definition: tinyxml.cpp:1480
An attribute is a name-value pair.
Definition: tinyxml.h:734
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
TIXML_STRING value
Definition: tinyxml.h:833
void SetDocument(TiXmlDocument *doc)
Definition: tinyxml.h:825
TiXmlDocument * document
Definition: tinyxml.h:831
const TIXML_STRING & NameTStr() const
Definition: tinyxml.h:770
const char * Value() const
Return the value of this attribute.
Definition: tinyxml.h:764
TIXML_STRING name
Definition: tinyxml.h:832
static bool StreamTo(std::istream *in, int character, TIXML_STRING *tag)
static bool condenseWhiteSpace
Definition: tinyxml.h:386
static const char * ReadText(const char *in, TIXML_STRING *text, bool ignoreWhiteSpace, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
static int IsAlphaNum(unsigned char anyByte, TiXmlEncoding encoding)
static const char * GetEntity(const char *in, char *value, int *length, TiXmlEncoding encoding)
static bool IsWhiteSpaceCondensed()
Return the current white space setting.
Definition: tinyxml.h:199
static bool StringEqual(const char *p, const char *endTag, bool ignoreCase, TiXmlEncoding encoding)
static bool IsWhiteSpace(char c)
Definition: tinyxml.h:265
static void ConvertUTF32ToUTF8(unsigned long input, char *output, int *length)
TiXmlCursor location
Definition: tinyxml.h:346
static const int utf8ByteTable[256]
Definition: tinyxml.h:228
static const char * GetChar(const char *p, char *_value, int *length, TiXmlEncoding encoding)
Definition: tinyxml.h:300
static Entity entity[NUM_ENTITY]
Definition: tinyxml.h:385
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)=0
static int IsAlpha(unsigned char anyByte, TiXmlEncoding encoding)
static bool StreamWhiteSpace(std::istream *in, TIXML_STRING *tag)
static const char * errorString[TIXML_ERROR_STRING_COUNT]
Definition: tinyxml.h:344
@ TIXML_ERROR_DOCUMENT_EMPTY
Definition: tinyxml.h:253
@ TIXML_ERROR_PARSING_ELEMENT
Definition: tinyxml.h:244
@ TIXML_ERROR_READING_END_TAG
Definition: tinyxml.h:249
@ TIXML_ERROR_STRING_COUNT
Definition: tinyxml.h:258
@ TIXML_ERROR_EMBEDDED_NULL
Definition: tinyxml.h:254
@ TIXML_ERROR_PARSING_EMPTY
Definition: tinyxml.h:248
@ TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME
Definition: tinyxml.h:245
@ TIXML_ERROR_PARSING_COMMENT
Definition: tinyxml.h:251
@ TIXML_ERROR_READING_ATTRIBUTES
Definition: tinyxml.h:247
@ TIXML_ERROR_PARSING_UNKNOWN
Definition: tinyxml.h:250
@ TIXML_ERROR_PARSING_DECLARATION
Definition: tinyxml.h:252
@ TIXML_ERROR_READING_ELEMENT_VALUE
Definition: tinyxml.h:246
@ TIXML_ERROR_PARSING_CDATA
Definition: tinyxml.h:255
static const char * ReadName(const char *p, TIXML_STRING *name, TiXmlEncoding encoding)
static const char * SkipWhiteSpace(const char *, TiXmlEncoding encoding)
static int ToLower(int v, TiXmlEncoding encoding)
Definition: tinyxml.h:355
An XML comment.
Definition: tinyxml.h:1099
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
In correct XML the declaration is the first entry in the file.
Definition: tinyxml.h:1216
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
TIXML_STRING standalone
Definition: tinyxml.h:1269
const char * Encoding() const
Encoding. Will return an empty string if none was found.
Definition: tinyxml.h:1239
TIXML_STRING encoding
Definition: tinyxml.h:1268
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TIXML_STRING version
Definition: tinyxml.h:1267
Always the top level node.
Definition: tinyxml.h:1317
virtual const char * Parse(const char *p, TiXmlParsingData *data=0, TiXmlEncoding encoding=TIXML_DEFAULT_ENCODING)
Parse the given null terminated block of xml data.
void SetError(int err, const char *errorLocation, TiXmlParsingData *prevData, TiXmlEncoding encoding)
int TabSize() const
Definition: tinyxml.h:1425
void ClearError()
If you have handled the error, it can be reset with this call.
Definition: tinyxml.h:1430
TiXmlCursor errorLocation
Definition: tinyxml.h:1470
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TIXML_STRING errorDesc
Definition: tinyxml.h:1468
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
const char * ReadValue(const char *in, TiXmlParsingData *prevData, TiXmlEncoding encoding)
TiXmlAttributeSet attributeSet
Definition: tinyxml.h:1092
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
The parent class for everything in the Document Object Model.
Definition: tinyxml.h:397
TiXmlNode * LinkEndChild(TiXmlNode *addThis)
Add a new node related to this.
Definition: tinyxml.cpp:178
TIXML_STRING value
Definition: tinyxml.h:715
virtual const TiXmlElement * ToElement() const
Cast to a more defined type. Will return null if not of the requested type.
Definition: tinyxml.h:654
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)=0
TiXmlNode * parent
Definition: tinyxml.h:709
TiXmlNode * firstChild
Definition: tinyxml.h:712
virtual const TiXmlDeclaration * ToDeclaration() const
Cast to a more defined type. Will return null if not of the requested type.
Definition: tinyxml.h:658
const TiXmlDocument * GetDocument() const
Return a pointer to the Document this node lives in.
Definition: tinyxml.cpp:501
friend class TiXmlElement
Definition: tinyxml.h:399
TiXmlNode * Identify(const char *start, TiXmlEncoding encoding)
const TiXmlCursor & Cursor() const
void Stamp(const char *now, TiXmlEncoding encoding)
TiXmlParsingData(const char *start, int _tabsize, int row, int col)
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
void SetCDATA(bool _cdata)
Turns on or off a CDATA representation of text.
Definition: tinyxml.h:1177
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
Any tag that tinyXml doesn't recognize is saved as an unknown.
Definition: tinyxml.h:1281
virtual const char * Parse(const char *p, TiXmlParsingData *data, TiXmlEncoding encoding)
virtual void StreamIn(std::istream *in, TIXML_STRING *tag)
TiXmlEncoding
Definition: tinyxml.h:139
@ TIXML_ENCODING_UNKNOWN
Definition: tinyxml.h:140
@ TIXML_ENCODING_LEGACY
Definition: tinyxml.h:142
@ TIXML_ENCODING_UTF8
Definition: tinyxml.h:141
const TiXmlEncoding TIXML_DEFAULT_ENCODING
Definition: tinyxml.h:145
#define TIXML_STRING
Definition: tinyxml.h:36
const unsigned char TIXML_UTF_LEAD_0
const unsigned char TIXML_UTF_LEAD_1
const unsigned char TIXML_UTF_LEAD_2