#ifndef __ICXML_READER_IMPL_H_INCLUDED__
#define __ICXML_READER_IMPL_H_INCLUDED__
#include "irrXML.h"
#include "irrString.h"
#include "irrArray.h"
#include "fast_atof.h"
// Debug trace helper: expands to a printf call when _DEBUG is defined and to
// nothing otherwise.
// NOTE(review): the argument is passed to printf as the format string itself,
// and the debug expansion already ends in ';' while the release expansion is
// empty - callers should only pass trusted literal strings.
#ifdef _DEBUG
#define IRR_DEBUGPRINT(x) printf((x));
#else
#define IRR_DEBUGPRINT(x)
#endif
namespace irr
{
namespace io
{
template<class char_type, class superclass>
class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
{
public:
//! Constructs the reader and loads the whole document through the callback.
/** \param callback Source of the raw file bytes. When it is null nothing is
loaded, P stays null and read() returns false.
\param deleteCallBack When true (the default) the callback is deleted once the
file has been read. */
CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
    : TextData(0), P(0), TextBegin(0), TextSize(0), CurrentNodeType(EXN_NONE),
    SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII),
    IsEmptyElement(true) // give isEmptyElement() a defined value before the first element is parsed
{
    if (!callback)
        return;

    // the target encoding must be known before the text is converted
    storeTargetFormat();

    // read the whole file into memory and convert it to char_type
    readFile(callback);

    if (deleteCallBack)
        delete callback;

    createSpecialCharacterList();

    // start parsing at the first character behind any byte order mark
    P = TextBegin;
}
//! Destructor; releases the text buffer held in TextData.
virtual ~CXMLReaderImpl()
{
    delete [] TextData;
}
//! Advances to the next node of the document.
/** \return The result of parseCurrentNode() while unread text is left;
false when no text was loaded or the end of the text has been reached. */
virtual bool read()
{
    // the checks are ordered so that P is only dereferenced when it is non-null
    const bool textLeft =
        P && ((unsigned int)(P - TextBegin) < TextSize - 1) && (*P != 0);

    if (!textLeft)
    {
        _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
        return false;
    }

    return parseCurrentNode();
}
//! Returns the type of the node the reader is currently positioned on.
virtual EXML_NODE getNodeType() const
{
    return CurrentNodeType;
}
//! Returns how many attributes are stored for the current element.
virtual unsigned int getAttributeCount() const
{
    return Attributes.size();
}
//! Returns the name of the attribute at the given index.
/** \param idx Zero based attribute index.
\return Pointer to the name, or 0 when idx is out of range (negative values
wrap to large unsigned numbers and are rejected as well). */
virtual const char_type* getAttributeName(int idx) const
{
    const bool inRange = (u32)idx < Attributes.size();
    return inRange ? Attributes[idx].Name.c_str() : 0;
}
//! Returns the value of the attribute at the given index.
/** \param idx Zero based attribute index.
\return Pointer to the value, or 0 when idx is out of range (negative values
wrap to large unsigned numbers and are rejected as well). */
virtual const char_type* getAttributeValue(int idx) const
{
    const bool inRange = (unsigned int)idx < Attributes.size();
    return inRange ? Attributes[idx].Value.c_str() : 0;
}
//! Returns the value of the attribute with the given name.
/** \param name Attribute name to look up.
\return Pointer to the value, or 0 when no such attribute exists. */
virtual const char_type* getAttributeValue(const char_type* name) const
{
    const SAttribute* found = getAttributeByName(name);
    return found ? found->Value.c_str() : 0;
}
//! Returns the value of the attribute with the given name, never null.
/** \param name Attribute name to look up.
\return Pointer to the value, or to an empty string when no such attribute
exists. */
virtual const char_type* getAttributeValueSafe(const char_type* name) const
{
    if (const SAttribute* found = getAttributeByName(name))
        return found->Value.c_str();

    return EmptyString.c_str();
}
int getAttributeValueAsInt(const char_type* name) const
{
const SAttribute* attr = getAttributeByName(name);
if (!attr)
return 0;
core::stringc c(attr->Value.c_str());
return core::strtol10(c.c_str());
}
int getAttributeValueAsInt(int idx) const
{
const char_type* attrvalue = getAttributeValue(idx);
if (!attrvalue)
return 0;
core::stringc c(attrvalue);
return core::strtol10(c.c_str());
}
float getAttributeValueAsFloat(const char_type* name) const
{
const SAttribute* attr = getAttributeByName(name);
if (!attr)
return 0;
core::stringc c = attr->Value.c_str();
return core::fast_atof(c.c_str());
}
float getAttributeValueAsFloat(int idx) const
{
const char_type* attrvalue = getAttributeValue(idx);
if (!attrvalue)
return 0;
core::stringc c = attrvalue;
return core::fast_atof(c.c_str());
}
//! Returns the name stored for the current node.
virtual const char_type* getNodeName() const
{
    return NodeName.c_str();
}
//! Returns the data of the current node.
/** Text, comment and CDATA content is stored in NodeName by the parser, so
this returns the same string as getNodeName(). */
virtual const char_type* getNodeData() const
{
    return NodeName.c_str();
}
//! Returns whether the last parsed element was closed directly, like <foo />.
virtual bool isEmptyElement() const
{
    return IsEmptyElement;
}
//! Returns the text format detected for the file that was read.
virtual ETEXT_FORMAT getSourceFormat() const
{
    return SourceFormat;
}
//! Returns the text format the parser works in, which follows sizeof(char_type).
virtual ETEXT_FORMAT getParserFormat() const
{
    return TargetFormat;
}
private:
bool parseCurrentNode()
{
char_type* start = P;
while(*P != L'<' && *P)
++P;
if (!*P)
return false;
if (P - start > 0)
{
if (setText(start, P))
return true;
}
++P;
switch(*P)
{
case L'/':
parseClosingXMLElement();
break;
case L'?':
ignoreDefinition();
break;
case L'!':
if (!parseCDATA())
parseComment();
break;
default:
parseOpeningXMLElement();
break;
}
return true;
}
//! Stores the characters in [start, end) as the current text node.
/** Runs shorter than three characters that consist only of whitespace are
ignored.
\return true when a text node was stored, false when the run was ignored. */
bool setText(char_type* start, char_type* end)
{
    if (end - start < 3)
    {
        bool onlyWhiteSpace = true;
        for (const char_type* cur = start; cur != end; ++cur)
        {
            if (!isWhiteSpace(*cur))
            {
                onlyWhiteSpace = false;
                break;
            }
        }

        if (onlyWhiteSpace)
            return false;
    }

    // keep the text with xml entities replaced by their characters
    core::string<char_type> raw(start, (int)(end - start));
    NodeName = replaceSpecialCharacters(raw);

    CurrentNodeType = EXN_TEXT;
    return true;
}
//! Skips a <?...?> definition and reports it as EXN_UNKNOWN.
/** P is moved behind the closing '>'. When the text ends before a '>' is
found, P stops on the terminating zero instead of running past the buffer. */
void ignoreDefinition()
{
    CurrentNodeType = EXN_UNKNOWN;

    // move until the end marked with '>' (or the end of the text) is reached
    while (*P && *P != L'>')
        ++P;

    // only step over a '>' that is really there
    if (*P)
        ++P;
}
void parseComment()
{
CurrentNodeType = EXN_COMMENT;
P += 1;
char_type *pCommentBegin = P;
int count = 1;
while(count)
{
if (*P == L'>')
--count;
else
if (*P == L'<')
++count;
++P;
}
P -= 3;
NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
P += 3;
}
//! Parses an opening tag; P sits on the first character behind '<'.
/** Stores the element name in NodeName, collects all attributes (values with
xml entities replaced) and sets IsEmptyElement for directly closed tags.
Every scan also stops at the terminating zero, so a truncated tag cannot move
P past the end of the text. The name is stored before the attributes are
read, so it is valid even when a malformed attribute aborts parsing. */
void parseOpeningXMLElement()
{
    CurrentNodeType = EXN_ELEMENT;
    IsEmptyElement = false;
    Attributes.clear();

    // element name: everything up to whitespace, '>' or the end of the text
    const char_type* startName = P;
    while (*P && *P != L'>' && !isWhiteSpace(*P))
        ++P;
    const char_type* endName = P;

    // in "<name/>" the '/' is still attached to the name
    if (endName > startName && *(endName-1) == L'/')
    {
        IsEmptyElement = true;
        endName--;
    }

    NodeName = core::string<char_type>(startName, (int)(endName - startName));

    // find attributes
    while (*P && *P != L'>')
    {
        if (isWhiteSpace(*P))
        {
            ++P;
            continue;
        }

        if (*P == L'/')
        {
            // tag is closed directly
            ++P;
            IsEmptyElement = true;
            break;
        }

        // attribute name runs up to whitespace or '='
        const char_type* attributeNameBegin = P;
        while (*P && !isWhiteSpace(*P) && *P != L'=')
            ++P;

        if (!*P) // malformed xml: text ends inside an attribute name
            return;

        const char_type* attributeNameEnd = P;
        ++P;

        // the value starts behind the next double or single quote
        while ((*P != L'\"') && (*P != L'\'') && *P)
            ++P;

        if (!*P) // malformed xml: no quoted value
            return;

        const char_type attributeQuoteChar = *P;

        ++P;
        const char_type* attributeValueBegin = P;

        while (*P != attributeQuoteChar && *P)
            ++P;

        if (!*P) // malformed xml: value is not closed
            return;

        const char_type* attributeValueEnd = P;
        ++P;

        SAttribute attr;
        attr.Name = core::string<char_type>(attributeNameBegin,
            (int)(attributeNameEnd - attributeNameBegin));

        core::string<char_type> s(attributeValueBegin,
            (int)(attributeValueEnd - attributeValueBegin));

        attr.Value = replaceSpecialCharacters(s);
        Attributes.push_back(attr);
    }

    // step over the closing '>'; stay on the terminator when it is missing
    if (*P)
        ++P;
}
//! Parses a closing tag; P sits on the '/' behind '<'.
/** Stores the characters between "</" and '>' in NodeName and reports
EXN_ELEMENT_END. When the text ends before a '>' is found, P stops on the
terminating zero instead of running past the buffer. */
void parseClosingXMLElement()
{
    CurrentNodeType = EXN_ELEMENT_END;
    IsEmptyElement = false;
    Attributes.clear();

    // skip the '/'
    ++P;
    const char_type* pBeginClose = P;

    while (*P && *P != L'>')
        ++P;

    NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));

    // only step over a '>' that is really there
    if (*P)
        ++P;
}
//! Tries to parse a CDATA section; P sits on the '!' behind '<'.
/** \return false when the character behind '!' is not '[' (the caller then
treats the node as a comment); true otherwise, with the section content stored
in NodeName and the node reported as EXN_CDATA. NodeName is empty when the
section is truncated or not terminated by "]]>". */
bool parseCDATA()
{
    if (*(P+1) != L'[')
        return false;

    CurrentNodeType = EXN_CDATA;

    // skip the eight characters "![CDATA[" without checking their content
    int count=0;
    while( *P && count<8 )
    {
        ++P;
        ++count;
    }

    if (!*P)
    {
        // text ended inside the opener: do not report data of a previous node
        NodeName = "";
        return true;
    }

    char_type *cDataBegin = P;
    char_type *cDataEnd = 0;

    // find the terminating "]]>"; both ']' must lie inside the content area,
    // otherwise the end would be placed in front of the begin
    while(*P && !cDataEnd)
    {
        if (*P == L'>' &&
            (P - cDataBegin) >= 2 &&
            (*(P-1) == L']') &&
            (*(P-2) == L']'))
        {
            cDataEnd = P - 2;
        }

        ++P;
    }

    if ( cDataEnd )
        NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
    else
        NodeName = "";

    return true;
}
//! One attribute of the current element.
struct SAttribute
{
    //! Attribute name as written in the tag.
    core::string<char_type> Name;

    //! Attribute value with xml entities already replaced.
    core::string<char_type> Value;
};
//! Finds the first attribute of the current element with the given name.
/** \param name Attribute name to search for; may be null.
\return Pointer to the attribute, or 0 when name is null or not found. */
const SAttribute* getAttributeByName(const char_type* name) const
{
    if (name)
    {
        const core::string<char_type> wanted = name;
        const int total = (int)Attributes.size();

        for (int idx = 0; idx < total; ++idx)
        {
            const SAttribute& candidate = Attributes[idx];
            if (candidate.Name == wanted)
                return &candidate;
        }
    }

    return 0;
}
//! Returns a copy of origstr with the known xml entities replaced.
/** Every entry of SpecialCharacters holds the replacement character followed
by the entity name without the leading '&'. An '&' that does not start a
known entity is copied unchanged.
\param origstr Raw text as found in the document.
\return The text with entities replaced; origstr itself when it contains
no '&'. */
core::string<char_type> replaceSpecialCharacters(
    core::string<char_type>& origstr)
{
    int pos = origstr.findFirst(L'&');
    int oldPos = 0;

    if (pos == -1)
        return origstr;

    core::string<char_type> newstr;

    while(pos != -1 && pos < (int)origstr.size()-2)
    {
        // text directly behind the '&'
        const char_type* p = &origstr.c_str()[pos]+1;

        // check if it is one of the special characters
        int specialChar = -1;
        for (int i=0; i<(int)SpecialCharacters.size(); ++i)
        {
            if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
            {
                specialChar = i;
                break;
            }
        }

        if (specialChar != -1)
        {
            // copy the text in front of the entity, then the replacement
            newstr.append(origstr.subString(oldPos, pos - oldPos));
            newstr.append(SpecialCharacters[specialChar][0]);
            // entry length equals the length of '&' plus entity name
            pos += SpecialCharacters[specialChar].size();
        }
        else
        {
            // unknown entity: keep the '&' as it is
            newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
            pos += 1;
        }

        // find next &
        oldPos = pos;
        pos = origstr.findNext(L'&', pos);
    }

    // copy the rest; comparing against size() (not size()-1) keeps a single
    // trailing character such as the 'b' in "a&amp;b"
    if (oldPos < (int)origstr.size())
        newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));

    return newstr;
}
//! Reads the whole file through the callback and converts it to char_type.
/** The byte order mark at the start of the data selects SourceFormat; data
without a recognised mark is treated as ETF_ASCII. On success the buffer is
handed to convertTextData(), which either keeps it as TextData or deletes it.
\param callback Source of the file bytes; must not be null.
\return false when the reported size is negative or read() reports failure,
true otherwise. */
bool readFile(IFileReadCallBack* callback)
{
long size = callback->getSize();
if (size<0)
return false;
// four extra bytes, zeroed below, terminate the text for every character width
size += 4;
char* data8 = new char[size];
if (!callback->read(data8, size-4))
{
delete [] data8;
return false;
}
// add the terminating zeros
memset(data8+size-4, 0, 4);
// views of the same buffer for 16 and 32 bit wide source characters
char16* data16 = reinterpret_cast<char16*>(data8);
char32* data32 = reinterpret_cast<char32*>(data8);
// byte order marks of the supported encodings
// NOTE(review): the 16/32 bit marks are compared as host integers, which
// presumably assumes a little endian host - confirm for other platforms.
const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF};
const u16 UTF16_BE = 0xFFFE;
const u16 UTF16_LE = 0xFEFF;
const u32 UTF32_BE = 0xFFFE0000;
const u32 UTF32_LE = 0x0000FEFF;
// size already includes the four padding bytes here, so the size checks
// below always pass; for very short files the marks are compared against
// the zero padding.
// In every branch the source pointer skips the mark and the character count
// passed on still contains the zero padding.
if (size >= 4 && data32[0] == static_cast<char32>(UTF32_BE))
{
SourceFormat = ETF_UTF32_BE;
convertTextData(data32+1, data8, (size/4)-1);
}
else
if (size >= 4 && data32[0] == static_cast<char32>(UTF32_LE))
{
SourceFormat = ETF_UTF32_LE;
convertTextData(data32+1, data8, (size/4)-1);
}
else
if (size >= 2 && data16[0] == UTF16_BE)
{
SourceFormat = ETF_UTF16_BE;
convertTextData(data16+1, data8, (size/2)-1);
}
else
if (size >= 2 && data16[0] == UTF16_LE)
{
SourceFormat = ETF_UTF16_LE;
convertTextData(data16+1, data8, (size/2)-1);
}
else
if (size >= 3 && memcmp(data8,UTF8,3)==0)
{
SourceFormat = ETF_UTF8;
convertTextData(data8+3, data8, size-3);
}
else
{
// no known mark: treat the data as plain single byte text
SourceFormat = ETF_ASCII;
convertTextData(data8, data8, size);
}
return true;
}
//! Makes the loaded text available as char_type and sets TextData, TextBegin and TextSize.
/** \param source First character behind the byte order mark.
\param pointerToStore Start of the allocated buffer that source points into;
it is either kept as TextData or deleted here.
\param sizeWithoutHeader Number of source characters to take over.
Characters of a different width are converted one by one with a plain cast;
no real re-encoding takes place. */
template<class src_char_type>
void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
{
    // swap bytes first when source and target disagree about byte order
    const bool multiByteSource = sizeof(src_char_type) > 1;
    if (multiByteSource &&
        isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
        convertToLittleEndian(source);

    if (sizeof(src_char_type) == sizeof(char_type))
    {
        // same character width: use the loaded buffer directly
        TextBegin = (char_type*)source;
        TextData = (char_type*)pointerToStore;
        TextSize = sizeWithoutHeader;
        return;
    }

    // different width: copy into a new buffer, one character at a time
    TextData = new char_type[sizeWithoutHeader];

    if ( sizeof(src_char_type) == 1 )
    {
        // go through unsigned char so bytes above 127 are not sign extended
        for (int idx = 0; idx < sizeWithoutHeader; ++idx)
        {
            const unsigned char byteValue = static_cast<unsigned char>(source[idx]);
            TextData[idx] = static_cast<char_type>(byteValue);
        }
    }
    else
    {
        for (int idx = 0; idx < sizeWithoutHeader; ++idx)
            TextData[idx] = static_cast<char_type>(source[idx]);
    }

    TextBegin = TextData;
    TextSize = sizeWithoutHeader;

    // the loaded bytes are no longer needed
    delete [] pointerToStore;
}
//! Swaps the byte order of every character of a zero terminated string in place.
/** \param t Zero terminated string of 16 or 32 bit wide characters. */
template<class src_char_type>
void convertToLittleEndian(src_char_type* t)
{
    if (sizeof(src_char_type) == 4)
    {
        // 32 bit characters: reverse all four bytes
        for (; *t; ++t)
        {
            *t = ((*t & 0xff000000) >> 24) |
                 ((*t & 0x00ff0000) >> 8) |
                 ((*t & 0x0000ff00) << 8) |
                 ((*t & 0x000000ff) << 24);
        }
        return;
    }

    // 16 bit characters: exchange the two bytes
    for (; *t; ++t)
        *t = (*t >> 8) | (*t << 8);
}
//! Returns whether the format is handled as little endian.
/** ASCII and UTF-8 count as little endian next to the two explicit _LE
formats; every other format yields false. */
inline bool isLittleEndian(ETEXT_FORMAT f)
{
    switch (f)
    {
    case ETF_ASCII:
    case ETF_UTF8:
    case ETF_UTF16_LE:
    case ETF_UTF32_LE:
        return true;
    default:
        return false;
    }
}
//! Returns true for space, tab, line feed and carriage return.
inline bool isWhiteSpace(char_type c)
{
    switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
//! Fills SpecialCharacters with the predefined xml entities.
/** Each entry consists of the replacement character followed by the entity
name without its leading '&' (including the ';'), which is the layout
replaceSpecialCharacters() expects. Every entry is written as two adjacent
string literals - replacement character, then entity name - which the
compiler joins into one string. The ampersand entry must carry its "amp;"
name: with an empty name it would match every '&' and no entity would ever
be replaced. */
void createSpecialCharacterList()
{
    SpecialCharacters.push_back("&" "amp;");
    SpecialCharacters.push_back("<" "lt;");
    SpecialCharacters.push_back(">" "gt;");
    SpecialCharacters.push_back("\"" "quot;");
    SpecialCharacters.push_back("'" "apos;");
}
//! Compares at most len characters of two zero terminated strings.
/** \return true when the first len characters are equal, or when both
strings end at the same position before len characters were compared. */
bool equalsn(const char_type* str1, const char_type* str2, int len)
{
    int matched = 0;

    while (str1[matched] && str2[matched] && matched < len)
    {
        if (str1[matched] != str2[matched])
            return false;

        ++matched;
    }

    if (matched == len)
        return true;

    // stopped early: equal only when both strings end here
    return str1[matched] == 0 && str2[matched] == 0;
}
//! Derives TargetFormat from the width of char_type.
/** 1 byte selects UTF-8, 2 bytes UTF-16 little endian, 4 bytes UTF-32 little
endian; any other width falls back to ASCII. */
void storeTargetFormat()
{
    const unsigned int charWidth = sizeof(char_type);

    if (charWidth == 1)
        TargetFormat = ETF_UTF8;
    else if (charWidth == 2)
        TargetFormat = ETF_UTF16_LE;
    else if (charWidth == 4)
        TargetFormat = ETF_UTF32_LE;
    else
        TargetFormat = ETF_ASCII;
}
// Start of the allocated text buffer; released with delete[] in the destructor.
// NOTE(review): the class declares no copy constructor or assignment operator
// in this file, so copying a reader would share this pointer - confirm that
// instances are never copied.
char_type* TextData;
// Current parse position inside the text.
char_type* P;
// First character of the document text (behind any byte order mark).
char_type* TextBegin;
// Number of characters handed over by convertTextData(), zero padding included.
unsigned int TextSize;
// Type of the node parsed last.
EXML_NODE CurrentNodeType;
// Format detected from the byte order mark of the file.
ETEXT_FORMAT SourceFormat;
// Format the parser works in; derived from sizeof(char_type).
ETEXT_FORMAT TargetFormat;
// Name of the current element, or the content of a text, comment or CDATA node.
core::string<char_type> NodeName;
// Returned by getAttributeValueSafe() when an attribute is missing.
core::string<char_type> EmptyString;
// True when the current element was closed directly, like <foo />.
bool IsEmptyElement;
// Entity table: replacement character followed by the entity name without '&'.
core::array< core::string<char_type> > SpecialCharacters;
// Attributes of the current element.
core::array<SAttribute> Attributes;
};
}
}
#endif