首页 > 代码库 > rapidxml对unicode的支持

rapidxml对unicode的支持

  为了提高duilib创建布局控件的效率,在LuaDui项目中使用rapidxml解析器替换了duilib库自带的xml解析器。

duilib使用unicode编译,所以rapidxml需要解析unicode xml字符串。

  使用rapidxml解析unicode字符串很简单,只需在rapidxml的模板参数中设置为TCHAR即可,所以定义以下类型方便使用。

 

#include <rapidxml/rapidxml.hpp>typedef rapidxml::xml_document<TCHAR> XmlDoc;typedef rapidxml::xml_node<TCHAR> XmlNode;typedef rapidxml::xml_attribute<TCHAR> XmlAttr;

  在使用过程中发现了解析xml中的中文字符出现bug,解析如下xml会出现问题抛出异常。

<?xml version="1.0" encoding="UTF-8"?>
<Window caption="0,0,0,30" sizebox="5,5,5,5" mininfo="480,360" defaultfontcolor="#ff010000" width="600" height="480">
  <Font name="微软雅黑" size="12" bold="false"/>
  <VerticalLayout bkcolor="#ff019bd0" inset="1,1,1,1" bordersize="1" bordercolor="#FF010000">
    <HorizontalLayout height="30" inset="5,0,0,0">
      <Label name="标题" text="调试窗口" textcolor="#FFFFFFFF"></Label>
      <Control />
      <Button name="minbtn" width="40" height="22" text="最小化" bkcolor="#ff3fd536">
        <Event click="DebugUIEvent.minBtnClick" />
      </Button>
      <Button name="closebtn" width="47" height="22" text="关闭" bkcolor="#ffef2f4d">
        <Event click="DebugUIEvent.closeBtnClick" />
      </Button>
    </HorizontalLayout>
    <VerticalLayout bkcolor="#66ffffff">
    </VerticalLayout>
  </VerticalLayout>
</Window>

  断点时发现在解析 text="最小化" 属性时出现问题,解析text值的时候把后面的内容全部当做text的属性值,无法再往下解析了。

最后终于找到了问题所在,rapidxml为提高解析效率,定义了如下的表:

        // Character-classification tables used by the parser. Every table has
        // 256 entries, i.e. one entry per possible unsigned char value, so a
        // lookup is a single array index.
        // NOTE(review): Dummy is not used by any declaration here; presumably it
        // exists only so the static tables can live in a header - confirm.
        template<int Dummy>
        struct lookup_tables
        {
            static const unsigned char lookup_whitespace[256];              // Whitespace table
            static const unsigned char lookup_node_name[256];               // Node name table
            static const unsigned char lookup_text[256];                    // Text table
            static const unsigned char lookup_text_pure_no_ws[256];         // Text table
            static const unsigned char lookup_text_pure_with_ws[256];       // Text table
            static const unsigned char lookup_attribute_name[256];          // Attribute name table
            static const unsigned char lookup_attribute_data_1[256];        // Attribute data table with single quote
            static const unsigned char lookup_attribute_data_1_pure[256];   // Attribute data table with single quote
            static const unsigned char lookup_attribute_data_2[256];        // Attribute data table with double quotes
            static const unsigned char lookup_attribute_data_2_pure[256];   // Attribute data table with double quotes
            static const unsigned char lookup_digits[256];                  // Digits
            static const unsigned char lookup_upcase[256];                  // To uppercase conversion table for ASCII characters
        };

  来识别xml中的标识符。查找时直接把字符值当作数组下标来取表项,使用了

如下操作:

 // Index the 256-entry table with the character narrowed to unsigned char.
 // When ch is wider than one byte, the cast keeps only its low 8 bits.
 internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast<unsigned char>(ch)];

但在unicode下,ch的类型是wchar_t(占两个字节),static_cast<unsigned char>(ch)会丢弃高位字节、只保留低8位,用截断后的值去查表就会判断出错。所以要在rapidxml中解析unicode,需要修改rapidxml代码:

       // Detect whitespace character        struct whitespace_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_whitespace[static_cast<unsigned char>(ch)];				else					return 0;            }        };        // Detect node name character        struct node_name_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_node_name[static_cast<unsigned char>(ch)];				else					return 1;            }        };        // Detect attribute name character        struct attribute_name_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_attribute_name[static_cast<unsigned char>(ch)];				else					return 1;            }        };        // Detect text character (PCDATA)        struct text_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_text[static_cast<unsigned char>(ch)];				else					return 1;            }        };        // Detect text character (PCDATA) that does not require processing        struct text_pure_no_ws_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast<unsigned char>(ch)];				else					return 1;            }        };        // Detect text character (PCDATA) that does not require processing        struct text_pure_with_ws_pred        {            static unsigned char test(Ch ch)            {				if(ch<=255)					return internal::lookup_tables<0>::lookup_text_pure_with_ws[static_cast<unsigned char>(ch)];				else					return 1;            }        };        // Detect attribute value character        template<Ch Quote>        struct attribute_value_pred        {            static unsigned char test(Ch ch)            {                if (Quote == 
Ch(‘\‘‘))					if(ch<=255)						return internal::lookup_tables<0>::lookup_attribute_data_1[static_cast<unsigned char>(ch)];					else						return 1;                if (Quote == Ch(‘\"‘))					if(ch<=255)						return internal::lookup_tables<0>::lookup_attribute_data_2[static_cast<unsigned char>(ch)];					else						return 1;                return 0;       // Should never be executed, to avoid warnings on Comeau            }        };        // Detect attribute value character        template<Ch Quote>        struct attribute_value_pure_pred        {            static unsigned char test(Ch ch)            {                if (Quote == Ch(‘\‘‘))					if(ch<=255)						return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast<unsigned char>(ch)];					else						return 1;                if (Quote == Ch(‘\"‘))					if(ch<=255)						return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast<unsigned char>(ch)];					else						return 1;                return 0;       // Should never be executed, to avoid warnings on Comeau            }        };

  

 

rapidxml对unicode的支持