使用 LuaXML 解析 XML 报文时，如果报文中含有十六进制数字字符引用（例如 `&#x4E2D;`，即用 Unicode 码点表示的字符），解码后这些引用仍会原样保留，不会被转换成实际的字符。
要解决这个问题，需要修改 LuaXML_lib.c，修改方法如下。
将原文件的第 251 行到第 257 行（即原来的 `while(found&&...) {` 循环头及其循环体，不包括结尾的 `}`）替换成如下内容：
/*
 * Copy s into the Lua buffer b, decoding numeric character references.
 *
 * On entry `found` points at the first "&#" in s (or is NULL) and `pos` is
 * its offset in s (or s_size when there is none); `start` is the offset of
 * the first byte of s that has not been copied to b yet.
 *
 * Two forms are decoded:
 *   &#DDD;    exactly three decimal digits -> a single byte with that value
 *   &#xHHHH;  exactly four hex digits      -> that code point encoded as UTF-8
 * Every other "&#" is left in the output unchanged.
 *
 * The closing brace of this loop is not part of this snippet: the '}' that
 * closed the loop being replaced stays in the file and closes this one.
 */
while(pos < s_size)
{
    /* pos is only ever set to found-s or to s_size, so found should never be
       NULL here; this guard just protects the dereferences below. */
    if(!found) break;

    if((pos+5 < s_size) && (found[5] == ';')
       && isdigit((unsigned char)found[2])
       && isdigit((unsigned char)found[3])
       && isdigit((unsigned char)found[4]))
    {
        /* Decimal reference "&#DDD;": flush the pending literal text first. */
        if(pos > start) luaL_addlstring(&b, s+start, pos-start);
        /* NOTE(review): the value (0..999) is stored as one byte, so anything
           above 255 does not fit - same as the code this replaces. */
        luaL_addchar(&b, 100*(s[pos+2]-'0') + 10*(s[pos+3]-'0') + (s[pos+4]-'0'));
        start = pos+6;
        found = strstr(found+6, "&#");
        if(!found) pos = s_size;
        else pos = found-s;
    }
    else if((pos+7 < s_size) && (found[2] == 'x') && (found[7] == ';')
            && isxdigit((unsigned char)found[3])
            && isxdigit((unsigned char)found[4])
            && isxdigit((unsigned char)found[5])
            && isxdigit((unsigned char)found[6]))
    {
        /* Hexadecimal reference "&#xHHHH;". */
        unsigned int value_int = 0;
        char result[4];   /* at most 3 UTF-8 bytes plus the terminating NUL */
        int i;

        if(pos > start) luaL_addlstring(&b, s+start, pos-start);

        /* Accumulate the four hex digits; isxdigit() was checked above, so
           each one is 0-9, a-f or A-F. */
        for(i = 3; i <= 6; ++i)
        {
            const int c = (unsigned char)found[i];
            value_int = (value_int << 4)
                      | (unsigned int)(isdigit(c) ? c - '0' : (c | 0x20) - 'a' + 10);
        }

        /* Encode as UTF-8. Four hex digits give at most 0xFFFF, so three
           bytes are always enough. Values in 0xD800..0xDFFF are encoded
           as-is, without any surrogate handling. */
        memset(result, 0, sizeof(result));
        if(value_int <= 0x7F)
        {
            result[0] = (char)value_int;
        }
        else if(value_int <= 0x7FF)
        {
            result[0] = (char)(0xC0 | (0x1F & (value_int >> 6)));
            result[1] = (char)(0x80 | (0x3F & value_int));
        }
        else
        {
            result[0] = (char)(0xE0 | (0x0F & (value_int >> 12)));
            result[1] = (char)(0x80 | (0x3F & (value_int >> 6)));
            result[2] = (char)(0x80 | (0x3F & value_int));
        }
        /* result is NUL-terminated, so "&#x0000;" contributes no bytes. */
        luaL_addstring(&b, result);

        start = pos+8;
        found = strstr(found+8, "&#");
        if(!found) pos = s_size;
        else pos = found-s;
    }
    else
    {
        /* Not a reference this loop decodes. Step over this "&#" and look for
           the next one; without this the loop would never terminate. `start`
           is left alone, so the skipped text is still copied verbatim by the
           next flush of s+start..pos. */
        found = strstr(found+2, "&#");
        if(!found) pos = s_size;
        else pos = found-s;
    }
修改前后的差分如下（左侧为修改后的代码，右侧为修改前的代码）：
if(!found) pos = s_size; | if(!found) pos = s_size; | |
else pos = found-s; | else pos = found-s; | |
while(pos < s_size) | <> | |
{ | ||
if(found&&(pos+5<s_size)&&(*(found+5)==';')&&isdigit(*(found+2))&&isdigit(*(found+3))&&isdigit(*(found+4)) ) | while(found&&(pos+5<s_size)&&(*(found+5)==';')&&isdigit(*(found+2))&&isdigit(*(found+3))&&isdigit(*(found+4)) ) { | |
{ | ||
if(pos>start) luaL_addlstring(&b,s+start, pos-start); | = | if(pos>start) luaL_addlstring(&b,s+start, pos-start); |
luaL_addchar(&b, 100*(s[pos+2]-48)+10*(s[pos+3]-48)+(s[pos+4]-48)); | luaL_addchar(&b, 100*(s[pos+2]-48)+10*(s[pos+3]-48)+(s[pos+4]-48)); | |
start=pos+6; | start=pos+6; | |
found = strstr(found+6, "&#"); | found = strstr(found+6, "&#"); | |
if(!found) pos = s_size; | if(!found) pos = s_size; | |
else pos = found-s; | else pos = found-s; | |
} | +- | |
else if(found&&(pos+7<s_size)&&(*(found+2)=='x')&&(*(found+7)==';')&&isxdigit(*(found+3))&&isxdigit(*(found+4))&&isxdigit(*(found+5))&&isxdigit(*(found+6))) | ||
{ | ||
if(pos>start) luaL_addlstring(&b,s+start, pos-start); | ||
char buf[32]; | ||
memset(buf,0,sizeof(buf)); | ||
strncpy(buf,found+3,4); | ||
unsigned int value_int; | ||
sscanf(buf,"%04X",&value_int); | ||
char result[32]; | ||
memset(result,0,sizeof(result)); | ||
if(1) | ||
{ | ||
if (value_int <= 0x7f) | ||
{ | ||
result[0] = value_int; | ||
} | ||
else if (value_int <= 0x7FF) | ||
{ | ||
result[1] = (0x80 | (0x3f & value_int)); | ||
result[0] = (0xC0 | (0x1f & (value_int >> 6))); | ||
} | ||
else if (value_int <= 0xFFFF) | ||
{ | ||
result[2] = (0x80 | (0x3f & value_int)); | ||
result[1] = 0x80 | ((0x3f & (value_int >> 6))); | ||
result[0] = 0xE0 | ((0xf & (value_int >> 12))); | ||
} | ||
else if (value_int <= 0x10FFFF) | ||
{ | ||
result[3] = (0x80 | (0x3f & value_int)); | ||
result[2] = (0x80 | (0x3f & (value_int >> 6))); | ||
result[1] = (0x80 | (0x3f & (value_int >> 12))); | ||
result[0] = (0xF0 | (0x7 & (value_int >> 18))); | ||
} | ||
} | ||
= | ||
luaL_addstring(&b,result); | +- | |
= | ||
start=pos+8; | +- | |
found = strstr(found+8, "&#"); | ||
if(!found) pos = s_size; | ||
else pos = found-s; | ||
} | ||
else | ||
{ | ||
luaL_addlstring(&b,s+start, pos-start); | ||
} | ||
} | = | } |
if(pos>start) luaL_addlstring(&b,s+start, pos-start); | if(pos>start) luaL_addlstring(&b,s+start, pos-start); | |
luaL_pushresult(&b); | luaL_pushresult(&b); | |
size_t i; | size_t i; | |
for(i=sv_code_size-1; i<sv_code_size; i-=2) { | for(i=sv_code_size-1; i<sv_code_size; i-=2) { | |
luaL_gsub(L, lua_tostring(L,-1), sv_code[i], sv_code[i-1]); | luaL_gsub(L, lua_tostring(L,-1), sv_code[i], sv_code[i-1]); | |
lua_remove(L,-2); | lua_remove(L,-2); | |
} | } | |
} | } |