libxml2的xpath检索中文

时间:2020-12-24 22:18:16

ZC: 当 XPath 字符串中包含中文时,xmlXPathEvalExpression(...) 返回 NULL,暂时还不知道该怎么处理……

ZC: 下面是测试的一些代码/文件,留着以后再研究吧...

1、Qt5.3.2

2、XML 的节点的属性中包含中文(XML保存成 UTF-8的格式)

<?xml version="1.0" encoding="utf-8" ?>
<root> <newNode2>content changed</newNode2>
<newNode3 newAttr="YES">newNode3 content</newNode3>
<ceshi attribute="测试">测试一下</ceshi>
<node2 attribute="no">NODE CONTENT</node2> <son>
<grandson>This is a grandson node</grandson>
<newGrandSon>new content</newGrandSon></son>
</root>

3、测试代码:

  ZC: 分别尝试了 UTF-8 编码的字符串和本地编码(GBK)的字符串,都检索不到我要的节点……

#include <cstdlib>
#include <cstring>

#include <iconv.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>

#include <QDebug>
#include <QTextCodec>

// Constructs the main window and sets up the generated UI object on it.
MainWindow::MainWindow(QWidget *parent) :
    QMainWindow(parent),
    ui(new Ui::MainWindow)
{
    ui->setupUi(this);
}

// Destroys the UI object allocated in the constructor.
MainWindow::~MainWindow()
{
    delete ui;
}

namespace {

// Adapter for the second argument of iconv(). Some iconv headers declare that
// parameter as `char**`, others as `const char**`; this object converts
// implicitly to whichever one the visible prototype asks for.
struct IconvInputArg
{
    char** m_pp;

    explicit IconvInputArg(char** pp) : m_pp(pp) {}
    operator char**() const { return m_pp; }
    operator const char**() const { return (const char**)m_pp; }
};

} // namespace

/**
 * Converts a byte sequence from one character set to another using iconv.
 *
 * @param from_charset  iconv name of the encoding of inbuf (e.g. "gb2312").
 * @param to_charset    iconv name of the encoding to produce (e.g. "utf-8").
 * @param inbuf         Bytes to convert.
 * @param inlen         Number of bytes in inbuf.
 * @param outbuf        Destination buffer; it is zero-filled before converting.
 * @param outlen        Size of outbuf in bytes. The last byte is reserved for a
 *                      terminating NUL, so at most outlen - 1 converted bytes
 *                      are written and the result is always a C string.
 * @return 0 on success, -1 on invalid arguments, an unsupported conversion,
 *         invalid input, or an output buffer that is too small.
 */
int code_convert(char* from_charset, char* to_charset, char* inbuf,
                 int inlen, char* outbuf, int outlen)
{
    if (from_charset == NULL || to_charset == NULL ||
        inbuf == NULL || outbuf == NULL || inlen < 0 || outlen <= 0)
        return -1;

    // iconv_open() reports failure as (iconv_t)-1, not as a null descriptor.
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    memset(outbuf, 0, outlen);

    // iconv() advances the pointers and decrements the counters it is given,
    // so work on local copies with the size_t type the API requires.
    char* pIn = inbuf;
    char* pOut = outbuf;
    size_t nInLeft = (size_t)inlen;
    size_t nOutLeft = (size_t)outlen - 1;   // keep one byte for the NUL

    const size_t rc = iconv(cd, IconvInputArg(&pIn), &nInLeft, &pOut, &nOutLeft);

    // Release the descriptor on both the success and the failure path.
    iconv_close(cd);

    return (rc == (size_t)-1) ? -1 : 0;
}
// Converts "Unicode" (UTF-8) text to GB2312.
/**
 * Converts a NUL-terminated UTF-8 string to GB2312.
 *
 * @param inbuf  UTF-8 text to convert; NULL is rejected.
 * @return A malloc()-allocated, NUL-terminated GB2312 string that the caller
 *         must release with free(), or NULL if allocation or conversion fails.
 */
char* u2g(char *inbuf)
{
    if (inbuf == NULL)
        return NULL;

    const size_t nInLen = strlen(inbuf);
    // 2 * len is more than the converted text can need; the extra byte keeps
    // the size positive for an empty input and leaves room for the NUL.
    const size_t nOutLen = 2 * nInLen + 1;

    char* szOut = (char*)malloc(nOutLen);
    if (szOut == NULL)
        return NULL;

    // code_convert() takes non-const charset names, so pass writable arrays
    // rather than string literals.
    char szFrom[] = "utf-8";
    char szTo[] = "gb2312";
    if (-1 == code_convert(szFrom, szTo, inbuf, (int)nInLen, szOut, (int)nOutLen))
    {
        free(szOut);
        szOut = NULL;
    }
    return szOut;
}
// Converts GB2312 text to "Unicode" (UTF-8).
//成功则返回一个动态分配的char*变量,需要在使用完毕后手动free,失败返回NULL
char* g2u(char *inbuf)
{
int nOutLen = * strlen(inbuf) - ;
char* szOut = (char*)malloc(nOutLen);
if (- == code_convert("gb2312","utf-8",inbuf,strlen(inbuf),szOut,nOutLen))
{
free(szOut);
szOut = NULL;
}
return szOut;
} xmlXPathObject* Get_NodeSet(xmlDoc* _pDoc, const xmlChar *szXpath)
{
xmlXPathContextPtr context;
xmlXPathObjectPtr result; context = xmlXPathNewContext(_pDoc);
if (context == NULL)
{
//printf("context is NULL\n");
return NULL;
} result = xmlXPathEvalExpression(szXpath, context);
xmlXPathFreeContext(context);
if (result == NULL)
{
//printf("xmlXPathEvalExpression return NULL\n");
return NULL;
} if (xmlXPathNodeSetIsEmpty(result->nodesetval))
{
xmlXPathFreeObject(result);
//printf("nodeset is empty\n");
return NULL;
} return result;
} void MainWindow::on_pbtnXPath_clicked()
{
xmlDocPtr doc = NULL; //定义解析文档指针
xmlNodePtr curNode = NULL; //定义结点指针(你需要它为了在各个结点间移动) char *szDocName = "F:/ZZ_Qt5/Qt532_vs2010/build-libxml2_zz-z-Debug/debug/ChangedXml.xml"; doc = xmlReadFile(szDocName, "GB2312", XML_PARSE_RECOVER); //解析文件
//doc = xmlReadFile(szDocName, "UTF-8", XML_PARSE_RECOVER); if (NULL == doc)
{
qDebug() << "Document not parsed successfully.";
return;
} char* pcCeShi = "测试";
QTextCodec *pCodec = QTextCodec::codecForName("GBK");
QString strCeShi = pCodec->toUnicode(pcCeShi); //QString str = "/root/node2[@attribute='no']";
QString str = "/root/node2[@attribute='"+strCeShi+"']";
QByteArray ba = str.toUtf8();
//QByteArray ba = str.toLocal8Bit();
char pc[] = {};
memcpy(&pc[], ba.data(), ba.length());
//pc[ba.length()] = '\0'; char *p0 = "/root/node2[@attribute='测试']";
char* p1 = g2u(p0);
char pc1[] = {};
memcpy(&pc1[], p1, strlen(p1)); //xmlChar *szXpath =BAD_CAST ("/root/node2[@attribute='no']");
xmlChar *szXpath = BAD_CAST (p1);
xmlXPathObjectPtr app_result = Get_NodeSet(doc, szXpath); //查询并得到结果 if (NULL == app_result)
{
qDebug() << "app_result is NULL";
return;
}
xmlChar *szValue = NULL;
if(app_result)
{
xmlNodeSetPtr nodeset = app_result->nodesetval;
for (int i = ; i < nodeset->nodeNr; i++)
{
curNode = nodeset->nodeTab[i];
if(curNode != NULL)
{
szValue = xmlGetProp(curNode,BAD_CAST "attribute");
if (szValue != NULL)
{
qDebug() << "attribute = " << (char*)szValue;
xmlFree(szValue);
} szValue = xmlNodeGetContent(curNode);
if (szValue != NULL)
{
qDebug() << "content = " << (char*)szValue;
xmlFree(szValue);
}
}
}
xmlXPathFreeObject (app_result);
}
xmlFreeDoc(doc); free(p1);
}

4、

5、

6、