C++中处理XML文件

写Unmanaged Code在.NET时代成为一种很悲惨的事,当你需要处理XML文件时,这种感觉会变得尤其强烈。FCL中的System.Xml多简单啊,连Steve Ballmer都知道怎么用。

事情不会总是那么理想的,如果你要在C/C++程序里处理XML怎么办呢?

选择一:市面上的XML lib还是有几个的,最有名的当然是libxml。我一年前用过,很不错,我还特意写了一份简明教程,后来不知搁哪儿了。

选择二:MS的MSXML,我要介绍的就是这个。

先说一下在MSDN哪里找文档吧,往下看的时候也好有个参考:在Index里打:Windows Media Services 9 Series SDK=>Programming Reference=>Programming Reference (C++)=>XML DOM Interfaces (C++)。什么?Windows Media?呵呵,不错,我觉得这个guide反而是最清楚的,你直接找MSXML,得到的结果,我觉得还没这个好。

在C程序里调用MSXML基本就是一堆COM接口,不过在Visual Studio里操作先要做点简单的设置:

在你的Project里Add References=>COM标签=>Microsoft XML v4.0,5.0其实也有了,但因为是和Office一起发布的,觉得有点怪,不想用,反正也未必用什么很怪异的功能,4.0可以了。

然后再加入这两行:

  1#include <msxml2.h>
  2#import <msxml4.dll>
  3
  4头文件和dll库。什么?在哪里加?头文件或者c/cpp文件啊,哪里合适放哪儿。 
  5
  6然后就开始编程了,先定义两个必用的变量: 
  7
  8IXMLDOMDocumentPtr xmlFile = NULL;   
  9IXMLDOMElement* xmlRoot = NULL; 
 10
 11为什么是必用的? 汗... 
 12
 13第一步当然是初始化COM: 
 14
 15if(FAILED(CoInitialize(NULL))) .... 
 16
 17接下来初始化xmlFile对象: 
 18
 19if(FAILED(xmlFile.CreateInstance("Msxml2.DOMDocument.4.0"))) ... 
 20
 21然后就可以加载xml文件了: 
 22
 23_variant_t varXml(L"C:\\test.xml"); //L for unicode   
 24VARIANT_BOOL varOut;   
 25xmlFile->load(varXml, &varOut); 
 26
 27取得root element: 
 28
 29xmlFile->get_documentElement(&xmlRoot); 
 30
 31取得第一级element: 
 32
 33IXMLDOMNodeList* xmlChildNodes = NULL;   
 34xmlRoot->get_childNodes(&xmlChildNodes); 
 35
 36遍历所有第一级element: 
 37
 38IXMLDOMNode* currentNode = NULL;   
 39while(!FAILED(xmlChildNodes->nextNode(&currentNode)) && currentNode != NULL)   
 40{   
 41//do something   
 42} 
 43
 44取得当前element的名称: 
 45
 46BSTR nodeName;   
 47currentNode->get_nodeName(&nodeName); 
 48
 49取得当前element的一个attribute(假设这个attribute叫type)的值: 
 50
 51IXMLDOMNamedNodeMap* attributes = NULL;   
 52IXMLDOMNode* attributeName = NULL;   
 53_bstr_t bstrAttributeName = "type";   
 54BSTR nameVal;   
 55currentNode->get_attributes(&attributes);   
 56attributes->getNamedItem(bstrAttributeName, &attributeName);   
 57attributeName->get_text(&nameVal); 
 58
 59需要注意的是,你要记住释放所有的接口,IXMLDOM***->Release()(拿到的BSTR也一样,要用SysFreeString()释放),这可不是.NET,有人帮你GC,你得自己调用Release()来减reference count,it's COM, remember? 
 60
 61好了,大致就这样,顺便提一下XPath: 
 62
 63_bstr_t bstrXmlQuery = L"/books/book[@type='scifi' and @author='fox']";   
 64IXMLDOMNodeList* nodes = NULL; long length = 0;   
 65if(FAILED(xmlRoot->selectNodes(bstrXmlQuery, &nodes)) || FAILED(nodes->get_length(&length)) || length == 0)   
 66//no match found or something went wrong   
 67else   
 68//match found 
 69
 70上面是找这样的node: 
 71
 72<books>
 73<book author="fox" type="scifi">....   
 74</book>   
 75....   
 76</books>
 77
 78具体的XPath语法就查手册吧,到处都有。 
 79
 80哦,对了,忘了说:如果你全部用_com_ptr_t智能指针类(即各种xxxPtr,由comdef.h提供,并非ATL)的话,接口的调用会简单一点,不过很容易转换的,比如: 
 81
 82IXMLDOMDocument* 对应 IXMLDOMDocumentPtr(我这里用了),其他基本也是加个Ptr,我不废话了。 
 83
 84最后提供一个sample,我临时攒的。工作的时候写的程序当然不能拿来贴的,呵呵。这个sample基本就是遍历整个xml,然后报告一遍文件的结构,对每个node,如果它有一个叫id的attribute,就同时打印id的值。If you want the complete VS project, shoot me an email. But I guess no one really needs it anyway, right, : ) 
 85
 86#include "stdafx.h"   
 87#include <windows.h>   
 88#include <msxml2.h>   
 89#import <msxml4.dll>
 90
 91HANDLE logFile = NULL; 
 92
 93#define INDENT 4 
 94
 95#define TESTHR(hr) \   
 96{ \   
 97if(FAILED(hr)) goto fail; \   
 98} 
 99
100void PrintChild(IXMLDOMNodeList* nodeList, int level)   
101{   
102if(nodeList == NULL)   
103return; 
104
105IXMLDOMNode* currentNode = NULL;   
106IXMLDOMNodeList* childNodes = NULL;   
107IXMLDOMNamedNodeMap* attributes = NULL;   
108IXMLDOMNode* attributeID = NULL; 
109
110while(!FAILED(nodeList-&gt;nextNode(&amp;currentNode)) &amp;&amp; currentNode != NULL)   
111{   
112BSTR nodeName;   
113TESTHR(currentNode-&gt;get_nodeName(&amp;nodeName));   
114DWORD dwBytesWritten;   
115for(int i=0; i<level*indent; ",="" &dwbyteswritten,="" (dword)(sizeof(wchar)),="" (dword)(wcslen(nodename)*sizeof(wchar)),="" i++)="" l"="" l"%s="" msg[max_size];="" nodename);="" nodename,="" null);="" testhr(currentnode-="" wchar="" writefile(logfile,="" wsprintf(msg,="">get_attributes(&amp;attributes));   
116if(attributes!=NULL)   
117{   
118_bstr_t bstrAttributeName = "id";   
119BSTR idVal;   
120TESTHR(attributes-&gt;getNamedItem(bstrAttributeName, &amp;attributeID));   
121if(attributeID != NULL)   
122{   
123TESTHR(attributeID-&gt;get_text(&amp;idVal));   
124WriteFile(logFile, L" ", (DWORD)(sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
125WriteFile(logFile, idVal, (DWORD)(wcslen(idVal)*sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
126WriteFile(logFile, L"\r\n", (DWORD)(2*sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
127attributeID-&gt;Release(); attributeID = NULL;   
128}   
129else   
130{   
131WriteFile(logFile, L"\r\n", (DWORD)(2*sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
132}   
133attributes-&gt;Release(); attributes = NULL; 
134
135}   
136else   
137{   
138WriteFile(logFile, L"\r\n", (DWORD)(2*sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
139}   
140  
141TESTHR(currentNode-&gt;get_childNodes(&amp;childNodes));   
142PrintChild(childNodes, level+1);   
143currentNode=NULL;   
144} 
145
146fail:   
147if(childNodes!=NULL)   
148childNodes-&gt;Release();   
149if(attributeID!=NULL)   
150attributeID-&gt;Release();   
151if(attributes!=NULL)   
152attributes-&gt;Release();   
153if(currentNode != NULL)   
154currentNode-&gt;Release();   
155} 
156
157int _tmain(int argc, _TCHAR* argv[])   
158{   
159  
160IXMLDOMDocumentPtr xmlFile = NULL;   
161IXMLDOMElement* xmlRoot = NULL;   
162_variant_t varXml(L"C:\\\demo1.xml"); 
163
164logFile = CreateFile(L"log.txt", GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);   
165if(logFile == INVALID_HANDLE_VALUE)   
166goto fail; 
167
168TESTHR(CoInitialize(NULL)); 
169
170TESTHR(xmlFile.CreateInstance("Msxml2.DOMDocument.4.0")); 
171
172VARIANT_BOOL varOut;   
173TESTHR(xmlFile-&gt;load(varXml, &amp;varOut)); 
174
175TESTHR(xmlFile-&gt;get_documentElement(&amp;xmlRoot)); 
176
177BSTR rootName;   
178DWORD dwBytesWritten;   
179TESTHR(xmlRoot-&gt;get_nodeName(&amp;rootName));   
180WriteFile(logFile, rootName, (DWORD)(wcslen(rootName)*sizeof(WCHAR)), &amp;dwBytesWritten, NULL);   
181WriteFile(logFile, L"\r\n", (DWORD)(2*sizeof(WCHAR)), &amp;dwBytesWritten, NULL); 
182
183IXMLDOMNodeList* xmlChildNodes = NULL;   
184TESTHR(xmlRoot-&gt;get_childNodes(&amp;xmlChildNodes)); 
185
186PrintChild(xmlChildNodes, 2); 
187
188fail:   
189if(logFile != INVALID_HANDLE_VALUE)   
190CloseHandle(logFile);   
191if(xmlChildNodes!=NULL)   
192xmlChildNodes-&gt;Release();   
193if(xmlRoot!=NULL)   
194xmlRoot-&gt;Release();   
195return 0;   
196}</level*indent;></msxml4.dll></msxml2.h></windows.h></msxml4.dll></msxml2.h>
Published At
Categories with Web编程
Tagged with
comments powered by Disqus