XML简单分析问题

有如下xml文件:
根节点为reference,其子节点有paper,article,thesis等,且每个子节点都有author,title,year等子节点(0个或多个)。

问题:
对于xml文件中的每个节点,求出其所有的子节点标签名的集合

形如:
reference : {paper,article,thesis...}
paper_1 : {author,title}
thesis_1 : {author,title,year}
thesis_2 : {title}
...

最好用java+SAX实现
---------------------------------------------------------------

What parser are you using? In DOM, each node has a childNodes collection; you just need to write a recursive method to retrieve all subnodes.

if you are using MSXML, see

http://www.codeproject.com/soap/ce_xml.asp

---------------------------------------------------------------

using MSXML, vc60

// XMLAnalyst.cpp: implementation of the XMLAnalyst class.
//
//////////////////////////////////////////////////////////////////////

#include "XMLAnalyst.h"

#include <cstdio>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
using namespace std;

#import <msxml4.dll>
using namespace MSXML2;

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

 13XMLAnalyst::XMLAnalyst()   
 14{   
 15  
 16}   
 17  
 18XMLAnalyst::~XMLAnalyst()   
 19{   
 20  
 21}   
 22  
 23int XMLAnalyst::childNameAnalysis()   
 24//Traverse the DOM tree, numbering the nodes in the pre-root traverse order,   
 25//get the chileNodeName of every node and output in such format:   
 26// number nodeName {childNodeName}   
 27//The target file is set to dblp_sample.xml   
 28{   
 29if (init())   
 30{   
 31cout &lt;&lt; "Unable to initialize.\n";   
 32return -1;   
 33}   
 34  
 35IXMLDOMNodePtr pNode = pXMLDom-&gt;firstChild;   
 36elementNumber = 0;   
 37traverse(pNode);   
 38cout &lt;&lt; "Traverse complete.\n";   
 39  
 40unInit();   
 41return 0;   
 42}   
 43  
 44int XMLAnalyst::init()   
 45{   
 46CoInitialize(NULL);   
 47  
 48hr= pXMLDom.CreateInstance(__uuidof(DOMDocument40));   
 49if (FAILED(hr))   
 50{   
 51printf("Failed to instantiate an XML DOM.\n");   
 52return -1;   
 53}   
 54  
 55pXMLDom-&gt;async = VARIANT_FALSE; // default - true,   
 56  
 57//Load XML from dblp_sample.xml   
 58cout &lt;&lt; "Loading XML file...\n";   
 59if (pXMLDom-&gt;load("dblp_sample.xml") != VARIANT_TRUE)   
 60{   
 61printf("Failed to load dblp_sample.xml:\n%s\n",   
 62(LPCSTR)pXMLDom-&gt;parseError-&gt;Getreason());   
 63return -1;   
 64}   
 65  
 66outFile.open("z_result_childNameAnalysis.txt");   
 67if (! outFile)   
 68{   
 69cout &lt;&lt; "Cannot open file to write result.\n";   
 70return -1;   
 71}   
 72outFile &lt;&lt; "number" &lt;&lt; '\t' &lt;&lt; "elementName" &lt;&lt; '\t' &lt;&lt; "chileNodeName" &lt;&lt; "\n\n";   
 73  
 74return 0;   
 75}   
 76  
 77int XMLAnalyst::unInit()   
 78{   
 79outFile &lt;&lt; endl;   
 80outFile.close();   
 81  
 82outFile_IL &lt;&lt; endl;   
 83outFile_IL.close();   
 84  
 85pXMLDom.Release();   
 86// CoUninitialize();   
 87return 0;   
 88}   
 89  
 90int XMLAnalyst::traverse(IXMLDOMNodePtr pn)   
 91{   
 92visit(pn);   
 93  
 94IXMLDOMNodeListPtr pNodeList = pn-&gt;childNodes;   
 95if (pNodeList)   
 96for(int i=0; i<pnodelist->length; i++)   
 97traverse(pNodeList-&gt;item[i]);   
 98  
 99return 0;   
100}   
101  
102int XMLAnalyst::visit(IXMLDOMNodePtr pn)   
103{   
104if (pn-&gt;hasChildNodes())   
105elementNumber++;   
106  
107createILofNode(pn);   
108  
109if (pn-&gt;hasChildNodes())   
110{   
111if ( (pn-&gt;firstChild == pn-&gt;lastChild) &amp;&amp; (!pn-&gt;firstChild-&gt;hasChildNodes()) )   
112return 0;   
113  
114map<string, int=""> childName;   
115IXMLDOMNodeListPtr pNodeList = pn-&gt;childNodes;   
116IXMLDOMNodePtr pCurrNode;   
117string currChildName;   
118for (int i=0; i<pnodelist->length; i++)   
119{   
120pCurrNode = pNodeList-&gt;item[i];   
121if (pCurrNode-&gt;hasChildNodes)   
122{   
123currChildName = pCurrNode-&gt;nodeName;   
124childName[currChildName]++;   
125}   
126}   
127  
128string name = pn-&gt;nodeName;   
129// cout &lt;&lt; elementNumber &lt;&lt; '\t' &lt;&lt; name &lt;&lt; '\n';   
130outFile &lt;&lt; elementNumber &lt;&lt; '\t' &lt;&lt; name &lt;&lt; '\t';   
131if (name.length()&lt;8)   
132outFile &lt;&lt; '\t';   
133outFile &lt;&lt; '{';   
134map<string, int="">::iterator it = childName.begin();   
135for( ; it != childName.end(); ++it)   
136outFile &lt;&lt; it-&gt;first &lt;&lt; ':' &lt;&lt; it-&gt;second &lt;&lt; ' ';   
137outFile &lt;&lt; '}' &lt;&lt; '\n';   
138}   
139  
140return 0;   
141}   
---------------------------------------------------------------

还有个定义~~
IXMLDOMDocumentPtr pXMLDom;
Published At
Categories with Web编程
comments powered by Disqus