有如下xml文件:
根节点为reference,其子节点有paper,article,thesis等,且每个子节点都有author,title,year等子节点(0个或多个)。
问题:
对于xml文件中的每个节点,求出其所有的子节点标签名的集合
形如:
reference : {paper,article,thesis...}
paper_1 : {author,title}
thesis_1 : {author,title,year}
thesis_2 : {title}
...
最好用java+SAX实现
---------------------------------------------------------------
What parser are you using? In DOM, each node has a childNodes collection, so you just need to write a recursive method to retrieve all sub-nodes.
if you are using MSXML, see
http://www.codeproject.com/soap/ce_xml.asp
---------------------------------------------------------------
using MSXML, vc60
// XMLAnalyst.cpp: implementation of the XMLAnalyst class.
//
//////////////////////////////////////////////////////////////////////
#include "XMLAnalyst.h"

#include <cstdio>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
using namespace std;

#import <msxml4.dll>
using namespace MSXML2;
8
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
12
13XMLAnalyst::XMLAnalyst()
14{
15
16}
17
18XMLAnalyst::~XMLAnalyst()
19{
20
21}
22
23int XMLAnalyst::childNameAnalysis()
24//Traverse the DOM tree, numbering the nodes in the pre-root traverse order,
25//get the chileNodeName of every node and output in such format:
26// number nodeName {childNodeName}
27//The target file is set to dblp_sample.xml
28{
29if (init())
30{
31cout << "Unable to initialize.\n";
32return -1;
33}
34
35IXMLDOMNodePtr pNode = pXMLDom->firstChild;
36elementNumber = 0;
37traverse(pNode);
38cout << "Traverse complete.\n";
39
40unInit();
41return 0;
42}
43
44int XMLAnalyst::init()
45{
46CoInitialize(NULL);
47
48hr= pXMLDom.CreateInstance(__uuidof(DOMDocument40));
49if (FAILED(hr))
50{
51printf("Failed to instantiate an XML DOM.\n");
52return -1;
53}
54
55pXMLDom->async = VARIANT_FALSE; // default - true,
56
57//Load XML from dblp_sample.xml
58cout << "Loading XML file...\n";
59if (pXMLDom->load("dblp_sample.xml") != VARIANT_TRUE)
60{
61printf("Failed to load dblp_sample.xml:\n%s\n",
62(LPCSTR)pXMLDom->parseError->Getreason());
63return -1;
64}
65
66outFile.open("z_result_childNameAnalysis.txt");
67if (! outFile)
68{
69cout << "Cannot open file to write result.\n";
70return -1;
71}
72outFile << "number" << '\t' << "elementName" << '\t' << "chileNodeName" << "\n\n";
73
74return 0;
75}
76
77int XMLAnalyst::unInit()
78{
79outFile << endl;
80outFile.close();
81
82outFile_IL << endl;
83outFile_IL.close();
84
85pXMLDom.Release();
86// CoUninitialize();
87return 0;
88}
89
90int XMLAnalyst::traverse(IXMLDOMNodePtr pn)
91{
92visit(pn);
93
94IXMLDOMNodeListPtr pNodeList = pn->childNodes;
95if (pNodeList)
96for(int i=0; i<pnodelist->length; i++)
97traverse(pNodeList->item[i]);
98
99return 0;
100}
101
102int XMLAnalyst::visit(IXMLDOMNodePtr pn)
103{
104if (pn->hasChildNodes())
105elementNumber++;
106
107createILofNode(pn);
108
109if (pn->hasChildNodes())
110{
111if ( (pn->firstChild == pn->lastChild) && (!pn->firstChild->hasChildNodes()) )
112return 0;
113
114map<string, int=""> childName;
115IXMLDOMNodeListPtr pNodeList = pn->childNodes;
116IXMLDOMNodePtr pCurrNode;
117string currChildName;
118for (int i=0; i<pnodelist->length; i++)
119{
120pCurrNode = pNodeList->item[i];
121if (pCurrNode->hasChildNodes)
122{
123currChildName = pCurrNode->nodeName;
124childName[currChildName]++;
125}
126}
127
128string name = pn->nodeName;
129// cout << elementNumber << '\t' << name << '\n';
130outFile << elementNumber << '\t' << name << '\t';
131if (name.length()<8)
132outFile << '\t';
133outFile << '{';
134map<string, int="">::iterator it = childName.begin();
135for( ; it != childName.end(); ++it)
136outFile << it->first << ':' << it->second << ' ';
137outFile << '}' << '\n';
138}
139
140return 0;
141}
---------------------------------------------------------------

还有个定义~~
IXMLDOMDocumentPtr pXMLDom;