/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader
最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)
遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在! 如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
你可以为其添加新属性,原则是:
成员变量 Field 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
如:
<dc:language>zh-CHS</dc:language>
将其映射为:
private string _dclanguage
public string DcLanguage
{
get
{
return this._dclanguage;
}
}
2. Class RssItem 用于表示 Rss 的 Item
添加新属性的原则同 RssHeader!
3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
请仔细参阅 class SimpleRssReader 的 Travel 方法!
4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
表:
Channels (主表)
ChannelsDetails (细表)
字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
存储过程:
SP_AddChannel
SP_AddChannelsDetails
参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
命令行编译:
csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll
全部代码 SimpleRssReader.cs 在此下载
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar
*/
namespace Microshaoft
{
using System;
using System.Xml;
using System.Text;
using System.Reflection;
using System.Collections;
using System.Text.RegularExpressions;
// RSS 2.0 model classes, reader and helpers.
/// <summary>
/// Channel-level information of an RSS 2.0 feed (the "header" of the feed).
/// </summary>
/// <remarks>
/// Only <c>_URL</c> is assigned inside this class (by the constructor). The remaining
/// private fields are intended to be assigned by SimpleRssReader through reflection, so
/// each one is named after its XML element: an underscore followed by the element name
/// with any ':' removed (dc:language -> _dclanguage). Renaming a field breaks that lookup.
/// </remarks>
public class RssHeader
{
    // Assigned by the constructor.
    private string _URL;

    // Assigned through reflection; a field keeps its default (null) if nothing sets it.
    private string _title;
    private string _description;
    private string _link;
    private string _language;
    private string _generator;
    private string _ttl;
    private string _copyright;
    private string _pubDate;
    private string _category;
    private string _lastBuildDate;
    private string _managingEditor;
    private string _dclanguage; // dc:language

    /// <summary>Creates a header for the feed located at <paramref name="URL"/>.</summary>
    /// <param name="URL">Address of the feed.</param>
    public RssHeader(string URL)
    {
        _URL = URL;
    }

    /// <summary>Address of the feed, as passed to the constructor.</summary>
    public string URL { get { return _URL; } }

    /// <summary>Raw text of the channel's title element.</summary>
    public string Title { get { return _title; } }

    /// <summary>Raw text of the channel's description element.</summary>
    public string Description { get { return _description; } }

    /// <summary>Raw text of the channel's link element.</summary>
    public string Link { get { return _link; } }

    /// <summary>Raw text of the channel's language element.</summary>
    public string Language { get { return _language; } }

    /// <summary>Raw text of the channel's generator element.</summary>
    public string Generator { get { return _generator; } }

    /// <summary>Raw text of the channel's ttl element.</summary>
    public string Ttl { get { return _ttl; } }

    /// <summary>Raw text of the channel's copyright element.</summary>
    public string Copyright { get { return _copyright; } }

    /// <summary>The channel's pubDate text converted with Util.ParseDateTime.</summary>
    public DateTime PubDate { get { return Util.ParseDateTime(_pubDate); } }

    /// <summary>Raw text of the channel's category element.</summary>
    public string Category { get { return _category; } }

    /// <summary>The channel's lastBuildDate text converted with Util.ParseDateTime.</summary>
    public DateTime LastBuildDate { get { return Util.ParseDateTime(_lastBuildDate); } }

    /// <summary>Raw text of the channel's managingEditor element.</summary>
    public string ManagingEditor { get { return _managingEditor; } }

    /// <summary>Raw text of the channel's dc:language element.</summary>
    public string DcLanguage { get { return _dclanguage; } }
}
/// <summary>
/// One item (entry) of an RSS 2.0 feed.
/// </summary>
/// <remarks>
/// Nothing in this class assigns its private fields; they are intended to be assigned by
/// SimpleRssReader through reflection. Each string field is named after its XML element:
/// an underscore followed by the element name with any ':' removed
/// (wfw:commentRss -> _wfwcommentRss). Renaming a field breaks that lookup.
/// </remarks>
public class RssItem
{
    // Owning feed header; looked up by the name "_Header" via reflection.
    private RssHeader _Header;

    // Assigned through reflection; a field keeps its default (null) if nothing sets it.
    private string _title;
    private string _link;
    private string _description;
    private string _category;
    private string _author;
    private string _pubDate;
    private string _comments;
    private string _guid;
    private string _slashcomments;
    private string _wfwcomment;
    private string _wfwcommentRss;
    private string _trackbackping;

    /// <summary>Header of the feed this item belongs to.</summary>
    public RssHeader Header { get { return _Header; } }

    /// <summary>Raw text of the item's title element.</summary>
    public string Title { get { return _title; } }

    /// <summary>Raw text of the item's link element.</summary>
    public string Link { get { return _link; } }

    /// <summary>Raw text of the item's description element.</summary>
    public string Description { get { return _description; } }

    /// <summary>Raw text of the item's category element.</summary>
    public string Category { get { return _category; } }

    /// <summary>Raw text of the item's author element.</summary>
    public string Author { get { return _author; } }

    /// <summary>The item's pubDate text converted with Util.ParseDateTime.</summary>
    public DateTime PubDate { get { return Util.ParseDateTime(_pubDate); } }

    /// <summary>Raw text of the item's comments element.</summary>
    public string Comments { get { return _comments; } }

    /// <summary>Raw text of the item's guid element.</summary>
    public string Guid { get { return _guid; } }

    /// <summary>Raw text of the item's slash:comments element.</summary>
    public string SlashComments { get { return _slashcomments; } }

    /// <summary>Raw text of the item's wfw:comment element.</summary>
    public string WfwComment { get { return _wfwcomment; } }

    /// <summary>Raw text of the item's wfw:commentRss element.</summary>
    public string WfwCommentRss { get { return _wfwcommentRss; } }

    /// <summary>Raw text stored in the _trackbackping field.</summary>
    public string TrackbackPing { get { return _trackbackping; } }
}
/// <summary>
/// Loads an RSS 2.0 document and maps its channel and item elements onto
/// <see cref="RssHeader"/> and <see cref="RssItem"/> instances through reflection.
/// </summary>
/// <remarks>
/// An XML element is stored in the private string field whose name is "_" followed by the
/// element name with every ':' removed; elements without a matching field are ignored.
/// </remarks>
public class SimpleRssReader
{
    /// <summary>Signature of the handler called once a channel header has been filled in.</summary>
    public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);

    /// <summary>Raised for each channel after its header fields have been assigned.</summary>
    public event RssHeaderReceiveEventHandler RssHeaderReceive;

    /// <summary>Signature of the handler called once a single item has been filled in.</summary>
    public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);

    /// <summary>Raised for each item after its fields have been assigned.</summary>
    public event RssItemReceiveEventHandler RssItemReceive;

    // Private fields are the targets, so NonPublic is required; Public is kept as in the
    // original lookup.
    private const BindingFlags FieldFlags =
        BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public;

    private Type _TRS; // typeof(RssHeader)
    private Type _tri; // typeof(RssItem)
    private ArrayList _RssItemsAL;
    private RssHeader _rs;

    /// <summary>Header produced by the most recent call to <see cref="Rss"/>.</summary>
    public RssHeader RssHeader
    {
        get
        {
            return this._rs;
        }
    }

    // Snapshot of every item collected by the most recent call to Rss.
    private RssItem[] _RssItems;

    /// <summary>
    /// Items produced by the most recent call to <see cref="Rss"/>; an empty array when
    /// that feed had no items, null before the first call.
    /// </summary>
    public RssItem[] RssItems
    {
        get
        {
            return this._RssItems;
        }
    }

    /// <summary>
    /// Loads the feed at <paramref name="URL"/>, fills <see cref="RssHeader"/> and
    /// <see cref="RssItems"/>, and raises the receive events while doing so.
    /// </summary>
    /// <param name="URL">Location passed to XmlDocument.Load.</param>
    public void Rss(string URL)
    {
        XmlDocument xd = new XmlDocument();
        // If this turns out to be slow, a WebRequest could be used instead.
        xd.Load(URL);
        XmlNodeList xnl = xd.SelectNodes("/rss/channel");

        this._rs = new RssHeader(URL);
        this._TRS = typeof(RssHeader);
        this._tri = typeof(RssItem);
        this._RssItemsAL = new ArrayList();

        foreach (XmlNode xn in xnl)
        {
            this.Travel(xn, 0);
        }

        // Always publish a fresh array so results of an earlier call never leak through
        // and callers can enumerate RssItems without a null check.
        this._RssItems = (RssItem[]) this._RssItemsAL.ToArray(typeof(RssItem));
    }

    /// <summary>
    /// Processes one channel node: fills the header from the channel's non-item children,
    /// raises <see cref="RssHeaderReceive"/>, then builds and reports every item child.
    /// </summary>
    /// <param name="xn">The channel node.</param>
    /// <param name="i">Item count; retained for signature compatibility and not used.</param>
    private void Travel(XmlNode xn, int i)
    {
        if (!xn.HasChildNodes)
        {
            return;
        }

        // The header is handled before any item so that header subscribers run first.
        // It no longer depends on an item being present, and no item is consumed as a
        // mere trigger for header parsing.
        AssignFields(this._TRS, this._rs, xn);
        if (this.RssHeaderReceive != null)
        {
            this.RssHeaderReceive(this, this._rs);
        }

        foreach (XmlNode x in xn.ChildNodes)
        {
            if (x.Name != "item")
            {
                continue;
            }

            RssItem ri = new RssItem();
            AssignFields(this._tri, ri, x);

            // Link the item to its feed header; _Header is private to RssItem.
            FieldInfo fi = this._tri.GetField("_Header", FieldFlags);
            if (fi != null)
            {
                fi.SetValue(ri, this._rs);
            }

            if (this.RssItemReceive != null)
            {
                this.RssItemReceive(this, ri);
            }
            this._RssItemsAL.Add(ri);
        }
    }

    /// <summary>
    /// Copies the inner text of each child of <paramref name="node"/> (except children
    /// named "item") into the matching private string field of <paramref name="target"/>.
    /// </summary>
    /// <param name="type">Type whose fields are searched.</param>
    /// <param name="target">Instance that receives the values.</param>
    /// <param name="node">Node whose children are read.</param>
    private static void AssignFields(Type type, object target, XmlNode node)
    {
        foreach (XmlNode n in node.ChildNodes)
        {
            if (n.Name == "item")
            {
                continue;
            }

            FieldInfo fi = type.GetField("_" + n.Name.Replace(":", ""), FieldFlags);
            // Only string fields can take InnerText; this also keeps an element that
            // happens to be called "Header" from hitting RssItem._Header.
            if (fi != null && fi.FieldType == typeof(string))
            {
                fi.SetValue(target, n.InnerText);
            }
        }
    }
}
/// <summary>
/// Helpers for converting RSS text: lenient date parsing and HTML stripping.
/// </summary>
public class Util
{
    // Patterns applied in order by StripHTML; StripReplacements holds the replacement
    // for the pattern at the same index.
    private static readonly string[] StripPatterns =
    {
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] StripReplacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // chr(161)
        "\xa2", // chr(162)
        "\xa3", // chr(163)
        "\xa9", // chr(169)
        "",
        "\r\n",
        ""
    };

    // Built once; must be declared after StripPatterns because static initializers run
    // in textual order.
    private static readonly Regex[] StripRegexes = BuildStripRegexes();

    // Compiles every entry of StripPatterns with the options StripHTML has always used.
    private static Regex[] BuildStripRegexes()
    {
        Regex[] compiled = new Regex[StripPatterns.Length];
        for (int i = 0; i < compiled.Length; i++)
        {
            compiled[i] = new Regex(StripPatterns[i], RegexOptions.Multiline | RegexOptions.IgnoreCase);
        }
        return compiled;
    }

    /// <summary>
    /// Converts <paramref name="s"/> to a <see cref="DateTime"/> without ever throwing.
    /// </summary>
    /// <param name="s">Date text; may be null or empty.</param>
    /// <returns>
    /// The parsed value, or <see cref="DateTime.Now"/> when <paramref name="s"/> is null,
    /// empty, or cannot be parsed.
    /// </returns>
    public static DateTime ParseDateTime(string s)
    {
        if (s == null || s.Length == 0)
        {
            return DateTime.Now;
        }

        try
        {
            // Current culture first, so every input that parsed before parses the same way.
            return DateTime.Parse(s);
        }
        catch (Exception)
        {
            // Fall through to the culture-independent attempt below.
        }

        try
        {
            // English day/month names must still parse on a machine whose current
            // culture is not English.
            return DateTime.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        }
        catch (Exception)
        {
            // Deliberate best effort: an unparseable date yields the current time.
            return DateTime.Now;
        }
    }

    /// <summary>
    /// Removes HTML tags from <paramref name="HTML"/> and decodes a small set of entities.
    /// </summary>
    /// <param name="HTML">Source text; may be null or empty.</param>
    /// <returns>
    /// The stripped text. A null or empty input is returned unchanged.
    /// </returns>
    /// <remarks>
    /// Earlier revisions ended with three <c>s.Replace(...)</c> calls whose results were
    /// discarded, so they never had any effect. They are omitted rather than made
    /// effective, because stripping '&lt;', '&gt;' and line breaks at that point would
    /// undo the entity decoding performed by the patterns.
    /// </remarks>
    public static string StripHTML(string HTML)
    {
        if (HTML == null || HTML.Length == 0)
        {
            return HTML;
        }

        string s = HTML;
        for (int i = 0; i < StripRegexes.Length; i++)
        {
            s = StripRegexes[i].Replace(s, StripReplacements[i]);
        }
        return s;
    }
}
}

//测试程序
namespace Test
{
    using System;
    using System.Data;
    using System.Reflection;
    using System.Data.SqlClient;

    using Microshaoft;
    using Microshaoft.Data;

    /// <summary>
    /// Console test program: reads one feed with SimpleRssReader, prints what it receives
    /// and stores the header and every item through the stored procedures
    /// SP_AddChannel and SP_AddChannelsDetails.
    /// </summary>
    class ConsoleApplication
    {
        private SqlConnection _Connection;
        public string _Channel;

        /// <summary>Connection used by <see cref="SaveToDataBase"/>.</summary>
        public SqlConnection Connection
        {
            set
            {
                this._Connection = value;
            }
            get
            {
                return this._Connection;
            }
        }

        static void Main()
        {
            // Other feeds that were tried during development:
            //   http://www.ccw.com.cn/rss/news2/1.xml
            //   http://dzh.mop.com/topic/rss.jsp?type=28
            //   http://www.ccw.com.cn/rss/news2/15.xml
            //   http://www.cnblogs.com/rss.aspx?id=-1
            //   http://weblog.siliconvalley.com/column/dangillmor/index.xml
            //   http://www.skyone.com.cn/sub/rss/list_jjsc.xml
            // The URL carries no surrounding blanks: it is stored as the channel key.
            string s = "http://localhost/rss.xml";

            ConsoleApplication a = new ConsoleApplication();

            // NOTE(review): hard-coded 'sa' login with an empty password; acceptable only
            // for a local test database.
            a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
            try
            {
                a.Connection.Open();

                SimpleRssReader srr = new SimpleRssReader();

                srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
                srr.RssItemReceive += new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

                System.Console.WriteLine("waiting ....");
                srr.Rss(s); // to be made multi-threaded or asynchronous later

                System.Console.WriteLine("print all rss Header and items ....");
                System.Console.ReadLine();
                System.Console.WriteLine("Header: " + srr.RssHeader.Title);
                // RssItems may be null when the feed produced no items.
                if (srr.RssItems != null)
                {
                    foreach (RssItem ri in srr.RssItems)
                    {
                        System.Console.WriteLine("item: " + ri.Title);
                    }
                }
                System.Console.ReadLine();
            }
            finally
            {
                // Release the connection even when loading or saving fails.
                a.Connection.Close();
            }
        }

        // Prints the received header and stores it with SP_AddChannel.
        private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
        {
            System.Console.WriteLine("Header:" + Header.Link);
            System.Console.WriteLine("Header:" + Header.Title);

            this.SaveToDataBase("SP_AddChannel", typeof(RssHeader), Header);
        }

        // Prints the received item and stores it with SP_AddChannelsDetails.
        private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
        {
            System.Console.WriteLine("Item: " + Item.Title);
            System.Console.WriteLine("Item: " + Item.Link);
            System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description));

            this.SaveToDataBase("SP_AddChannelsDetails", typeof(RssItem), Item);
        }

        /// <summary>
        /// Executes stored procedure <paramref name="sp"/>, feeding each input parameter
        /// from the public property of <paramref name="instance"/> that has the same name
        /// (compared in lower case, without the '@').
        /// </summary>
        /// <param name="sp">Name of the stored procedure.</param>
        /// <param name="t">Type whose public properties are read.</param>
        /// <param name="instance">Object the property values are taken from.</param>
        private void SaveToDataBase(string sp, Type t, object instance)
        {
            // Fetch every parameter of the stored procedure.
            SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
            System.Collections.Hashtable ht = new System.Collections.Hashtable();

            for (int i = 0; i < spa.Length; i++)
            {
                // Remember the position of each parameter by its bare, lower-cased name.
                ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);

                // Give every parameter an initial value.
                spa[i].Value = null;
            }

            foreach (PropertyInfo x in t.GetProperties())
            {
                // One lookup; the stored positions are boxed ints, so null means "no such parameter".
                object position = ht[x.Name.ToLower()];
                if (position == null)
                {
                    continue;
                }

                SqlParameter parameter = spa[(int) position];
                if (parameter.Direction != System.Data.ParameterDirection.Input
                    && parameter.Direction != System.Data.ParameterDirection.InputOutput)
                {
                    continue;
                }

                object o = x.GetValue(instance, null);
                if (x.PropertyType == typeof(string) && o != null)
                {
                    // String values are stored without markup.
                    o = Util.StripHTML((string) o);
                }
                parameter.Value = o;
            }

            if (t == typeof(RssItem))
            {
                // Items carry no URL property of their own; the first parameter receives
                // the URL of the owning feed.
                spa[0].Value = ((RssItem) instance).Header.URL;
            }

            SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);
            // The last parameter is read back as the procedure's result.
            if (spa[spa.Length - 1].Value != System.DBNull.Value)
            {
                System.Console.WriteLine("Save to ID: {0} successful!", spa[spa.Length - 1].Value);
            }
            else
            {
                System.Console.WriteLine("save failed! may be duplicate!");
            }
        }
    }
}
194
//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

CREATE TABLE [dbo].[Channels] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
[language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
[generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[ChannelsDetails] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[ChannelID] [int] NULL ,
[title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO


CREATE proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

CREATE proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/