// .Net/C#: 利用反射编写通用的 rss 2.0 的 reader

/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)

遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在! 如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
你可以在为其添加新属性,原则是:
成员变量 Field 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
如:

<dc:language>zh-CHS</dc:language>

将其映射为:
private string _dclanguage;
public string DcLanguage
{
get
{
return this._dclanguage;
}
}

2. Class RssItem 用于表示 Rss 的 Item
添加新属性的原则同 RssHeader!

3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
请仔细参阅 class SimpleRssReader 的 Travel 方法!

4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
表:
Channels (主表)
ChannelsDetails (细表)
字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
存储过程:
SP_AddChannel
SP_AddChannelsDetails
参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!

命令行编译:
csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll

全部代码 SimpleRssReader.cs 在此下载
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar

*/
namespace Microshaoft
{
using System;
using System.Xml;
using System.Text;
using System.Reflection;
using System.Collections;
using System.Text.RegularExpressions;

/// <summary>
/// Channel-level ("header") data of an RSS 2.0 feed, exposed through read-only properties.
/// </summary>
/// <remarks>
/// Only <c>_URL</c> is assigned in this class (by the constructor). Every other backing
/// field is intended to be filled in from outside via reflection by SimpleRssReader; the
/// lookup is by field name (underscore + XML element name with ':' removed), so the field
/// names below must not be renamed.
/// </remarks>
public class RssHeader
{
    // Backing fields. Names mirror the RSS element names, e.g. <dc:language> -> _dclanguage.
    private string _URL;
    private string _title;
    private string _description;
    private string _link;
    private string _language;
    private string _generator;
    private string _ttl;
    private string _copyright;
    private string _pubDate;
    private string _category;
    private string _lastBuildDate;
    private string _managingEditor;
    private string _dclanguage; // dc:language

    /// <summary>Creates a header for the feed located at <paramref name="URL"/>.</summary>
    /// <param name="URL">Address of the feed this header belongs to.</param>
    public RssHeader(string URL)
    {
        _URL = URL;
    }

    /// <summary>Address of the feed, as passed to the constructor.</summary>
    public string URL
    {
        get { return _URL; }
    }

    /// <summary>Value of the channel's <c>title</c> element.</summary>
    public string Title
    {
        get { return _title; }
    }

    /// <summary>Value of the channel's <c>description</c> element.</summary>
    public string Description
    {
        get { return _description; }
    }

    /// <summary>Value of the channel's <c>link</c> element.</summary>
    public string Link
    {
        get { return _link; }
    }

    /// <summary>Value of the channel's <c>language</c> element.</summary>
    public string Language
    {
        get { return _language; }
    }

    /// <summary>Value of the channel's <c>generator</c> element.</summary>
    public string Generator
    {
        get { return _generator; }
    }

    /// <summary>Value of the channel's <c>ttl</c> element, kept as raw text.</summary>
    public string Ttl
    {
        get { return _ttl; }
    }

    /// <summary>Value of the channel's <c>copyright</c> element.</summary>
    public string Copyright
    {
        get { return _copyright; }
    }

    /// <summary>The channel's <c>pubDate</c> text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime PubDate
    {
        get { return Util.ParseDateTime(_pubDate); }
    }

    /// <summary>Value of the channel's <c>category</c> element.</summary>
    public string Category
    {
        get { return _category; }
    }

    /// <summary>The channel's <c>lastBuildDate</c> text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime LastBuildDate
    {
        get { return Util.ParseDateTime(_lastBuildDate); }
    }

    /// <summary>Value of the channel's <c>managingEditor</c> element.</summary>
    public string ManagingEditor
    {
        get { return _managingEditor; }
    }

    /// <summary>Value of the channel's <c>dc:language</c> element.</summary>
    public string DcLanguage
    {
        get { return _dclanguage; }
    }
}
/// <summary>
/// One <c>item</c> entry of an RSS 2.0 feed, exposed through read-only properties.
/// </summary>
/// <remarks>
/// None of the backing fields is assigned inside this class. They are intended to be set
/// from outside via reflection by SimpleRssReader, which looks them up by name
/// (underscore + XML element name with ':' removed; <c>_Header</c> is looked up literally),
/// so the field names below must not be renamed.
/// </remarks>
public class RssItem
{
    // Owning feed header.
    private RssHeader _Header;

    // Standard RSS 2.0 item elements.
    private string _title;
    private string _link;
    private string _description;
    private string _category;
    private string _author;
    private string _pubDate;
    private string _comments;
    private string _guid;

    // Optional extension elements (namespace prefix kept, ':' dropped).
    private string _slashcomments;  // slash:comments
    private string _wfwcomment;     // wfw:comment
    private string _wfwcommentRss;  // wfw:commentRss
    private string _trackbackping;  // trackback:ping

    /// <summary>Header of the feed this item was read from.</summary>
    public RssHeader Header
    {
        get { return _Header; }
    }

    /// <summary>Value of the item's <c>title</c> element.</summary>
    public string Title
    {
        get { return _title; }
    }

    /// <summary>Value of the item's <c>link</c> element.</summary>
    public string Link
    {
        get { return _link; }
    }

    /// <summary>Value of the item's <c>description</c> element.</summary>
    public string Description
    {
        get { return _description; }
    }

    /// <summary>Value of the item's <c>category</c> element.</summary>
    public string Category
    {
        get { return _category; }
    }

    /// <summary>Value of the item's <c>author</c> element.</summary>
    public string Author
    {
        get { return _author; }
    }

    /// <summary>The item's <c>pubDate</c> text converted by <c>Util.ParseDateTime</c>.</summary>
    public DateTime PubDate
    {
        get { return Util.ParseDateTime(_pubDate); }
    }

    /// <summary>Value of the item's <c>comments</c> element.</summary>
    public string Comments
    {
        get { return _comments; }
    }

    /// <summary>Value of the item's <c>guid</c> element.</summary>
    public string Guid
    {
        get { return _guid; }
    }

    /// <summary>Value of the item's <c>slash:comments</c> element.</summary>
    public string SlashComments
    {
        get { return _slashcomments; }
    }

    /// <summary>Value of the item's <c>wfw:comment</c> element.</summary>
    public string WfwComment
    {
        get { return _wfwcomment; }
    }

    /// <summary>Value of the item's <c>wfw:commentRss</c> element.</summary>
    public string WfwCommentRss
    {
        get { return _wfwcommentRss; }
    }

    /// <summary>Value of the item's <c>trackback:ping</c> element.</summary>
    public string TrackbackPing
    {
        get { return _trackbackping; }
    }
}
/// <summary>
/// Minimal RSS 2.0 reader. Loads a feed, fills an <see cref="RssHeader"/> and one
/// <see cref="RssItem"/> per <c>item</c> element by reflection, and raises an event as
/// each of them becomes available.
/// </summary>
/// <remarks>
/// An XML element named <c>foo</c> or <c>ns:foo</c> is stored in the private string field
/// <c>_foo</c> / <c>_nsfoo</c> of the target object when such a field exists; elements
/// without a matching field are ignored.
/// </remarks>
public class SimpleRssReader
{
    /// <summary>Signature of the handler called once the feed header has been read.</summary>
    public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);

    /// <summary>Raised after the channel-level fields have been read, before any item event.</summary>
    public event RssHeaderReceiveEventHandler RssHeaderReceive;

    /// <summary>Signature of the handler called for every item that has been read.</summary>
    public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);

    /// <summary>Raised once per <c>item</c> element, in document order.</summary>
    public event RssItemReceiveEventHandler RssItemReceive;

    // Flags used for every reflective field lookup (the target fields are private).
    private const BindingFlags FieldLookup =
        BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public;

    private Type _TRS; // typeof(RssHeader)
    private Type _tri; // typeof(RssItem)

    // Items collected while a feed is being read.
    private ArrayList _RssItemsAL;

    private RssHeader _rs;

    // Snapshot of the items of the most recently read feed; never null.
    private RssItem[] _RssItems = new RssItem[0];

    /// <summary>Header of the most recently read feed; null until <see cref="Rss"/> has been called.</summary>
    public RssHeader RssHeader
    {
        get { return this._rs; }
    }

    /// <summary>Items of the most recently read feed; an empty array when there are none.</summary>
    public RssItem[] RssItems
    {
        get { return this._RssItems; }
    }

    /// <summary>
    /// Loads the feed at <paramref name="URL"/> and processes every <c>/rss/channel</c> node.
    /// </summary>
    /// <param name="URL">Location of the RSS document, passed to <c>XmlDocument.Load</c>.</param>
    public void Rss(string URL)
    {
        XmlDocument xd = new XmlDocument();
        // The original author notes WebRequest could replace this if it proves too slow.
        xd.Load(URL);
        XmlNodeList xnl = xd.SelectNodes("/rss/channel");

        this._rs = new RssHeader(URL);

        this._TRS = typeof(RssHeader);
        this._tri = typeof(RssItem);

        this._RssItemsAL = new ArrayList();

        foreach (XmlNode xn in xnl)
        {
            this.Travel(xn);
        }

        // Always replace the snapshot so that a feed without items does not leave the
        // previous feed's items (or null) behind.
        this._RssItems = (RssItem[]) this._RssItemsAL.ToArray(typeof(RssItem));
    }

    /// <summary>
    /// Processes one <c>channel</c> node: first its non-item children (the header), then
    /// each <c>item</c> child.
    /// </summary>
    /// <param name="xn">The <c>channel</c> node.</param>
    /// <remarks>
    /// The previous implementation populated the header only when it met the first
    /// <c>item</c> and used that item up in the process, so the first item of every feed
    /// was lost and a feed without items never produced a header. Its recursive call was
    /// guarded by <c>!HasChildNodes</c> and therefore never did anything; only direct
    /// children of the channel are relevant.
    /// </remarks>
    private void Travel(XmlNode xn)
    {
        // Header: every direct child of the channel that is not an item.
        foreach (XmlNode n in xn.ChildNodes)
        {
            if (n.Name != "item")
            {
                AssignField(this._TRS, this._rs, n);
            }
        }

        if (this.RssHeaderReceive != null)
        {
            this.RssHeaderReceive(this, this._rs);
        }

        // Items: one RssItem per <item> child, linked back to the header.
        FieldInfo headerField = this._tri.GetField("_Header", FieldLookup);
        foreach (XmlNode x in xn.ChildNodes)
        {
            if (x.Name != "item")
            {
                continue;
            }

            RssItem ri = new RssItem();
            foreach (XmlNode n in x.ChildNodes)
            {
                if (n.Name != "item")
                {
                    AssignField(this._tri, ri, n);
                }
            }

            if (headerField != null)
            {
                headerField.SetValue(ri, this._rs);
            }

            if (this.RssItemReceive != null)
            {
                this.RssItemReceive(this, ri);
            }
            this._RssItemsAL.Add(ri);
        }
    }

    /// <summary>
    /// Copies the inner text of <paramref name="n"/> into the field of
    /// <paramref name="target"/> named "_" + element name with ':' removed, if it exists.
    /// </summary>
    private static void AssignField(Type t, object target, XmlNode n)
    {
        FieldInfo fi = t.GetField("_" + n.Name.Replace(":", ""), FieldLookup);

        // Only string fields are filled from XML; this keeps an element that happens to be
        // called e.g. "Header" from being forced into a non-string field.
        if (fi != null && fi.FieldType == typeof(string))
        {
            fi.SetValue(target, n.InnerText);
        }
    }
}

/// <summary>
/// Helper routines used by the RSS reader: lenient date parsing and HTML-to-text stripping.
/// </summary>
public class Util
{
    // Patterns applied in order by StripHTML; HtmlReplacements[i] is the text substituted
    // for every match of HtmlPatterns[i].
    private static readonly string[] HtmlPatterns =
    {
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] HtmlReplacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", //chr(161),
        "\xa2", //chr(162),
        "\xa3", //chr(163),
        "\xa9", //chr(169),
        "",
        "\r\n",
        ""
    };

    // Built once instead of on every StripHTML call.
    private static readonly Regex[] HtmlRegexes = BuildRegexes();

    private static Regex[] BuildRegexes()
    {
        Regex[] result = new Regex[HtmlPatterns.Length];
        for (int i = 0; i < HtmlPatterns.Length; i++)
        {
            result[i] = new Regex(HtmlPatterns[i], RegexOptions.Multiline | RegexOptions.IgnoreCase);
        }
        return result;
    }

    /// <summary>
    /// Converts <paramref name="s"/> to a <see cref="DateTime"/> using <c>DateTime.Parse</c>.
    /// </summary>
    /// <param name="s">Date text; may be null or empty.</param>
    /// <returns>
    /// The parsed value, or <c>DateTime.Now</c> when <paramref name="s"/> is null, empty
    /// or cannot be parsed.
    /// </returns>
    public static DateTime ParseDateTime(string s)
    {
        if (s == null || s.Length <= 0)
        {
            return DateTime.Now;
        }

        try
        {
            return DateTime.Parse(s);
        }
        catch (FormatException)
        {
            // Unparseable text: fall back to "now" like the missing-value case.
            return DateTime.Now;
        }
        catch (ArgumentException)
        {
            // Out-of-range components are treated the same way.
            return DateTime.Now;
        }
    }

    /// <summary>
    /// Removes HTML tags, script blocks and comments from <paramref name="HTML"/> and
    /// decodes a small set of character entities.
    /// </summary>
    /// <param name="HTML">Markup to clean; null or empty input is returned unchanged.</param>
    /// <returns>
    /// The text with markup removed. Any '&lt;', '&gt;' and CR/LF pairs still present
    /// after the pattern pass are removed as well.
    /// </returns>
    public static string StripHTML(string HTML)
    {
        if (HTML == null || HTML.Length == 0)
        {
            return HTML;
        }

        string s = HTML;
        for (int i = 0; i < HtmlRegexes.Length; i++)
        {
            s = HtmlRegexes[i].Replace(s, HtmlReplacements[i]);
        }

        // String.Replace returns a new string; the results used to be discarded, which
        // turned these three clean-up steps into no-ops.
        s = s.Replace("<", "");
        s = s.Replace(">", "");
        s = s.Replace("\r\n", "");
        return s;
    }
}
}

 50//测试程序   
 51namespace Test   
 52{   
 53using System;   
 54using System.Data;   
 55using System.Reflection;   
 56using System.Data.SqlClient; 
 57
 58using Microshaoft;   
 59using Microshaoft.Data; 
 60
 61class ConsoleApplication   
 62{   
 63private SqlConnection _Connection;   
 64public string _Channel; 
 65
 66public SqlConnection Connection   
 67{   
 68set   
 69{   
 70this._Connection = value;   
 71}   
 72get   
 73{   
 74return this._Connection;   
 75}   
 76} 
 77
 78static void Main()   
 79{   
 80  
 81string s = " http://www.ccw.com.cn/rss/news2/1.xml ";   
 82s = " http://dzh.mop.com/topic/rss.jsp?type=28 ";   
 83s = " http://www.ccw.com.cn/rss/news2/15.xml ";   
 84s = " http://www.cnblogs.com/rss.aspx?id=-1 ";   
 85s = " http://localhost/rss.xml ";   
 86//s = " http://weblog.siliconvalley.com/column/dangillmor/index.xml ";   
 87//s= " http://www.skyone.com.cn/sub/rss/list_jjsc.xml "; 
 88
 89ConsoleApplication a = new ConsoleApplication(); 
 90
 91a.Connection = new SqlConnection("server=SERVER\\\PSQLKE;user id=sa;password=;database=rss");   
 92a.Connection.Open(); 
 93
 94SimpleRssReader srr = new SimpleRssReader(); 
 95
 96srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);   
 97srr.RssItemReceive +=new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive); 
 98
 99System.Console.WriteLine("waiting ....");   
100srr.Rss(s); //以后改成多线程或异步 
101
102System.Console.WriteLine("print all rss Header and items ....");   
103System.Console.ReadLine();   
104System.Console.WriteLine("Header: "+ srr.RssHeader.Title);   
105foreach (RssItem ri in srr.RssItems)   
106{   
107System.Console.WriteLine("item: " + ri.Title);   
108}   
109System.Console.ReadLine(); 
110
111} 
112
113private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)   
114{   
115System.Console.WriteLine("Header:" + Header.Link);   
116System.Console.WriteLine("Header:" + Header.Title); 
117
118this.SaveToDataBase("SP_AddChannel",typeof(RssHeader),Header); 
119
120} 
121
122private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)   
123{   
124System.Console.WriteLine("Item: " + Item.Title);   
125System.Console.WriteLine("Item: " + Item.Link);   
126System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description)); 
127
128this.SaveToDataBase("SP_AddChannelsDetails",typeof(RssItem),Item); 
129
130}   
131private void SaveToDataBase(string sp, Type t,object instance)   
132{   
133//获取 sp 所有参数   
134SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);   
135System.Collections.Hashtable ht = new System.Collections.Hashtable();   
136  
137for (int i = 0; i &lt; spa.Length; i++)   
138{   
139//保存 参数名称与其位置(次序) 的关系   
140ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i); 
141
142//相当于为存储过程的所有参数赋初值   
143spa[i].Value = null;   
144} 
145
146//得到所有的属性   
147PropertyInfo[] pi = t.GetProperties();   
148foreach (PropertyInfo x in pi)   
149{   
150if (ht.ContainsKey( x.Name.ToLower()))   
151{   
152//根据参数(属性)名称得到参数的次序!   
153int i = (int) ht[x.Name.ToLower()];   
154if (spa[i].Direction == System.Data.ParameterDirection.Input || spa[i].Direction == System.Data.ParameterDirection.InputOutput)   
155{   
156object o;   
157if (x.PropertyType.Name == "String")   
158{   
159o = x.GetValue(instance,null);   
160if (o != null)   
161{   
162string s = Util.StripHTML((string) o);   
163o = s;   
164}   
165}   
166else   
167{   
168o = x.GetValue(instance,null);   
169}   
170  
171spa[i].Value = o;   
172}   
173}   
174  
175} 
176
177if (t == typeof(RssItem))   
178{   
179spa[0].Value = ((RssItem) instance).Header.URL;   
180} 
181
182SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);   
183if (spa[spa.Length - 1].Value != System.DBNull.Value)   
184{   
185System.Console.WriteLine("Save to ID: {0} successful!", spa[spa.Length - 1].Value);   
186}   
187else   
188{   
189System.Console.WriteLine("save failed! may be duplicate!");   
190}   
191}   
192}   
193} 
//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

CREATE TABLE [dbo].[Channels] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
[language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
[generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[ChannelsDetails] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[ChannelID] [int] NULL ,
[title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO


CREATE proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

CREATE proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/
Published At
Categories with Web编程
Tagged with
comments powered by Disqus