利用正则表达式将html网页数据变成Web Service

这次的题目很简单,中国银行有一个查询当天汇率的网页( http://www.bank-of-china.com/info/qpindex.shtml ),不过它是传统的Html格式,中行既没有提供Xml格式,也没有提供WebService查询。现在如果希望其他的信息系统能够随时读取其中的数据,那么最方便的莫过于中行提供一个WebService接口供大家调用,这也是典型的、安全的WebService应用。可惜中行没有做,那么我们能不能自己来做呢?当然可以,只要用程序分析其html网页,就可以很容易地读取其中的数据。文本分析,当然要看我们的"Regular Expression"(呵呵,其实这才是写这个程序的真实目的 -- 应用正则表达式)。

中行的页面类似于:

**

**日期:** 2004/09/30 有效期至2004/10/07

| 货币名称 | 现汇买入价 | 现钞买入价 | 卖出价 | 基准价 |
| --- | ---: | ---: | ---: | ---: |
| 英镑 | 1488.1700 | 1453.1500 | 1492.6400 | |
| 港币 | 105.9700 | 105.3300 | 106.2900 | 106.1100 |
| 美元 | 826.4200 | 821.4500 | 828.9000 | 827.6600 |
| 瑞士法郎 | 655.9300 | 641.1400 | 659.2200 | |
| 新加坡元 | 488.7600 | 477.2600 | 490.2300 | |
| 瑞典克朗 | 112.4900 | 109.8400 | 112.8300 | |
| 丹麦克朗 | 136.5900 | 133.3700 | 137.0000 | |
| 挪威克朗 | 121.9500 | 119.0800 | 122.3100 | |
| 日元 | 7.4344 | 7.3785 | 7.4717 | 7.4519 |
| 加拿大元 | 650.8000 | 635.4800 | 652.7600 | |
| 澳大利亚元 | 591.9900 | 578.6400 | 594.9600 | |
| 欧元 | 1019.6400 | 1010.9600 | 1022.7000 | 1019.7000 |
| 澳门元 | 103.2200 | 102.6000 | 103.5300 | |
| 菲律宾比索 | 14.6700 | 14.3300 | 14.7200 | |
| 泰国铢 | 19.9000 | 19.4300 | 19.9600 | |
| 新西兰元 | 553.7000 | | 555.3600 | |

**

对其代码分析后,给出了一个正则表达式,当然这个表达式还不完善,但是针对目前比较固定的中行的汇率页面来说,暂时还没有问题。

```csharp
@"<tr bgcolor=""#\w+""><td height=""20"">(?<currency>.*)</td>\s*" +
@"<td height=""20""><p align=""right"">(?<bankbuytt>\d*.?\d*)(&nbsp;)+.?</p></td>\s*" +
@"<td height=""20""><p align=""right"">(?<buynotes>\d*.?\d*)(&nbsp;)+.?</p></td>\s*" +
@"<td height=""20""><p align=""right"">(?<sell>\d*.?\d*)(&nbsp;)+.?</p></td>\s*" +
@"<td height=""20""><p align=""right"">(?<base>\d*.?\d*)(&nbsp;)+.?</p></td>\s*"
```
然后过滤就非常简单了。我一直以为代码是最好的说明,特别是对于优雅的语言来说,因此我就不多说了,代码伺候。
  9
这是所建WebService页面ForeignExchange.asmx的代码:
 11
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Services;
using System.Xml;
 24  
 25namespace ChinaBank   
 26{   
 27/// <summary>   
 28/// Summary description for ForeignExchange.   
 29/// </summary>   
 30[  WebService  (  Namespace  =  "http://dancefires.com/ChinaBank/"  )]   
 31public  class  ForeignExchange  :  System  .  Web  .  Services  .  WebService   
 32{   
 33public ForeignExchange  ()   
 34{   
 35//CODEGEN: This call is required by the ASP.NET Web Services Designer   
 36InitializeComponent  ();   
 37}   
 38  
 39#region Component Designer generated code   
 40  
 41//Required by the Web Services Designer   
 42private IContainer components  =  null  ;   
 43  
 44/// <summary>   
 45/// Required method for Designer support - do not modify   
 46/// the contents of this method with the code editor.   
 47/// </summary>   
 48private void InitializeComponent  ()   
 49{   
 50}   
 51  
 52/// <summary>   
 53/// Clean up any resources being used.   
 54/// </summary>   
 55protected override void Dispose  (  bool disposing  )   
 56{   
 57if(  disposing  &amp;&amp; components  !=  null  )   
 58{   
 59components  .  Dispose  ();   
 60}   
 61base  .  Dispose  (  disposing  );   
 62}   
 63  
 64#endregion   
 65  
 66[  WebMethod  ]   
 67public XmlDataDocument GetForeignExchangeRates  ()   
 68{   
 69return  getXmlDoc  ();   
 70}   
 71[  WebMethod  ]   
 72public DataSet GetForeignExchangeRatesDataSet  ()   
 73{   
 74return  getXmlDoc  ().  DataSet  ;   
 75}   
 76[  WebMethod  ]   
 77public string GetBankPage  ()   
 78{   
 79return  getWebContent  (  "http://www.bank-of-china.com/info/whjrpj.html"  );   
 80}   
 81// private methods   
 82private string getWebContent  (  string url  )   
 83{   
 84using  (  WebClient client  = new  WebClient  () )   
 85{   
 86byte  []  buffer  =  client  .  DownloadData  (  url  );   
 87string str  =  Encoding  .  GetEncoding  (  "GB2312"  ).  GetString  (  buffer  ,  0  ,  buffer  .  Length  );   
 88return  str  ;   
 89}   
 90}   
 91private XmlDataDocument getXmlDoc  ()   
 92{   
 93string webcontent  =  getWebContent  (  "http://www.bank-of-china.com/info/whjrpj.html"  );   
 94  
 95// Prepair for DataSet   
 96DataSet ds  = new  DataSet  (  "Exchange"  );   
 97DataTable dt  = new  DataTable  (  "ForeignExchange"  );   
 98ds  .  Tables  .  Add  (  dt  );   
 99dt  .  Columns  .  Add  (  "Currency"  ,  typeof  (string) );   
100dt  .  Columns  .  Add  (  "BankBuyTT"  ,  typeof  (double) );   
101dt  .  Columns  .  Add  (  "BankBuyNotes"  ,  typeof  (double) );   
102dt  .  Columns  .  Add  (  "BankSell"  ,  typeof  (double) );   
103dt  .  Columns  .  Add  (  "Baseline"  ,  typeof  (double) );   
104XmlDataDocument xmldoc  = new  XmlDataDocument  (  ds  );   
105  
106Regex expr  = new  Regex  (   
107@  "<tr bgcolor="#\w+"><td height="20">(?<currency>.*)</currency></td>\s*"  \+   
108@  "<td height="20"><p align="right">(?<bankbuytt>\d*.?\d*)( )+.?</bankbuytt></p></td>\s*"  \+   
109@  "<td height="20"><p align="right">(?<buynotes>\d*.?\d*)( )+.?</buynotes></p></td>\s*"  \+   
110@  "<td height="20"><p align="right">(?<sell>\d*.?\d*)( )+.?</sell></p></td>\s*"  \+   
111@  "<td height="20"><p align="right">(?<base/>\d*.?\d*)( )+.?</p></td>\s*"   
112,  RegexOptions  .  Compiled  );   
113for(  Match m  =  expr  .  Match  (  webcontent  ) ;  m  .  Success  ;  m  =  m  .  NextMatch  () )   
114{   
115string key  ;   
116DataRow row  =  dt  .  NewRow  ();   
117row  [  "Currency"  ] =  m  .  Groups  [  "currency"  ];   
118key  =  m  .  Groups  [  "bankbuytt"  ].  ToString  ();   
119row  [  "BankBuyTT"  ] =  key  .  Length  &gt; 0  ?  Convert  .  ToDouble  (  key  )/  100  :  0  ;   
120key  =  m  .  Groups  [  "buynotes"  ].  ToString  ();   
121row  [  "BankBuyNotes"  ] =  key  .  Length  &gt; 0  ?  Convert  .  ToDouble  (  key  )/  100  :  0  ;   
122key  =  m  .  Groups  [  "sell"  ].  ToString  ();   
123row  [  "BankSell"  ] =  key  .  Length  &gt; 0  ?  Convert  .  ToDouble  (  key  )/  100  :  0  ;   
124key  =  m  .  Groups  [  "base"  ].  ToString  ();   
125row  [  "Baseline"  ] =  key  .  Length  &gt; 0  ?  Convert  .  ToDouble  (  key  )/  100  :  0  ;   
126dt  .  Rows  .  Add  (  row  );   
127}   
128return  xmldoc  ;   
129}   
130}   
131}   
132
133
客户端也很容易,只要用wsdl生成了相应的WebService Proxy后,直接调用就行了。由于我让Server端返回了DataSet,因此客户端直接用DataGrid来显示DataSet即可,非常Easy,在这个问题上客户端没有什么技术关键点。
135
136using System  ;   
137using System  .  Threading  ;   
138using System  .  Drawing  ;   
139using System  .  Collections  ;   
140using System  .  ComponentModel  ;   
141using System  .  Windows  .  Forms  ;   
142  
namespace BankDataClient
{
    /// <summary>
    /// Main window of the client: calls the ForeignExchange web service through
    /// its generated proxy and shows the returned DataSet in a DataGrid.
    /// </summary>
    public class frmMainBankRates : System.Windows.Forms.Form
    {
        /// <summary>Service address used when no usable "url" app setting exists.</summary>
        private const string DefaultServiceUrl = "http://www.dancefires.com/ChinaBank/ForeignExchange.asmx";

        private System.Windows.Forms.DataGrid dataGrid1;
        private System.Windows.Forms.Button btnConnect;
        private System.Data.DataSet ds;
        // Proxy class generated from the service's WSDL.
        private BankDataClient.com.dancefires.www.ForeignExchange proxy = new BankDataClient.com.dancefires.www.ForeignExchange();
        private System.Windows.Forms.TextBox txtUrl;
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        /// <summary>
        /// Builds the form and points the proxy at the address from the "url"
        /// app setting, falling back to <see cref="DefaultServiceUrl"/>.
        /// </summary>
        public frmMainBankRates()
        {
            //
            // Required for Windows Form Designer support
            //
            InitializeComponent();

            try
            {
                string configuredUrl = System.Configuration.ConfigurationSettings.AppSettings["url"];
                if (configuredUrl == null || configuredUrl.Length == 0)
                {
                    // No setting present: use the default without relying on an exception.
                    UseDefaultServiceUrl();
                }
                else
                {
                    // NOTE(review): the proxy is expected to reject a malformed
                    // address by throwing, which the catch below turns into the default.
                    proxy.Url = configuredUrl;
                    txtUrl.Text = configuredUrl;
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: an unreadable configuration or an invalid
                // address must not prevent the form from opening.
                UseDefaultServiceUrl();
            }
        }

        /// <summary>Points the proxy and the address box at the built-in default address.</summary>
        private void UseDefaultServiceUrl()
        {
            proxy.Url = DefaultServiceUrl;
            txtUrl.Text = proxy.Url;
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.dataGrid1 = new System.Windows.Forms.DataGrid();
            this.ds = new System.Data.DataSet();
            this.btnConnect = new System.Windows.Forms.Button();
            this.txtUrl = new System.Windows.Forms.TextBox();
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).BeginInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).BeginInit();
            this.SuspendLayout();
            //
            // dataGrid1
            //
            this.dataGrid1.DataMember = "";
            this.dataGrid1.DataSource = this.ds;
            this.dataGrid1.HeaderForeColor = System.Drawing.SystemColors.ControlText;
            this.dataGrid1.Location = new System.Drawing.Point(32, 48);
            this.dataGrid1.Name = "dataGrid1";
            this.dataGrid1.Size = new System.Drawing.Size(480, 256);
            this.dataGrid1.TabIndex = 0;
            //
            // ds
            //
            this.ds.DataSetName = "Exchange";
            this.ds.Locale = new System.Globalization.CultureInfo("zh-CN");
            //
            // btnConnect
            //
            this.btnConnect.Location = new System.Drawing.Point(432, 16);
            this.btnConnect.Name = "btnConnect";
            this.btnConnect.TabIndex = 1;
            this.btnConnect.Text = "连接";
            this.btnConnect.Click += new System.EventHandler(this.btnConnect_Click);
            //
            // txtUrl
            //
            this.txtUrl.Location = new System.Drawing.Point(32, 16);
            this.txtUrl.Name = "txtUrl";
            this.txtUrl.Size = new System.Drawing.Size(384, 20);
            this.txtUrl.TabIndex = 2;
            this.txtUrl.Text = "";
            //
            // frmMainBankRates
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
            this.ClientSize = new System.Drawing.Size(544, 318);
            this.Controls.Add(this.txtUrl);
            this.Controls.Add(this.btnConnect);
            this.Controls.Add(this.dataGrid1);
            this.Name = "frmMainBankRates";
            this.Text = "Foreign Exchange Rates of Bank of China";
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).EndInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).EndInit();
            this.ResumeLayout(false);

        }
        #endregion

        /// <summary>Click handler of the connect button: reloads the grid.</summary>
        private void btnConnect_Click(object sender, System.EventArgs e)
        {
            UpdateDataGrid();
        }

        /// <summary>
        /// Calls the service at the address typed into the text box and binds the
        /// returned "ForeignExchange" table to the grid. Any failure is shown in a
        /// message box. The button and the address box are locked during the call
        /// and always unlocked afterwards.
        /// </summary>
        private void UpdateDataGrid()
        {
            try
            {
                btnConnect.Enabled = false;
                txtUrl.ReadOnly = true;
                proxy.Url = txtUrl.Text;
                ds = proxy.GetForeignExchangeRatesDataSet();
                dataGrid1.SetDataBinding(ds, "ForeignExchange");
                dataGrid1.Update();
            }
            catch (Exception err)
            {
                MessageBox.Show(err.Message);
            }
            finally
            {
                txtUrl.ReadOnly = false;
                btnConnect.Enabled = true;
            }
        }

        /// <summary>Application entry point: opens the main form.</summary>
        [STAThread]
        static void Main(string[] args)
        {
            Application.Run(new frmMainBankRates());
        }
    }
}
287
288
有了这个例子,应该可以从中了解最基本的XML, WebService, Regular Expression, DataSet, DataGrid的知识。
290
软件所有代码及相关截屏可以从下面的链接中获得:
292
http://www.dancefires.com/ChinaBank/
294
295**</tr></tr>
Published At
Categories with Web编程
Tagged with
comments powered by Disqus