教你一次下载网页中的所有资源

看过一篇关于下载网页中图片的文章,它只能下载以http头的图片,我做了些改进,可以下载网页中的所有连接资源,并按照网页中的目录结构建立本地目录,存放资源。
download.asp?url=你要下载的网页

download.asp代码如下

  1   
  2Server.ScriptTimeout=9999   
  3function SaveToFile(from,tofile)   
  4on error resume next   
  5dim geturl,objStream,imgs   
  6geturl=trim(from)   
  7Mybyval=getHTTPstr(geturl)   
  8Set objStream = Server.CreateObject("ADODB.Stream")   
  9objStream.Type =1   
 10objStream.Open   
 11objstream.write Mybyval   
 12objstream.SaveToFile tofile,2   
 13objstream.Close()   
 14set objstream=nothing   
 15if err.number<>0 then err.Clear   
 16end function 
 17
 18function geturlencodel(byval url)'中文文件名转换   
 19Dim i,code   
 20geturlencodel=""   
 21if trim(Url)="" then exit function   
 22for i=1 to len(Url)   
 23code=Asc(mid(Url,i,1))   
 24if code<0 Then code = code + 65536   
 25If code>255 Then   
 26geturlencodel=geturlencodel&"%"&Left(Hex(Code),2)&"%"&Right(Hex(Code),2)   
 27else   
 28geturlencodel=geturlencodel∣(Url,i,1)   
 29end if   
 30next   
 31end function   
 32function getHTTPPage(url)   
 33on error resume next   
 34dim http   
 35set http=Server.createobject("Msxml2.XMLHTTP")   
 36Http.open "GET",url,false   
 37Http.send()   
 38if Http.readystate<>4 then exit function   
 39getHTTPPage=bytes2BSTR(Http.responseBody)   
 40set http=nothing   
 41if err.number<>0 then err.Clear   
 42end function 
 43
 44Function bytes2BSTR(vIn)   
 45dim strReturn   
 46dim i,ThisCharCode,NextCharCode   
 47strReturn = ""   
 48For i = 1 To LenB(vIn)   
 49ThisCharCode = AscB(MidB(vIn,i,1))   
 50If ThisCharCode < &H80 Then   
 51strReturn = strReturn & Chr(ThisCharCode)   
 52Else   
 53NextCharCode = AscB(MidB(vIn,i+1,1))   
 54strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode))   
 55i = i + 1   
 56End If   
 57Next   
 58bytes2BSTR = strReturn   
 59End Function   
 60  
 61function getFileName(byval filename)   
 62if instr(filename,"/")>0 then   
 63fileExt_a=split(filename,"/")   
 64getFileName=lcase(fileExt_a(ubound(fileExt_a)))   
 65if instr(getFileName,"?")>0 then   
 66getFileName=left(getFileName,instr(getFileName,"?")-1)   
 67end if   
 68else   
 69getFileName=filename   
 70end if   
 71end function 
 72
 73  
 74function getHTTPstr(url)   
 75on error resume next   
 76dim http   
 77set http=server.createobject("MSXML2.XMLHTTP")   
 78Http.open "GET",url,false   
 79Http.send()   
 80if Http.readystate<>4 then exit function   
 81getHTTPstr=Http.responseBody   
 82set http=nothing   
 83if err.number<>0 then err.Clear   
 84end function 
 85
 86  
 87Function CreateDIR(ByVal LocalPath) '建立目录的程序,如果有多级目录,则一级一级的创建   
 88On Error Resume Next   
 89LocalPath = Replace(LocalPath, "\", "/")   
 90Set FileObject = server.CreateObject("Scripting.FileSystemObject")   
 91patharr = Split(LocalPath, "/")   
 92path_level = UBound(patharr)   
 93For I = 0 To path_level   
 94If I = 0 Then pathtmp = patharr(0) & "/" Else pathtmp = pathtmp & patharr(I) & "/"   
 95cpath = Left(pathtmp, Len(pathtmp) - 1)   
 96If Not FileObject.FolderExists(cpath) Then FileObject.CreateFolder cpath 
 97
 98Next   
 99Set FileObject = Nothing   
100If Err.Number <> 0 Then   
101CreateDIR = False   
102Err.Clear   
103Else   
104CreateDIR = True   
105End If   
106End Function   
107function GetfileExt(byval filename)   
108fileExt_a=split(filename,".")   
109GetfileExt=lcase(fileExt_a(ubound(fileExt_a)))   
110end function 
111
112function getvirtual(str,path,urlhead)   
113if left(str,7)="http://" then   
114url=str   
115elseif left(str,1)="/" then   
116start=instrRev(str,"/")   
117if start=1 then   
118url="/"   
119else   
120url=left(str,start)   
121end if   
122url=urlhead&url   
123elseif left(str,3)="../" then   
124str1=mid(str,inStrRev(str,"../")+2)   
125ar=split(str,"../")   
126lv=ubound(ar)+1   
127ar=split(path,"/")   
128url="/"   
129for i=1 to (ubound(ar)-lv)   
130url=url&ar(i)   
131next   
132url=url&str1   
133url=urlhead&url   
134else   
135url=urlhead&str   
136end if   
137getvirtual=url   
138end function   
139'示例代码   
140dim dlpath 
141
142virtual="/downweb/"   
143truepath=server.MapPath(virtual) 
144
145if request("url")<> "" then   
146url=request("url")   
147fn=getFileName(url)   
148urlhead=left(url,(instr(replace(url,"//",""),"/")+1))   
149urlpath=replace(left(url,instrRev(url,"/")),urlhead,"")   
150strContent = getHTTPPage(url)   
151mystr=strContent   
152Set objRegExp = New Regexp   
153objRegExp.IgnoreCase = True   
154objRegExp.Global = True   
155objRegExp.Pattern = "(src|href)=.[^\>]+? "   
156Set Matches =objRegExp.Execute(strContent)   
157For Each Match in Matches   
158str=Match.Value   
159str=replace(str,"src=","")   
160str=replace(str,"href=","")   
161str=replace(str,"""","")   
162str=replace(str,"'","")   
163filename=GetfileName(str)   
164getRet=getVirtual(str,urlpath,urlhead)   
165temp=Replace(getRet,"//","**")   
166start=instr(temp,"/")   
167endt=instrRev(temp,"/")-start+1   
168if start>0 then   
169repl=virtual∣(temp,start)&" "   
170'response.Write repl&"

<br/>

1"   
2mystr=Replace(mystr,str,repl) 
3
4dir=mid(temp,start,endt)   
5temp=truepath&Replace(dir,"/","\")   
6CreateDir(temp)   
7'response.Write getRet&"||"&temp&filename&"

<br/>

<br/>

1"   
2SaveToFile getRet,temp&filename   
3end if   
4Next   
5set Matches=nothing   
6end if   
Published At
Categories with Web编程
Tagged with
comments powered by Disqus