页面爬虫(获取其他页面HTML)加载到自己页面示例

时间:2022-08-23 21:57:43
代码如下:

//前台 
<div id="showIframe"></div> 
$(document).ready(function() { 
var url = "@Url.Action("GetPageHtml","Catalog")"; 
$.ajax({ 
url: url, 
type: "POST", 
dataType:"json", 
data: { url: "http://www.baidu.com" }, 
error: function () { 
alert("bbb"); 
}, 
success: function (data) { 
$("#showIframe").append(data); 
//$("#showIframe div").hide(); 
//$("#showIframe>#container").show(); 
//$("#showIframe>#container>#content").show(); 
//$("#showIframe>#container>#content>.cmsPage").show(); 

}); 
}); 
//后台 
/// <summary>
/// Core of the crawler: sends an HTTP request to <paramref name="url"/> and
/// returns the complete HTML of that page as a JSON-encoded string.
/// </summary>
/// <param name="url">Absolute URL of the page to download.</param>
/// <returns>
/// A <see cref="JsonResult"/> wrapping the page text, or an empty string when
/// the download fails for any reason.
/// </returns>
[HttpPost]
public JsonResult GetPageHtml(string url)
{
    // SECURITY NOTE(review): the URL comes straight from the client, so this
    // action will fetch any address the server can reach (SSRF). Restrict it
    // to an allow-list of hosts before exposing it publicly.
    string pageinfo;
    try
    {
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(url);
        myReq.Accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
        myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)";

        // Dispose the response, its stream and the reader deterministically;
        // otherwise the underlying connection stays open until finalization.
        using (HttpWebResponse myRep = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = myRep.GetResponseStream())
        // NOTE(review): Encoding.Default is kept from the original; pages served
        // in another charset may decode incorrectly — confirm the desired encoding.
        using (StreamReader sr = new StreamReader(myStream, Encoding.Default))
        {
            pageinfo = sr.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort: any failure (bad URL, network error,
        // non-HTTP scheme) yields an empty page instead of an error response.
        pageinfo = "";
    }

    return Json(pageinfo);
}