最近公司需要去XXX网站爬取某些分类下的商铺信息,访问速率过快的时候容易被站点查封本机IP,要求输入验证码,不得已采用代理IP机制来访问。出于公司机密考虑,IP给屏蔽了,大家可以自己去网上找代理IP来使用。
示例代码
package com.yulore.httpclient;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.util.EntityUtils;
public class HttpClientProxy {
/**
* @param args
*/
public static void main(String[] args) {
String content = httpGet();
System.out.println("返回内容:"+content);
}
/**
* java使用代理发送http请求
* @return
*/
public static String httpGet() {
String ip = "p04.xxxxx.cn";
String content = null;
DefaultHttpClient httpclient = null;
try {
httpclient = new DefaultHttpClient();
/** 设置代理IP **/
HttpHost proxy = new HttpHost(ip, 8080);
httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,proxy);
HttpGet httpget = new HttpGet("http://www.baidu.com");
httpget.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,1000*30);//设置请求超时时间
httpget.setHeader("Proxy-Authorization","Basic eXVsb3JlOll1bG9yZVByb3h5MjAxMzo=");
httpget.setHeader("User-Agent",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1");
httpget.setHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
HttpResponse responses = httpclient.execute(httpget);
HttpEntity entity = responses.getEntity();
content = EntityUtils.toString(entity);
} catch (Exception e) {
e.printStackTrace();
} finally {
httpclient.getConnectionManager().shutdown();//关闭连接
}
return content;
}
}
运行结果
返回内容:<!DOCTYPE html><!--STATUS OK--> <html><head> <meta http-equiv="content-type" content="text/html;charset=utf-8"> <title>百度一下,你就知道</title> <style >html,body{height:100%}html{overflow-y:auto}#wrapper{position:relative;_position:;min-height:100%}#content{padding-bottom:100px;text-align:center}#ftCon{height:100px;position:absolute;bottom:44px;text-align:center;width:100%;margin:0 auto;z-index:0;overflow:hidden}#ftConw{width:720px;margin:0 auto}body{font:12px arial;text-align:;background:#fff}body,p,form,ul,li{margin:0;padding:0;list-style:none}body,form,#fm{position:relative}td{text-align:left}img{border:0}a{color:#00c}a:active{color:#f60}#u{color:#999;padding:4px 10px 5px 0;text-align:right}#u a{margin:0 5px}#u .reg{margin:0}#m{width:720px;margin:0 auto}#nv a,#nv b,.btn,#lk{font-size:14px}#fm{padding-left:110px;text-align:left;z-index:1}input{border:0;padding:0}#nv{height:19px;font-size:16px;margin:0 0 4px;text-align:left;text-indent:137px}.s_ipt_wr{width:418px;height:30px;display:inline-block;margin-right:5px;background:url(http://s1.bdstatic.com/r/www/img/i-1.0.0.png) no-repeat -304px 0;border:1px solid #b6b6b6;border-color:#9a9a9a #cdcdcd #cdcdcd #9a9a9a;vertical-align:top}.s_ipt{width:405px;height:22px;font:16px/22px arial;margin:5px 0 0 7px;background:#fff;outline:0;-webkit-appearance:none}.s_btn{width:95px;height:32px;padding-top:2px\9;font-size:14px;background:#ddd url(http://s1.bdstatic.com/r/www/img/i-1.0.0.png);cursor:pointer}.s_btn_h{background-position:-100px 0}.s_btn_wr{width:97px;height:34px;display:inline-block;background:url(http://s1.bdstatic.com/r/www/img/i-1.0.0.png) no-repeat -202px 0;*position:relative;z-index:0;vertical-align:top}#lg img{vertical-align:top;margin-bottom:3px}#lk{margin:33px 0}#lk span{font:14px "宋体"}#lm{height:60px}#lh{margin:16px 0 5px;word-spacing:3px}.tools{position:absolute;top:-4px;*top:10px;right:7px}#mHolder{width:62px;position:relative;z-index:296;display:none}#mCon{height:18px;line-height:18px;position:absolute;cursor:pointer;padding:0 18px 0 0;background:url(http://s1.bdstatic.com/r/www/img/bg-1.0.0.gif) no-repeat right -134px;background-position:right -136px\9}#mCon span{color:#00c;cursor:default;display:block}#mCon .hw{text-decoration:underline;cursor:pointer}#mMenu a{width:100%;height:100%;display:block;line-height:22px;text-indent:6px;text-decoration:none;filter:none\9}#mMenu,#user ul{box-shadow:1px 1px 2px #ccc;-moz-box-shadow:1px 1px 2px #ccc;-webkit-box-shadow:1px 1px 2px #ccc;filter:progid:DXImageTransform.Microsoft.Shadow(Strength=2,Direction=135,Color="#cccccc")\9}#mMenu{width:56px;border:1px solid #9b9b9b;list-style:none;position:absolute;right:27px;top:28px;display:none;background:#fff}#mMenu a:hover{background:#ebebeb}#mMenu .ln{height:1px;background:#ebebeb;overflow:hidden;font-size:1px;line-height:1px;margin-top:-1px}#cp,#cp a{color:#666}#seth{display:none;behavior:url(#default#homepage)}#setf{display:none}#sekj{margin-left:14px}#shouji{margin-right:14px}</style> <script>function h(obj){obj.style.behavior='url(#default#homepage)';var a = obj.setHomePage('http://www.baidu.com/');}</script></head><body> <div id="wrapper"><div id="content"> <div id="u"><a href="http://www.baidu.com/gaoji/preferences.html" name="tj_setting">搜索设置</a>|<a href="https://passport.baidu.com/v2/?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" name="tj_login" id="lb" onclick="return false;">登录</a><a href="https://passport.baidu.com/v2/?reg®Type=1&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2F" target="_blank" name="tj_reg" class="reg">注册</a></div> <div id="m"> <p id="lg"><img src="http://www.baidu.com/img/0308_c3d9a429ad71647eb5f59623e7ec60a0.gif" width="270" height="129" usemap="#mp" ><map name="mp"><area shape="rect" coords="7,3,264,126" href="http://music.baidu.com/huodong/sendsong2013" target="_blank" title="今天把爱唱给她听" onmousedown="return ns_c({'fm':'behs','tab':'bdlogo'})" ></map></p> <p id="nv"><a href="http://news.baidu.com">新 闻</a> <b>网 页</b> <a href="http://tieba.baidu.com">贴 吧</a> <a href="http://zhidao.baidu.com">知 道</a> <a href="http://music.baidu.com">音 乐</a> <a href="http://image.baidu.com">图 片</a> <a href="http://video.baidu.com">视 频</a> <a href="http://map.baidu.com">地 图</a></p><div id="fm"><form name="f" action="/s"><span class="s_ipt_wr"><input type="text" name="wd" id="kw" maxlength="100" class="s_ipt"></span><input type="hidden" name="rsv_bp" value="0"><input type=hidden name=ch value=""><input type=hidden name=tn value="baidu"><input type=hidden name=bar value=""><input type="hidden" name="rsv_spt" value="3"><input type="hidden" name="ie" value="utf-8"><span class="s_btn_wr"><input type="submit" value="百度一下" id="su" class="s_btn" onmousedown="this.className='s_btn s_btn_h'" onmouseout="this.className='s_btn'"></span></form><span class="tools"><span id="mHolder"><div id="mCon"><span>输入法</span></div></span></span><ul id="mMenu"><li><a href="#" name="ime_hw">手写</a></li><li><a href="#" name="ime_py">拼音</a></li><li class="ln"></li><li><a href="#" name="ime_cl">关闭</a></li></ul></div> <p id="lk"><a href="http://baike.baidu.com">百科</a> <a href="http://wenku.baidu.com">文库</a> <a href="http://www.hao123.com">hao123</a><span> | <a href="http://www.baidu.com/more/">更多>></a></span></p><p id="lm"></p> </div> </div> <div id="ftCon"><div id="ftConw"> <p ><a id="seth" onClick="h(this)" href="/" onmousedown="return ns_c({'fm':'behs','tab':'homepage','pos':0})">把百度设为主页</a><a id="setf" href="http://www.baidu.com/cache/sethelp/index.html" onmousedown="return ns_c({'fm':'behs','tab':'favorites','pos':0})" target="_blank">把百度设为主页</a><span id="sekj"><a href="http://liulanqi.baidu.com/ps.php" target="_blank" onmousedown="return ns_c({'fm':'behs','tab':'bdbrwlk','pos':1})">安装百度浏览器</a></span></p><p id="lh"><a href="http://e.baidu.com/?refer=888" onmousedown="return ns_c({'fm':'behs','tab':'btlink','pos':2})">加入百度推广</a> | <a href="http://top.baidu.com">搜索风云榜</a> | <a href="http://home.baidu.com">关于百度</a> | <a href="http://ir.baidu.com">About Baidu</a></p><p id="cp">©2013 Baidu <a href="/duty/">使用百度前必读</a> <a href="http://www.miibeian.gov.cn" target="_blank">京ICP证030173号</a> <img src="http://www.baidu.com/cache/global/img/gs.gif"></p></div></div> </div> </body><script>var bds={se:{},comm : {ishome : 1,sid : "1443_1944_1788",user : "",username : "",sugHost : "http://suggestion.baidu.com/su",personalData : "",loginAction : []}}</script><script type="text/javascript" src="http://s1.bdstatic.com/r/www/cache/global/js/home-2.8.js" charset="gbk"></script><script>var bdUser = null;var w=window,d=document,n=navigator,k=d.f.wd,a=d.getElementById("nv").getElementsByTagName("a"),isIE=n.userAgent.indexOf("MSIE")!=-1&&!window.opera;(function(){if(/q=([^&]+)/.test(location.search)){k.value=decodeURIComponent(RegExp["\x241"])}})();if(n.cookieEnabled){bds.se.sug();};function addEV(o, e, f){if(w.attachEvent){o.attachEvent("on" + e, f);}else if(w.addEventListener){ o.addEventListener(e, f, false);}}function G(id){return d.getElementById(id);}function ns_c(q){var p = encodeURIComponent(window.document.location.href), sQ = '', sV = '', mu='', img = window["BD_PS_C" + (new Date()).getTime()] = new Image();for (v in q) {sV = q[v];sQ += v + "=" + sV + "&";} mu= "&mu=" + p ;img.src = "http://nsclick.baidu.com/v.gif?pid=201&pj=www&rsv_sid=1443_1944_1788&" + sQ + "path="+p+"&t="+new Date().getTime();return true;}if(/\bbdime=[12]/.test(d.cookie)){document.write('<script src=http://s1.bdstatic.com/r/www/cache/ime/js/openime-1.0.1.js charset="gbk"><\/script>');}(function(){var u = G("u").getElementsByTagName("a"), nv = G("nv").getElementsByTagName("a"), lk = G("lk").getElementsByTagName("a"), un = "";var tj_nv = ["news","tieba","zhidao","mp3","img","video","map"];var tj_lk = ["baike","wenku","hao123","more"];un = bds.comm.user == "" ? "" : bds.comm.user;function _addTJ(obj){addEV(obj, "mousedown", function(e){var e = e || window.event;var target = e.target || e.srcElement;ns_c({'fm':'behs','tab':target.name||'tj_user','un':encodeURIComponent(un)});});}for(var i = 0; i < u.length; i++){_addTJ(u[i]);}for(var i = 0; i < nv.length; i++){nv[i].name = 'tj_' + tj_nv[i];}for(var i = 0; i < lk.length; i++){lk[i].name = 'tj_' + tj_lk[i];}})();(function() {var links = {'tj_news': ['word', 'http://news.baidu.com/ns?tn=news&cl=2&rn=20&ct=1&ie=utf-8'],'tj_tieba': ['kw', 'http://tieba.baidu.com/f?ie=utf-8'],'tj_zhidao': ['word', 'http://zhidao.baidu.com/search?pn=0&rn=10&lm=0'],'tj_mp3': ['key', 'http://music.baidu.com/search?fr=ps&ie=utf-8'],'tj_img': ['word', 'http://image.baidu.com/i?ct=201326592&cl=2&nc=1&lm=-1&st=-1&tn=baiduimage&istype=2&fm=&pv=&z=0&ie=utf-8'],'tj_video': ['word', 'http://video.baidu.com/v?ct=301989888&s=25&ie=utf-8'],'tj_map': ['wd', 'http://map.baidu.com/?newmap=1&ie=utf-8&s=s'],'tj_baike': ['word', 'http://baike.baidu.com/search/word?pic=1&sug=1&enc=utf8'],'tj_wenku': ['word', 'http://wenku.baidu.com/search?ie=utf-8']};var domArr = [G('nv'), G('lk')],kw = G('kw');for (var i = 0, l = domArr.length; i < l; i++) {domArr[i].onmousedown = function(e) {e = e || window.event;var target = e.target || e.srcElement,name = target.getAttribute('name'),items = links[name],reg = new RegExp('^\\s+|\\s+\x24'),key = kw.value.replace(reg, '');if (items) {if (key.length > 0) {var wd = items[0], url = items[1],url = url + ( name === 'tj_map' ? encodeURIComponent('&' + wd + '=' + key) : ( ( url.indexOf('?') > 0 ? '&' : '?' ) + wd + '=' + encodeURIComponent(key) ) );target.href = url;} else {target.href = target.href.match(new RegExp('^http:\/\/.+\.baidu\.com'))[0];}}name && ns_c({'fm': 'behs','tab': name,'query': encodeURIComponent(key),'un': encodeURIComponent(bds.comm.user || '') });};}})();addEV(w,"load",function(){k.focus()});w.onunload=function(){};</script><script type="text/javascript" src="http://s1.bdstatic.com/r/www/cache/global/js/tangram-1.3.4c1.0.js"></script><script type="text/javascript" src="http://s1.bdstatic.com/r/www/cache/user/js/u-1.3.4.js" charset="gbk"></script><script>try{document.cookie="WWW_ST=;expires=Sat, 01 Jan 2000 00:00:00 GMT";baidu.on(document.forms[0],"submit",function(){var _t=new Date().getTime();document.cookie = "WWW_ST=" + _t +";expires=" + new Date(_t + 10000).toGMTString()})}catch(e){}</script></html><!--c478e250a09cb729-->
OK,搞定啦!!!