做了一个测试用的小工具,需求如下:
1、有一批URL列表,需要找出哪些URL的响应内容中包含“http:”关键字。
2、url请求包括http和https 2种协议
3、要部署在linux服务器上,且linux服务器只能通过代理来连接外网
贴一下我的核心代码吧:
package com.cn.util; import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map; import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.security.cert.CertificateException;
import javax.security.cert.X509Certificate;
import javax.xml.ws.Response; import net.sf.json.JSONObject; public class HttpGet {
// SSL
private static class TrustAnyTrustManager implements X509TrustManager { public void checkClientTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
} public void checkServerTrusted(X509Certificate[] chain, String authType)
throws CertificateException {
} public void checkClientTrusted(
java.security.cert.X509Certificate[] arg0, String arg1)
throws java.security.cert.CertificateException {
// TODO Auto-generated method stub } public void checkServerTrusted(
java.security.cert.X509Certificate[] arg0, String arg1)
throws java.security.cert.CertificateException {
// TODO Auto-generated method stub } public java.security.cert.X509Certificate[] getAcceptedIssuers() {
// TODO Auto-generated method stub
return null;
}
} private static class TrustAnyHostnameVerifier implements HostnameVerifier {
public boolean verify(String hostname, SSLSession session) {
return true;
}
} // 得到URL
/**
 * Reads a URL list from a text file, one URL per line.
 *
 * Each line is trimmed. Lines that are empty after trimming are reported on
 * standard output together with their line number and are skipped. Every
 * other line is echoed and added to the result as-is (no protocol check is
 * done here).
 *
 * I/O problems are printed and do not propagate: if the file cannot be
 * opened the result is an empty list, and if reading fails midway the lines
 * read so far are returned.
 *
 * @param file path of the text file to read (decoded with the platform
 *             default charset, as FileReader does)
 * @return the non-blank lines in file order; never null
 */
public List<String> getUrl(String file) {
    List<String> urlList = new ArrayList<String>();
    BufferedReader bf = null;
    int linenum = 0;
    try {
        bf = new BufferedReader(new FileReader(file));
        String info;
        while ((info = bf.readLine()) != null) {
            linenum++;
            String url = info.trim();
            if (url.length() > 0) {
                System.out.println("url===>" + url);
                urlList.add(url);
            } else {
                System.out.println("第" + linenum + "行url为空");
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // bf is still null when the file could not be opened; closing it
        // unconditionally would replace the real error with a
        // NullPointerException.
        if (bf != null) {
            try {
                bf.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return urlList;
}

// Collect the URLs whose response status is not 200
/**
 * Returns the URLs that did not answer with HTTP status 200.
 *
 * Every URL is requested with GET through the fixed HTTP proxy
 * 192.168.11.254:8080. A URL ends up in the result when
 * - it does not start with "http:" or "https:", or
 * - the request fails (malformed URL, I/O error, TLS set-up error), or
 * - the response status is anything other than 200.
 *
 * Each URL is handled independently, so one failing URL does not stop the
 * remaining ones from being checked. Failures are printed, never thrown.
 *
 * HTTPS connections use the trust-all TrustAnyTrustManager and
 * TrustAnyHostnameVerifier, i.e. certificates are NOT validated.
 *
 * @param urls URLs to check; null is treated as an empty list and null
 *             elements are skipped
 * @return the URLs considered "not OK", in input order; never null
 */
public List<String> http404(List<String> urls) {
    List<String> result404 = new ArrayList<String>();
    if (urls == null) {
        return result404;
    }

    // Without timeouts a single unresponsive host would block the scan forever.
    final int connectTimeoutMs = 10000;
    final int readTimeoutMs = 30000;

    Proxy proxy = new Proxy(Proxy.Type.HTTP,
            new InetSocketAddress("192.168.11.254", 8080));
    // Built lazily on the first https URL and then reused for all of them.
    SSLContext sc = null;

    for (String url2 : urls) {
        if (url2 == null) {
            continue;
        }
        if (!(url2.startsWith("http:") || url2.startsWith("https:"))) {
            result404.add(url2);
            continue;
        }

        boolean ok = false;
        HttpURLConnection connection = null;
        try {
            // Pass the string straight to URL: re-encoding it through
            // getBytes("utf-8") and the platform charset garbles non-ASCII
            // characters on platforms whose default charset is not UTF-8.
            URL realUrl = new URL(url2);
            connection = (HttpURLConnection) realUrl.openConnection(proxy);
            if (connection instanceof HttpsURLConnection) {
                if (sc == null) {
                    sc = SSLContext.getInstance("TLS");
                    sc.init(null,
                            new TrustManager[] { new TrustAnyTrustManager() },
                            new java.security.SecureRandom());
                }
                HttpsURLConnection https = (HttpsURLConnection) connection;
                https.setSSLSocketFactory(sc.getSocketFactory());
                https.setHostnameVerifier(new TrustAnyHostnameVerifier());
            }
            connection.setConnectTimeout(connectTimeoutMs);
            connection.setReadTimeout(readTimeoutMs);
            // Common request headers
            connection.setRequestProperty("accept", "*/*");
            connection.setRequestProperty("connection", "Keep-Alive");
            connection.setRequestProperty("user-agent",
                    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
            connection.connect();
            ok = connection.getResponseCode() == 200;
        } catch (IOException e) {
            // Also covers MalformedURLException.
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        if (!ok) {
            result404.add(url2);
        }
    }
    return result404;
}

// Request each URL and collect those whose response contains the search keyword
/**
 * Returns the URLs whose response body contains the given keyword.
 *
 * Every URL that starts with "http:" or "https:" is requested with GET
 * through the fixed HTTP proxy 192.168.11.254:8080. The body is read only
 * when the status is 200; otherwise the body is treated as empty. For every
 * input URL the URL and the text that was read are printed to standard
 * output.
 *
 * Each URL is handled independently, so one failing URL does not stop the
 * remaining ones from being checked. Failures are printed, never thrown;
 * whatever part of the body was read before the failure is still searched.
 *
 * HTTPS connections use the trust-all TrustAnyTrustManager and
 * TrustAnyHostnameVerifier, i.e. certificates are NOT validated.
 *
 * @param urls    URLs to request; null is treated as an empty list and null
 *                elements are skipped
 * @param keyword text to look for in the response body; must not be null
 * @return the URLs whose body contains keyword, in input order; never null
 */
public List<String> sendUrl(List<String> urls, String keyword) {
    List<String> result200 = new ArrayList<String>();
    if (urls == null) {
        return result200;
    }

    // Without timeouts a single unresponsive host would block the scan forever.
    final int connectTimeoutMs = 10000;
    final int readTimeoutMs = 30000;

    Proxy proxy = new Proxy(Proxy.Type.HTTP,
            new InetSocketAddress("192.168.11.254", 8080));
    // Built lazily on the first https URL and then reused for all of them.
    SSLContext sc = null;

    for (String url2 : urls) {
        if (url2 == null) {
            continue;
        }
        StringBuilder response = new StringBuilder();
        if (url2.startsWith("http:") || url2.startsWith("https:")) {
            HttpURLConnection connection = null;
            BufferedReader in = null;
            try {
                // Pass the string straight to URL: re-encoding it through
                // getBytes("utf-8") and the platform charset garbles
                // non-ASCII characters on non-UTF-8 platforms.
                URL realUrl = new URL(url2);
                connection = (HttpURLConnection) realUrl.openConnection(proxy);
                if (connection instanceof HttpsURLConnection) {
                    if (sc == null) {
                        sc = SSLContext.getInstance("TLS");
                        sc.init(null,
                                new TrustManager[] { new TrustAnyTrustManager() },
                                new java.security.SecureRandom());
                    }
                    HttpsURLConnection https = (HttpsURLConnection) connection;
                    https.setSSLSocketFactory(sc.getSocketFactory());
                    https.setHostnameVerifier(new TrustAnyHostnameVerifier());
                }
                connection.setConnectTimeout(connectTimeoutMs);
                connection.setReadTimeout(readTimeoutMs);
                // Common request headers
                connection.setRequestProperty("accept", "*/*");
                connection.setRequestProperty("connection", "Keep-Alive");
                connection.setRequestProperty("user-agent",
                        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
                connection.connect();
                if (connection.getResponseCode() == 200) {
                    in = new BufferedReader(new InputStreamReader(
                            connection.getInputStream(),
                            sendUrlCharset(connection.getContentType())));
                    String line;
                    while ((line = in.readLine()) != null) {
                        // Keep the line break so that the end of one line and
                        // the start of the next cannot fuse into a false match.
                        response.append(line).append('\n');
                    }
                }
            } catch (IOException e) {
                // Also covers MalformedURLException.
                e.printStackTrace();
            } catch (NoSuchAlgorithmException e) {
                e.printStackTrace();
            } catch (KeyManagementException e) {
                e.printStackTrace();
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (connection != null) {
                    connection.disconnect();
                }
            }
            if (response.toString().contains(keyword)) {
                result200.add(url2);
            }
        }
        System.out.println(url2 + "=============>"
                + response.toString());
    }
    return result200;
}

/**
 * Picks the charset used to decode a response body for sendUrl.
 *
 * @param contentType value of the Content-Type response header, may be null
 * @return the charset named by the header's "charset=" parameter, or UTF-8
 *         when the header is absent, has no such parameter, or names a
 *         charset this JVM does not know
 */
private static java.nio.charset.Charset sendUrlCharset(String contentType) {
    java.nio.charset.Charset fallback = java.nio.charset.Charset.forName("UTF-8");
    if (contentType == null) {
        return fallback;
    }
    String[] parts = contentType.split(";");
    for (String part : parts) {
        String p = part.trim();
        if (p.toLowerCase(java.util.Locale.ROOT).startsWith("charset=")) {
            String name = p.substring("charset=".length()).replace("\"", "").trim();
            try {
                return java.nio.charset.Charset.forName(name);
            } catch (IllegalArgumentException e) {
                // Illegal or unsupported charset name: decode as UTF-8 instead.
                return fallback;
            }
        }
    }
    return fallback;
}

/**
 * Command-line entry point.
 *
 * Reads the URL list, prints every URL that did not answer with status 200
 * (prefix "404="), then prints every URL whose response body contains the
 * keyword.
 *
 * @param args optional: args[0] is the path of the URL list file (defaults
 *             to the original hard-coded Windows path), args[1] is the
 *             keyword to search for (defaults to "http:")
 */
public static void main(String[] args) {
    String file = args.length > 0 ? args[0] : "E:\\课件\\https\\1.txt";
    String keyword = args.length > 1 ? args[1] : "http:";

    HttpGet ht = new HttpGet();
    // getUrl may be declared with a raw List return type; it only ever
    // stores String elements, so the conversion is safe.
    @SuppressWarnings("unchecked")
    List<String> urls = ht.getUrl(file);

    for (String url : ht.http404(urls)) {
        System.out.println("404=" + url);
    }
    for (String url : ht.sendUrl(urls, keyword)) {
        System.out.println("contains[" + keyword + "]=" + url);
    }
}
}
TrustAnyTrustManager 类中的方法是我在网上找的,目的是处理 https:// 协议的 SSL 加密请求。从代码上看,它的作用是信任所有证书、跳过证书校验;具体原理我也不太懂。
/*
* System.setProperty("proxySet", "true");
* System.setProperty("http.proxyHost", "192.168.11.254");
* System.setProperty("http.proxyPort", "8080");
*/
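上面这种通过系统属性设置代理的方式不行(可能是因为 http.proxyHost/http.proxyPort 只对 http 请求生效,https 请求还需要另外设置 https.proxyHost/https.proxyPort)。改用下面这种显式传入 Proxy 对象的方式: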
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(
"192.168.11.254", 8080));
HttpURLConnection connection = (HttpURLConnection) realUrl
.openConnection(proxy);