java分别通过httpclient和HttpURLConnection获取图片验证码内容

时间:2020-12-28 02:36:43

前面的文章,介绍了如何通过selenium+Tesseract-OCR来识别图片验证码。如果用接口来访问的话,再用selenium就显得笨重,下面就介绍一下分别通过httpclient和HttpURLConnection,用流的方式获取图片验证码内容。

1.通过HttpURLConnection

package com.imgyzm;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL; import org.openqa.selenium.io.FileHandler; /**
* @author QiaoJiafei
* @version 创建时间:2015年11月9日 上午11:31:14
* 类说明
*/
public class GetYZMByURL {
    // Downloads a captcha image with HttpURLConnection, saves it to disk and
    // runs the Tesseract command-line tool on it to read the text.

    /** Address the captcha image is fetched from. */
    private static final String CAPTCHA_URL =
            "http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5 * 1000;

    public static void main(String[] args) throws Exception {
        getYzm();
    }

    /**
     * Fetches the captcha image, writes it to {@code D:/BeautyGirl.jpg}, runs
     * Tesseract on it and reads the recognised text from {@code D:\ddd\yzm.txt}.
     *
     * <p>Failures are printed to standard error and are not propagated.
     *
     * @return the trimmed OCR text, or an empty string when any step failed or
     *         Tesseract produced no result file
     */
    public static String getYzm() {
        String s = "";
        HttpURLConnection conn = null;
        try {
            URL url = new URL(CAPTCHA_URL);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block this call forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);

            // Read the whole image into memory; readInputStream also closes the stream.
            byte[] data = readInputStream(conn.getInputStream());

            File imageFile = new File("D:/BeautyGirl.jpg");
            // try-with-resources closes the file even if the write fails.
            try (FileOutputStream outStream = new FileOutputStream(imageFile)) {
                outStream.write(data);
            }

            File file = new File("D:\\ddd\\yzm.txt");
            // Remove any result left by an earlier run so a failed OCR run
            // cannot be mistaken for a fresh one.
            file.delete();

            // NOTE(review): the trailing "-1" is passed through unchanged; it may
            // be a typo for "-l <lang>" - confirm against the Tesseract version in use.
            Process ocr = new ProcessBuilder(
                    "cmd.exe", "/C", "tesseract.exe",
                    "D:\\BeautyGirl.jpg", "D:\\ddd\\yzm", "-1")
                    .redirectErrorStream(true)
                    .start();
            // Drain the child's output so it cannot block on a full pipe, then
            // wait for it to exit instead of sleeping for a fixed second.
            readInputStream(ocr.getInputStream());
            ocr.waitFor();

            if (file.exists()) {
                // Read with the JDK (fully qualified, so no extra import is required).
                s = new String(java.nio.file.Files.readAllBytes(file.toPath()),
                        java.nio.charset.StandardCharsets.UTF_8).trim();
                System.out.println("=========" + s);
            } else {
                System.out.print("yzm.txt不存在");
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt flag for whoever is running this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return s;
    }

    /**
     * Reads every remaining byte from the given stream and closes it.
     *
     * @param inStream the stream to drain; it is closed before this method
     *                 returns, whether or not reading succeeded
     * @return all bytes read, possibly an empty array
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readInputStream(InputStream inStream) throws Exception {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        try (InputStream in = inStream) {
            int len;
            // read() returns -1 once the end of the stream has been reached.
            while ((len = in.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
        }
        return outStream.toByteArray();
    }
}

2.通过HttpClient

package com.imgyzm;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream; import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.openqa.selenium.io.FileHandler; /**
* @author QiaoJiafei
* @version 创建时间:2015年11月9日 上午10:53:11
* 类说明
*/
public class GetYZMByHttpClient {
    // Downloads a captcha image with Apache HttpClient, reads it with the
    // Tesseract command-line tool, then calls the login endpoint through the
    // same HttpClient instance.

    /**
     * Fetches the captcha, runs OCR on it and posts the login request.
     *
     * @param args unused
     * @throws Exception if the download, the OCR process or the login request fails
     */
    public static void main(String args[]) throws Exception {
        String s = "";
        HttpClient httpclient = new DefaultHttpClient(new PoolingClientConnectionManager());
        try {
            String imgurl = "http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705";
            HttpGet ht = new HttpGet(imgurl);
            HttpResponse response = httpclient.execute(ht);
            HttpEntity entity = response.getEntity();
            // Read the whole image into memory; readInputStream also closes the
            // content stream.
            byte[] data = readInputStream(entity.getContent());

            File imageFile = new File("D:/yzm.jpg");
            // try-with-resources closes the file even if the write fails.
            try (FileOutputStream outStream = new FileOutputStream(imageFile)) {
                outStream.write(data);
            }

            File file = new File("D:\\ddd\\yzm.txt");
            // Remove any result left by an earlier run so a failed OCR run
            // cannot be mistaken for a fresh one.
            file.delete();

            // NOTE(review): the trailing "-1" is passed through unchanged; it may
            // be a typo for "-l <lang>" - confirm against the Tesseract version in use.
            Process ocr = new ProcessBuilder(
                    "cmd.exe", "/C", "tesseract.exe",
                    "D:\\yzm.jpg", "D:\\ddd\\yzm", "-1")
                    .redirectErrorStream(true)
                    .start();
            // Drain the child's output so it cannot block on a full pipe, then
            // wait for it to exit instead of sleeping for a fixed second.
            readInputStream(ocr.getInputStream());
            ocr.waitFor();

            if (file.exists()) {
                // Read with the JDK (fully qualified, so no extra import is required).
                s = new String(java.nio.file.Files.readAllBytes(file.toPath()),
                        java.nio.charset.StandardCharsets.UTF_8).trim();
                System.out.println("=========" + s);
            } else {
                System.out.print("yzm.txt不存在");
            }

            /* =========== login request =========== */
            // OCR output may contain spaces or other characters that are illegal
            // in a URI, so the captcha text is URL-encoded before concatenation.
            String url = "http://172.16.30.226:8099/bms/staff/login.do?account=admin123&checkcode="
                    + java.net.URLEncoder.encode(s, "UTF-8") + "&pwd=aaaaaa1";
            System.out.println("url==========" + url);
            HttpPost httppost = new HttpPost(url);
            response = httpclient.execute(httppost);
            entity = response.getEntity();
            s = EntityUtils.toString(entity, "UTF-8");
            System.out.println(s);
        } finally {
            // Release pooled connections once the client is no longer needed.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads every remaining byte from the given stream and closes it.
     *
     * @param inStream the stream to drain; it is closed before this method
     *                 returns, whether or not reading succeeded
     * @return all bytes read, possibly an empty array
     * @throws Exception if reading from or closing the stream fails
     */
    public static byte[] readInputStream(InputStream inStream) throws Exception {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        try (InputStream in = inStream) {
            int len;
            // read() returns -1 once the end of the stream has been reached.
            while ((len = in.read(buffer)) != -1) {
                outStream.write(buffer, 0, len);
            }
        }
        return outStream.toByteArray();
    }
}

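那么这两种方式有什么区别呢?通过测试发现:使用HttpURLConnection获取验证码后,再调用其它接口时,该验证码已经失效了;而使用httpclient时,只要获取验证码和调用其它接口用的是同一个httpclient实例,获取到的验证码在后续接口调用中仍然有效。原因很可能是验证码保存在服务端的会话(session)中:同一个httpclient实例会自动保存并回传会话cookie,而每次新建的HttpURLConnection默认不会携带之前的cookie,服务端会把它当作一个新的会话。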

保存图片也可以用下面的方法,更简便一些

response = client.execute(httpget);
entity = response.getEntity();
InputStream in = entity.getContent();
try {
    BufferedImage input = ImageIO.read(in);
    // ImageIO.read returns null when no registered reader can decode the data;
    // passing null on to ImageIO.write would throw IllegalArgumentException.
    if (input == null) {
        throw new java.io.IOException("response body is not a decodable image");
    }
    ImageIO.write(input, "jpg", new File("D:/11.jpg"));
} finally {
    // ImageIO.read does not close the stream it is given.
    in.close();
}

HttpClient API:http://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/