亲爱的开发者朋友们,知道百度网址翻译吗?它为什么能够翻译源网页呢?iframe 是不能跨域操作的,所以可以通过服务端代理把目标网页转成同域内容来实现。直接上代码:
本 Demo 基于 MVC 编写,非常简单,复制过去稍作修改就可以使用。
//Action层
/**
* 网址翻译代理服务器接口层
* @Description: 此接口层可完成对所请求网址的代理,实现同域访问
* @author zhanglongping
* @CreateDate: 2016-8-23 上午10:52:49
*/ @At("/proxy")
public class ProxyModule { /**
* 获取网页
* @return
* @author zhanglongping
* @date 2016-8-23 上午10:54:13
*/
@At("/gethtml")
@Ok("Raw")
@Authority("")
public Object gethtml(@Param("yeekit_proxy_url") String url,HttpServletRequest request, HttpServletResponse response){
try { String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; String html = new ProxyUtils().getUrlMap(url,basePath);
// return html;
InputStream is = new StringInputStream(html);
BufferedReader in = new BufferedReader(new InputStreamReader(is,"UTF-8")); String line;
PrintWriter out = response.getWriter();
while ((line = in.readLine()) != null) {
out.println(line);
}
out.flush();
in.close();
} catch (Exception e) {
e.printStackTrace();
}
return null;
} /**
* 使用GET提交到目标服务器。
*
* @param request
* @param response
* @param targetUrl
* @throws IOException
*/
@At("/forward")
@Ok("Raw")
@Authority("")
public Object urlRedirect(@Param("yeekit_proxy_url") String targetUrl,HttpServletRequest request, HttpServletResponse response) throws IOException { if(targetUrl.endsWith(".htm") || targetUrl.endsWith(".html") || targetUrl.endsWith(".shtml")){
try { String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; String html = new ProxyUtils().getUrlMap(targetUrl,basePath);
// return html;
InputStream is = new StringInputStream(html);
BufferedReader in = new BufferedReader(new InputStreamReader(is,"UTF-8")); String line;
PrintWriter out = response.getWriter();
while ((line = in.readLine()) != null) {
out.println(line);
}
out.flush();
in.close();
} catch (Exception e) {
e.printStackTrace();
// return null;
} }else if(targetUrl.endsWith(".css") || targetUrl.endsWith(".js") || targetUrl.endsWith(".jpg")||
targetUrl.endsWith(".png") || targetUrl.endsWith(".svg") || targetUrl.endsWith(".gif")){
String fileName = targetUrl.split("/")[targetUrl.split("/").length-1];
// response.setHeader("Content-Disposition", "attachment; filename="
// + java.net.URLEncoder.encode(fileName, "UTF-8"));
//图片的名称
String imgName = fileName;
//名称转码,避免中文乱码
imgName = new String(imgName.getBytes("iso8859-1"),"UTF-8");
//图片的资源地址,http://10.80.3.229:8081/mediaserver/574fe515e30ab97c9068d2e1
//这是媒体服务器返回的地址,因为是网络地址,所以需要使用HttpURLConnection去获取图片
String imgUrl = targetUrl;
//输入流,用来读取图片
InputStream ins = null;
HttpURLConnection httpURL = null;
try{
URL url = new URL(imgUrl);
//打开一个网络连接
httpURL = (HttpURLConnection)url.openConnection();
//设置网络连接超时时间
httpURL.setConnectTimeout(3000);
//设置应用程序要从网络连接读取数据
httpURL.setDoInput(true);
//设置请求方式
httpURL.setRequestMethod("GET");
//获取请求返回码
int responseCode = httpURL.getResponseCode();
if(responseCode == 200){
//如果响应为“200”,表示成功响应,则返回一个输入流
ins = httpURL.getInputStream();
//设置response响应头
//encodeChineseDownloadFileName()用来解决文件名为中文的问题,方法体在下面
if(fileName.indexOf(".css")>-1){
response.setContentType("text/css");
}
response.setHeader("content-disposition", "attachment;filename="+ ProxyUtils.encodeChineseDownloadFileName(request,imgName));
//输出流到response中
byte[] data = new byte[1024];
int len = 0;
//输出流
OutputStream out = response.getOutputStream();
while((len = ins.read(data)) > 0){
out.write(data, 0, len);
}
out.flush();
ins.close(); }
}catch(Exception e){
System.out.println("下载附件图片出错!"+targetUrl);
e.printStackTrace();
}
}
return null; }
工具类
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import javax.servlet.http.HttpServletRequest; import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; /**
* 译库网址翻译代理服务工具类
*
* @Description:包含:提取HTML中网址,并转换为代理的网址服务地址;
* @author zhanglongping
* @CreateDate: 2016-8-23 上午10:15:08
* @UpdateUser: zhanglongping
* @UpdateDate: 2016-8-23 上午10:15:08
* @UpdateRemark: 说明本次修改内容
*/
public class ProxyUtils {
// public static void main(String[] args) throws IOException {
//// ProxyUtils pu = new ProxyUtils();
//// pu.getUrlMap("http://english.cas.cn");
// Connection conn = Jsoup.connect("http://www.bbc.com");
// Document doc_one = conn.get();
// System.out.println(doc_one);
// } /**
* 获取url哈希:key:源url value:代理url
* @param url
* @author zhanglongping
* @date 2016-8-23 上午10:42:41
*/
public String getUrlMap(String url,String basePath){
// String url_protocol = "",url_host = "";
try {
//特殊网址转换
url = transformation(url);
URL urlcurr = new URL(url);
// url_protocol = urlcurr.getProtocol();
// url_host = urlcurr.getHost();
String hostname = urlcurr.getProtocol()+"://"+urlcurr.getHost(); // String proxyHost = basePath; // String proxyHostName = proxyHost+"proxy/forward?yeekit_proxy_url="; Document doc_one; Connection conn = Jsoup.connect(hostname);
doc_one = conn.get();
doc_one.setBaseUri(hostname); // Elements links = doc_one.select("a[href]");
// Elements media = doc_one.select("[src]");
// Elements imports = doc_one.select("link[href]"); Elements head = doc_one.select("meta");
head.get(0).before("<base href=\""+hostname+"/"+"\" />");
//鼠标悬停翻译js脚本注入
//悬停脚本引用
String hover_js = "<script src=\""+basePath+"/yeekit_translate_url/js/yeekit_hover_trans.js\" type=\"text/javascript\"></script>";
String jquery_js = "<script src=\"http://cdn.bootcss.com/jquery/3.1.0/jquery.min.js\" type=\"text/javascript\"></script>";
head.get(0).after(jquery_js + hover_js);
// for (Element src : media) {
// String key = src.attr("abs:src");
// src.attr("src", proxyHostName+key);
// }
//
// for (Element link : imports) {
// String key = link.attr("abs:href");
// link.attr("href", proxyHostName+key);
// }
//
// for (Element link : links) {
// String key = link.attr("abs:href");
// link.attr("href", proxyHostName+key);
// } String dochtml = doc_one.html().toString(); //增强型处理 - 处理js脚本里静态资源地址引用
// List<String> list_src_img = getImgSrc(dochtml);
// for(String src:list_src_img){
// if(src.indexOf("./") > -1){
// dochtml = dochtml.replaceAll(src, proxyHostName+hostname+src.substring(1));
// }
// } // System.out.println(dochtml);
return dochtml;
} catch (IOException e) {
e.printStackTrace();
return null;
} } /**
* 内容获取
* @return
* @author zhanglongping
* @throws IOException
* @date 2016-8-30 下午5:44:31
*/
public String get_https_html(String url) throws IOException{ URL urlcurr = new URL(url);
String hostname = urlcurr.getProtocol()+"://"+urlcurr.getHost(); Document doc_one; Connection conn = Jsoup.connect(hostname);
doc_one = conn.post();
doc_one.setBaseUri(hostname); Elements head = doc_one.select("meta"); head.get(0).before("<base href=\""+hostname+"/"+"\" />"); String dochtml = doc_one.html().toString(); return dochtml;
} /*
* 解决文件为中文名的乱码问题
*/
public static String encodeChineseDownloadFileName(HttpServletRequest request, String pFileName) throws UnsupportedEncodingException{
String filename = null;
//获取请求头中的浏览器标识
String agent = request.getHeader("USER-AGENT");
if(agent != null){
if(agent.indexOf("Firefox") != -1){
//Firefox
filename = "=?UTF-8?B?" +
(new String(Base64.encodeBase64(pFileName.getBytes("UTF-8")))) + "?=";
}else if(agent.indexOf("Chrome") != -1){
//Chrome
filename = new String(pFileName.getBytes(), "ISO8859-1");
}else{
//IE7+
filename = URLEncoder.encode(pFileName, "UTF-8");
//替换空格
filename = StringUtils.replace(filename, "+", "%20");
}
}else{
filename = pFileName;
}
return filename;
} /**
* 获取img标签中的src值
* @param content
* @return
*/
public List<String> getImgSrc(String content){ List<String> list = new ArrayList<String>();
//目前img标签标示有3种表达式
//<img alt="" src="1.jpg"/> <img alt="" src="1.jpg"></img> <img alt="" src="1.jpg">
//开始匹配content中的<img />标签
Pattern p_img = Pattern.compile("<(img|IMG)(.*?)(/>|></img>|>)");
Matcher m_img = p_img.matcher(content);
boolean result_img = m_img.find();
if (result_img) {
while (result_img) {
//获取到匹配的<img />标签中的内容
String str_img = m_img.group(2); //开始匹配<img />标签中的src
Pattern p_src = Pattern.compile("(src|SRC)=(\"|\')(.*?)(\"|\')");
Matcher m_src = p_src.matcher(str_img);
if (m_src.find()) {
String str_src = m_src.group(3);
list.add(str_src);
}
//结束匹配<img />标签中的src //匹配content中是否存在下一个<img />标签,有则继续以上步骤匹配<img />标签中的src
result_img = m_img.find();
}
}
return list;
} /**
* 特殊网址转换
* @param url
* @return
* @author zhanglongping
* @date 2016-8-30 下午6:18:48
*/
public String transformation(String url){
//百度的二级域名www.baidu.com重定向存在问题
if(url.equals("http://www.baidu.com")){
url = "http://baidu.com";
} return url;
}
}