Java实现爬取往期所有双色球开奖结果功能示例

本文实例讲述了Java实现爬取往期所有双色球开奖结果的功能，分享给大家供大家参考，具体如下：

梦想还是要有的，万一实现了呢？我相信经常买双色球的朋友和我一样，都会有一个疑问：往期双色球的开奖结果是什么？我中意的这一注双色球在往期是否开过一等奖？如果开过的话，基本上可以放弃这一注了，因为历史上应该没有出现过两期双色球开奖完全一致的吧？那么往期的开奖结果是什么呢？我自己用Java写了一个简易的类，爬取所有双色球开奖结果。本来想开发安卓版本的，但由于UI等需要时间准备，有缘再开发吧。

				?

									import java.io.BufferedReader;
									import java.io.BufferedWriter;
									import java.io.File;
									import java.io.FileWriter;
									import java.io.IOException;
									import java.io.InputStream;
									import java.io.InputStreamReader;
									import java.net.HttpURLConnection;
									import java.net.URL;
									import java.nio.charset.StandardCharsets;
									import java.util.Locale;
									import java.util.regex.Matcher;
									import java.util.regex.Pattern;
									import java.util.zip.GZIPInputStream;

									/**
									 * Command-line scraper that downloads every result-listing page for the
									 * "双色球" lottery from kaijiang.zhcw.com, extracts the numbers found in each
									 * draw row, and writes them to a text file, one draw per line.
									 *
									 * <p>The class keeps no state between runs; all work happens in {@link #main}.
									 */
									public class allballs {

									    /** Listing pages are addressed as PREFIX + pageNumber + SUFFIX. */
									    private static final String PAGE_URL_PREFIX = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_";
									    private static final String PAGE_URL_SUFFIX = ".html";

									    /** Output file, created in the current working directory. */
									    private static final String OUTPUT_FILE = "双色球.txt";

									    /** Connect and read timeout for each page request. */
									    private static final int TIMEOUT_MILLIS = 6 * 1000;

									    /** Pause between page requests so the remote site is not hammered. */
									    private static final long PAGE_DELAY_MILLIS = 1200;

									    /**
									     * Matches the pager's "last page" link, i.e. {@code <digits>">末页}, capturing the
									     * page number. The two CJK characters are written as Unicode escapes so the
									     * pattern does not depend on the encoding javac uses to read this file.
									     */
									    private static final Pattern LAST_PAGE_PATTERN = Pattern.compile("(\\d+)\">\u672b\u9875");

									    /**
									     * Matches one draw's number cell, lazily, up to its closing {@code </em></td>}.
									     * {@code [\s\S]} is required (rather than {@code .}) so the match can span lines.
									     */
									    private static final Pattern TERM_PATTERN =
									            Pattern.compile("<td align=\"center\" style=\"padding-left:10px;\">[\\s\\S]+?</em></td>");

									    /** Matches a run of digits that forms the entire text between two tags. */
									    private static final Pattern BALL_PATTERN = Pattern.compile(">(\\d+)<");

									    /**
									     * Entry point: discovers the page count from page 1, walks every page, and
									     * writes the collected draws to {@link #OUTPUT_FILE}.
									     *
									     * @param args unused
									     */
									    public static void main(String[] args) {
									        System.out.println("正在获取...");
									        String firstPage = getHtmlString(pageUrl(1));
									        int pageCount = getPageCount(firstPage);
									        if (pageCount > 0) {
									            StringBuilder results = new StringBuilder();
									            for (int page = 1; page <= pageCount; page++) {
									                // Page 1 was already downloaded to read the pager; do not fetch it twice.
									                String pageContent = (page == 1) ? firstPage : getHtmlString(pageUrl(page));
									                if (pageContent != null && !pageContent.isEmpty()) {
									                    results.append(getOneTermContent(pageContent));
									                } else {
									                    System.out.println("第" + page + "页丢失");
									                }
									                if (page < pageCount) {
									                    try {
									                        Thread.sleep(PAGE_DELAY_MILLIS);
									                    } catch (InterruptedException e) {
									                        // Honour the interrupt: stop fetching, keep what was collected so far.
									                        Thread.currentThread().interrupt();
									                        break;
									                    }
									                }
									            }
									            writeResults(results.toString());
									        } else {
									            System.out.println("结果页数为0");
									        }
									        System.out.println("完成！");
									    }

									    /** Builds the URL of the given 1-based listing page. */
									    private static String pageUrl(int page) {
									        return PAGE_URL_PREFIX + page + PAGE_URL_SUFFIX;
									    }

									    /**
									     * Writes the collected results to {@link #OUTPUT_FILE}, replacing any previous file.
									     *
									     * @param results text to write; consists only of digits, spaces and CRLF, so the
									     *                platform default charset used by {@link FileWriter} is safe here
									     */
									    private static void writeResults(String results) {
									        File file = new File(OUTPUT_FILE);
									        // FileWriter truncates an existing file, so no explicit delete is needed.
									        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
									            writer.write(results);
									        } catch (IOException e) {
									            System.err.println("写入文件失败: " + file.getAbsolutePath() + " (" + e + ")");
									        }
									    }

									    /**
									     * Reads the total number of listing pages from the pager's "last page" link.
									     *
									     * @param result HTML of a listing page; may be {@code null} if the download failed
									     * @return the page count, or 0 when {@code result} is {@code null}, the link is
									     *         absent, or the number does not fit in an {@code int}
									     */
									    private static int getPageCount(String result) {
									        if (result == null) {
									            return 0;
									        }
									        Matcher matcher = LAST_PAGE_PATTERN.matcher(result);
									        if (!matcher.find()) {
									            return 0;
									        }
									        try {
									            return Integer.parseInt(matcher.group(1));
									        } catch (NumberFormatException e) {
									            // Digit run too long for an int; treat as "unknown".
									            return 0;
									        }
									    }

									    /**
									     * Downloads a page and returns its body as text, with lines joined by {@code '\n'}.
									     *
									     * @param targetUrl absolute HTTP URL to fetch
									     * @return the page body decoded as UTF-8, or {@code null} if the response status
									     *         is not 200 or an I/O error occurs
									     */
									    private static String getHtmlString(String targetUrl) {
									        HttpURLConnection connection = null;
									        try {
									            connection = (HttpURLConnection) new URL(targetUrl).openConnection();
									            // Plain page fetch with no request body, so GET (method tokens are case-sensitive).
									            connection.setRequestMethod("GET");
									            connection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows 7)");
									            connection.setRequestProperty("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-powerpoint, application/vnd.ms-excel, application/msword, */*");
									            connection.setRequestProperty("Accept-Language", "zh-cn");
									            connection.setRequestProperty("UA-CPU", "x86");
									            // Only gzip is advertised because only gzip is decoded below.
									            connection.setRequestProperty("Accept-Encoding", "gzip");
									            // One-shot scraping: no benefit from keep-alive, and closing is kinder to the server.
									            connection.setRequestProperty("Connection", "close");
									            // Each URL is requested once per run, so caching would not help.
									            connection.setUseCaches(false);
									            connection.setConnectTimeout(TIMEOUT_MILLIS);
									            connection.setReadTimeout(TIMEOUT_MILLIS);

									            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
									                return null;
									            }

									            String encoding = connection.getContentEncoding();
									            boolean gzipped = encoding != null
									                    && encoding.toLowerCase(Locale.ROOT).contains("gzip");
									            try (InputStream raw = connection.getInputStream();
									                 InputStream body = gzipped ? new GZIPInputStream(raw) : raw;
									                 BufferedReader reader = new BufferedReader(
									                         new InputStreamReader(body, StandardCharsets.UTF_8))) {
									                StringBuilder builder = new StringBuilder();
									                String line;
									                while ((line = reader.readLine()) != null) {
									                    builder.append(line).append("\n");
									                }
									                return builder.toString();
									            }
									        } catch (IOException e) {
									            System.err.println("获取页面失败: " + targetUrl + " (" + e + ")");
									            return null;
									        } finally {
									            if (connection != null) {
									                connection.disconnect();
									            }
									        }
									    }

									    /**
									     * Extracts every draw found in one listing page.
									     *
									     * @param pageContent HTML of a listing page; must not be {@code null}
									     * @return one CRLF-terminated line per matched draw cell, in page order; empty
									     *         when the page contains no matching cell
									     */
									    private static String getOneTermContent(String pageContent) {
									        StringBuilder terms = new StringBuilder();
									        Matcher termMatcher = TERM_PATTERN.matcher(pageContent);
									        while (termMatcher.find()) {
									            terms.append(getOneTermNumbers(termMatcher.group()));
									        }
									        return terms.toString();
									    }

									    /**
									     * Formats the numbers of a single draw.
									     *
									     * @param oneTermContent HTML fragment of one draw cell
									     * @return each tag-enclosed digit run followed by a space, then {@code "\r\n"}
									     */
									    private static String getOneTermNumbers(String oneTermContent) {
									        StringBuilder numbers = new StringBuilder();
									        Matcher ballMatcher = BALL_PATTERN.matcher(oneTermContent);
									        while (ballMatcher.find()) {
									            numbers.append(ballMatcher.group(1)).append(" ");
									        }
									        return numbers.append("\r\n").toString();
									    }
									}

运行结果：

Java实现爬取往期所有双色球开奖结果功能示例

希望本文所述对大家的Java程序设计有所帮助。

原文链接：https://blog.csdn.net/ithouse/article/details/50908296

秒客网

Java实现爬取往期所有双色球开奖结果功能示例

相关文章