新浪微博模拟登陆+数据抓取(java实现)

时间:2022-09-21 16:09:36

模拟登陆部分实现:

package token.exe;

import java.math.BigInteger;
import java.util.Random; import org.apache.commons.codec.binary.Base64;

/**
 * Builds the two credential parameters of the login request: {@code su}
 * (Base64 of the account name) and {@code sp} (RSA-encrypted password, hex).
 *
 * <p>All methods are static and keep no state between calls.
 */
public class WeiboEncoder {

    /** Charset for every text-to-bytes conversion, so results do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF8 = java.nio.charset.Charset.forName("UTF-8");

    /** Source of the random padding bytes (fully qualified: the file does not import it). */
    private static final java.security.SecureRandom PADDING_RANDOM = new java.security.SecureRandom();

    /**
     * Base64-encodes the account name (the {@code su} parameter).
     *
     * @param account account name
     * @return Base64 text of the account's UTF-8 bytes
     */
    @SuppressWarnings("deprecation")
    public static String encodeAccount(String account) {
        return new String(Base64.encodeBase64(account.getBytes(UTF8)), UTF8);
    }

    /**
     * RSA-encrypts the password with the given public key (the {@code sp} parameter).
     *
     * @param pwd  plain-text password
     * @param nStr RSA modulus as a hexadecimal string
     * @param eStr RSA public exponent as a hexadecimal string
     * @return the ciphertext in lowercase hex, left-padded with one "0" when its length is odd
     * @throws IllegalArgumentException if the encoded password plus the 11 bytes of
     *         padding overhead does not fit into the modulus
     */
    public static String RSAEncrypt(String pwd, String nStr, String eStr) {
        // Locals instead of static fields: concurrent calls must not see each other's key.
        BigInteger modulus = new BigInteger(nStr, 16);
        BigInteger exponent = new BigInteger(eStr, 16);
        int blockLength = (modulus.bitLength() + 7) >> 3;

        BigInteger cipher = RSADoPublic(pkcs1pad2(pwd, blockLength), exponent, modulus);
        String sp = cipher.toString(16);
        if ((sp.length() & 1) != 0) {
            sp = "0" + sp;
        }
        return sp;
    }

    /** Raw RSA public-key operation: {@code x^e mod n}. */
    private static BigInteger RSADoPublic(BigInteger x, BigInteger e, BigInteger n) {
        return x.modPow(e, n);
    }

    /**
     * Lays the message out as {@code 00 02 <non-zero random bytes> 00 <message>}
     * in a block of {@code n} bytes and returns it as a non-negative integer.
     *
     * @param s message text
     * @param n block length in bytes
     * @throws IllegalArgumentException if the message leaves fewer than 8 padding bytes
     */
    private static BigInteger pkcs1pad2(String s, int n) {
        byte[] message = encodeCharsAsUtf8(s);
        // Checked on the encoded length; the char count under-estimates multi-byte text.
        if (n < message.length + 11) {
            throw new IllegalArgumentException("Message too long for RSA");
        }

        byte[] block = new byte[n];
        int pos = n - message.length;
        System.arraycopy(message, 0, block, pos, message.length);
        block[--pos] = 0; // separator between padding and message

        byte[] one = new byte[1];
        while (pos > 2) { // random non-zero pad
            do {
                PADDING_RANDOM.nextBytes(one);
            } while (one[0] == 0);
            block[--pos] = one[0];
        }
        block[--pos] = 2;
        block[--pos] = 0;
        return new BigInteger(1, block);
    }

    /**
     * Encodes each UTF-16 code unit of {@code s} as 1, 2 or 3 bytes using the
     * UTF-8 bit layout.
     *
     * <p>NOTE(review): a surrogate pair therefore becomes two 3-byte groups rather
     * than one 4-byte UTF-8 sequence, which is what the original per-character
     * loop was aiming for — confirm which form the server expects.
     */
    private static byte[] encodeCharsAsUtf8(String s) {
        int length = 0;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            length += (c < 0x80) ? 1 : (c < 0x800) ? 2 : 3;
        }

        byte[] out = new byte[length];
        int p = 0;
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            // Plain casts: parsing "128".."255" as a Byte throws NumberFormatException.
            if (c < 0x80) {
                out[p++] = (byte) c;
            } else if (c < 0x800) {
                out[p++] = (byte) ((c >> 6) | 192);
                out[p++] = (byte) ((c & 63) | 128);
            } else {
                out[p++] = (byte) ((c >> 12) | 224);
                out[p++] = (byte) (((c >> 6) & 63) | 128);
                out[p++] = (byte) ((c & 63) | 128);
            }
        }
        return out;
    }
}
参数实体:
package token.def;
import java.io.Serializable;
public class LoginParams implements Serializable {
private static final long serialVersionUID = -5775728968372860382L;
// Values copied from the prelogin response by WeiboLoginer.doLogin.
private String pcid;
private String servertime;
private String nonce;
private String rsakv;
// Captcha image URL; set only when a captcha has to be solved.
private String imgUrl;
// Encrypted password as produced by WeiboEncoder.RSAEncrypt.
private String sp;
// Authorization code; null until authorization succeeds.
private String code;
// Starts as true; WeiboLoginer.doLoginByPin sets it to false when the captcha is rejected.
private boolean isLogin = true;

public String getPcid() {
    return this.pcid;
}

public void setPcid(String pcid) {
    this.pcid = pcid;
}

public String getServertime() {
    return this.servertime;
}

public void setServertime(String servertime) {
    this.servertime = servertime;
}

public String getNonce() {
    return this.nonce;
}

public void setNonce(String nonce) {
    this.nonce = nonce;
}

public String getRsakv() {
    return this.rsakv;
}

public void setRsakv(String rsakv) {
    this.rsakv = rsakv;
}

public String getImgUrl() {
    return this.imgUrl;
}

public void setImgUrl(String imgUrl) {
    this.imgUrl = imgUrl;
}

public String getSp() {
    return this.sp;
}

public void setSp(String sp) {
    this.sp = sp;
}

public String getCode() {
    return this.code;
}

public void setCode(String code) {
    this.code = code;
}

public boolean isLogin() {
    return this.isLogin;
}

public void setLogin(boolean isLogin) {
    this.isLogin = isLogin;
}

/** Renders every field as {@code LoginParams [name=value, ...]}. */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("LoginParams [pcid=");
    text.append(pcid);
    text.append(", servertime=").append(servertime);
    text.append(", nonce=").append(nonce);
    text.append(", rsakv=").append(rsakv);
    text.append(", imgUrl=").append(imgUrl);
    text.append(", sp=").append(sp);
    text.append(", code=").append(code);
    text.append(", isLogin=").append(isLogin);
    text.append("]");
    return text.toString();
}
}

登陆部分实现: package token.exe; import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLEncoder;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Scanner; import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.HttpVersion;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.protocol.Protocol;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import token.SinaWeiboOAuth;
import token.def.LoginParams;
import weibo4j.model.MySSLSocketFactory; public class WeiboLoginer { private HttpClient httpClient; //httpClient实例初始化 public WeiboLoginer() { //httpclient连接配置
MultiThreadedHttpConnectionManager httpManager = new MultiThreadedHttpConnectionManager();
HttpConnectionManagerParams connectParams = httpManager.getParams();
connectParams.setConnectionTimeout(3000);
connectParams.setDefaultMaxConnectionsPerHost(100);
connectParams.setSoTimeout(3000);
//httpclient参数配置
HttpClientParams httpParams = new HttpClientParams();
httpParams.setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
httpParams.setVersion(HttpVersion.HTTP_1_1);
//设置默认Header
List<Header> headers = new ArrayList<Header>();
headers.add(new Header("Content-Type", "application/x-www-form-urlencoded"));
headers.add(new Header("Host", "login.sina.com.cn"));
headers.add(new Header("User-Agent","Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"));
headers.add(new Header("API-RemoteIP", "192.168.0.1"));//伪造新浪验证IP
headers.add(new Header("X-Forwarded-For","192.168.0.1"));//伪造真实IP
headers.add(new Header("CLIENT-IP", "192.168.0.1"));//伪造客户端IP
//初始化httpclient
httpClient = new HttpClient(httpParams, httpManager);
httpClient.getHostConfiguration().getParams().setParameter("http.default-headers", headers);
//设置ssl协议
Protocol protocol = new Protocol("https",new MySSLSocketFactory(), 443);
Protocol.registerProtocol("https", protocol);
//设置代理
// httpClient.getHostConfiguration().setProxy("", 0);
// httpClient.getParams().setAuthenticationPreemptive(false);
} /**
* 登陆并获取code值,如果出现验证码则返回还有验证码的参数信息
* @return
*/
public LoginParams doLogin(String username, String password) {
    // Outcomes, all reported through the returned LoginParams:
    //   - code set:    authorization succeeded;
    //   - imgUrl set:  a captcha is required, the other fields hold what doLoginByPin needs;
    //   - neither:     a step failed (details are printed to stderr).
    Properties properties = initProperties();
    String base64UserCount = WeiboEncoder.encodeAccount(username);
    LoginParams loginParams = new LoginParams();

    // Step 1: prelogin, then encrypt the password with the returned public key.
    HashMap<String, String> pubkeyMap;
    try {
        pubkeyMap = pubKeyMap(base64UserCount);
    } catch (IOException e) {
        e.printStackTrace();
        return loginParams; // nothing else can work without the prelogin data
    }
    String pubkey = pubkeyMap.get("pubkey");
    if (pubkey == null) {
        System.err.println("Prelogin response has no pubkey: " + pubkeyMap);
        return loginParams;
    }
    String sp = WeiboEncoder.RSAEncrypt(password, pubkey, "10001");

    // Step 2: captcha requested before login?
    String imgUrl = getPin(pubkeyMap);
    if (imgUrl != null) {
        fillCaptchaParams(loginParams, pubkeyMap, imgUrl, sp);
        return loginParams;
    }

    // Step 3: log in and fetch the ticket.
    HashMap<String, String> ticketMap;
    try {
        ticketMap = getTicket(base64UserCount, sp, pubkeyMap);
    } catch (Exception e1) {
        e1.printStackTrace();
        return loginParams; // no ticket, so authorization cannot be attempted
    }

    // Step 4: captcha requested after the login attempt
    // (original note: happens for accounts that are Sina registration e-mails).
    String vcUrl = isHasPinAgain(pubkeyMap, ticketMap);
    if (vcUrl != null) {
        // Store vcUrl: imgUrl is always null on this path.
        fillCaptchaParams(loginParams, pubkeyMap, vcUrl, sp);
        return loginParams;
    }

    // Step 5: authorize and keep the resulting code.
    try {
        String code = authorize(ticketMap.get("ticket"), properties.getProperty("authorizeURL"),
                properties.getProperty("redirect_URI"), properties.getProperty("client_ID"),
                username, ticketMap.get("uid"));
        loginParams.setCode(code);
    } catch (KeyManagementException e) {
        e.printStackTrace();
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return loginParams;
}

// Copies everything a later doLoginByPin call needs into the result.
private void fillCaptchaParams(LoginParams target, HashMap<String, String> pubkeyMap,
        String captchaUrl, String sp) {
    target.setPcid(pubkeyMap.get("pcid"));
    target.setNonce(pubkeyMap.get("nonce"));
    target.setServertime(pubkeyMap.get("servertime"));
    target.setRsakv(pubkeyMap.get("rsakv"));
    target.setImgUrl(captchaUrl);
    target.setSp(sp);
}

/**
* Logs in when a captcha is required.
* @param sp encrypted password
* @param pin captcha text
* @param pcid pcid value from the prelogin response
* @param servertime servertime value from the prelogin response
* @param nonce nonce value from the prelogin response
* @param rsakv rsakv value from the prelogin response
* @return the login result
*/
public LoginParams doLoginByPin(String username, String sp, String pin, String pcid,
        String servertime, String nonce, String rsakv) {
    // Result: isLogin() == false when the captcha was rejected; getCode() != null on
    // success; otherwise the attempt failed and details were printed to stderr.
    Properties properties = initProperties();
    String base64UserCount = WeiboEncoder.encodeAccount(username);
    LoginParams params = new LoginParams();
    try {
        HashMap<String, String> ticketMap = getTicket(base64UserCount, sp, pin, pcid,
                servertime, nonce, rsakv);

        // Means "the captcha entered is incorrect". Compared in its \\uXXXX-escaped form
        // because the parsed response values are not unescaped.
        String reply = "\\u8f93\\u5165\\u7684\\u9a8c\\u8bc1\\u7801\\u4e0d\\u6b63\\u786e";
        if (reply.equals(ticketMap.get("reason"))) {
            params.setLogin(false);
            return params;
        }

        String ticket = ticketMap.get("ticket");
        if (ticket == null) {
            // Login failed for some other reason; authorizing without a ticket cannot succeed.
            System.err.println("Login returned no ticket: " + ticketMap);
            return params;
        }

        String code = authorize(ticket, properties.getProperty("authorizeURL"),
                properties.getProperty("redirect_URI"), properties.getProperty("client_ID"),
                username, ticketMap.get("uid"));
        params.setCode(code);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return params;
}

/**
* Simulates the Sina authorization step.
* @param ticket ticket parameter
* @param redirectURI callback address
* @param clientId appKey
* @param username user name
* @return the code taken from the redirect location, or null when none was obtained
* @throws IOException
* @throws KeyManagementException
* @throws NoSuchAlgorithmException
*/
private String authorize(String ticket, String authorizeURL, String redirectURI,
        String clientId, String username, String uid) throws IOException,
        KeyManagementException, NoSuchAlgorithmException {
    // 302: the code is read from the Location header.
    // 200: the returned page is handed to authorizeAgain for a second submission.
    // Any other status yields null.
    String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&forcelogin=true";
    String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

    PostMethod post = new PostMethod(authorizeURL);
    // Referer mimics the link that requests the token; per the original note the
    // callback address comes back empty without it.
    post.setRequestHeader("Referer", url);
    // Form fields submitted by the simulated login.
    post.setRequestBody(new NameValuePair[] {
            new NameValuePair("action", "login"),
            new NameValuePair("userId", username),
            new NameValuePair("ticket", ticket),
            new NameValuePair("response_type", "code"),
            new NameValuePair("redirect_uri", redirectURI),
            new NameValuePair("client_id", clientId),
            new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
    });

    String code = null;
    byte[] htmlDatas = null;
    try {
        int status = httpClient.executeMethod(post);
        if (status == HttpStatus.SC_OK) {
            htmlDatas = post.getResponseBody();
        } else if (status == 302) {
            Header locationHeader = post.getResponseHeader("location");
            String location = (locationHeader == null) ? null : locationHeader.getValue();
            if (location == null) {
                throw new IOException("302 response without a Location header");
            }
            // NOTE(review): takes everything after the first '='; assumes the code is
            // the only query parameter of the redirect — confirm.
            code = location.substring(location.indexOf("=") + 1);
        }
    } finally {
        // Always hand the pooled connection back, also when the request throws.
        post.releaseConnection();
    }

    if (htmlDatas != null) {
        code = authorizeAgain(htmlDatas, ticket, authorizeURL,
                redirectURI, clientId, username, uid);
    }
    return code;
}

/**
* Submits the authorization request a second time.
* @param htmlDatas page data returned by the first authorization request
* @return the code taken from the redirect location, or null if the response is not a 302
* @throws IOException
* @throws HttpException
*/
private String authorizeAgain(byte[] htmlDatas, String ticket, String authorizeURL,
        String redirectURI, String clientId, String username,
        String uid) throws HttpException, IOException {
    // Reads the hidden verifyToken field from the first response and posts it back
    // with action=authorize.
    Document doc = Jsoup.parse(new String(htmlDatas, "utf-8"));
    Element verifyTokeneElement = doc.select("input[name=verifyToken]").first();
    if (verifyTokeneElement == null) {
        throw new IOException("authorization page contains no verifyToken field");
    }
    String verifyToken = verifyTokeneElement.attr("value");

    String url = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&forcelogin=true";
    String regCallback = authorizeURL + "?client_id=" + clientId + "&redirect_uri="
            + redirectURI + "&response_type=code&display=default&from=&with_cookie=";

    PostMethod post = new PostMethod(authorizeURL);
    // Referer mimics the link that requests the token; per the original note the
    // callback address comes back empty without it.
    post.setRequestHeader("Referer", authorizeURL);
    // Form fields submitted by the simulated login.
    post.setRequestBody(new NameValuePair[] {
            new NameValuePair("action", "authorize"),
            new NameValuePair("uid", uid),
            new NameValuePair("url", url),
            new NameValuePair("response_type", "code"),
            new NameValuePair("redirect_uri", redirectURI),
            new NameValuePair("client_id", clientId),
            new NameValuePair("verifyToken", verifyToken),
            new NameValuePair("regCallback", URLEncoder.encode(regCallback, "UTF-8"))
    });

    try {
        int status = httpClient.executeMethod(post);
        if (status != 302) {
            return null;
        }
        // Check the header itself: it is the object that may be missing.
        Header locationHeader = post.getResponseHeader("location");
        String location = (locationHeader == null) ? null : locationHeader.getValue();
        if (location == null) {
            throw new IOException("redirect_uri is null");
        }
        return location.substring(location.indexOf("=") + 1);
    } finally {
        // Always hand the pooled connection back, also when the request throws.
        post.releaseConnection();
    }
}

/**
* Simulates the user prelogin.
* @param unameBase64 Base64-encoded user name
* @return the parameters parsed from the prelogin response
* @throws IOException
*/
private HashMap<String, String> pubKeyMap(String unameBase64)
        throws IOException {
    // Base64 text may contain '+', '/' and '='. Left raw in a query string, '+' is
    // read as a space, so the value is percent-encoded.
    String su = URLEncoder.encode(unameBase64, "UTF-8");
    String url = "https://login.sina.com.cn/sso/prelogin.php?"
            + "entry=openapi&"
            + "callback=sinaSSOController.preloginCallBack&"
            + "su=" + su + "&"
            + "rsakt=mod&"
            + "checkpin=1&"
            + "client=ssologin.js(v1.4.5)"
            + "&_=" + System.currentTimeMillis(); // current time in ms
    return getParaFromResult(get(url));
}

/**
* Checks whether the prelogin asks for a captcha.
* @param pubkeyMap parameters from the prelogin response
* @return the captcha image URL when showpin is 1, otherwise null
*/
private String getPin(HashMap<String, String> pubkeyMap) {
    if (pubkeyMap == null) {
        return null;
    }
    String showpin = pubkeyMap.get("showpin");
    // String comparison: an unexpected value means "no captcha" instead of a
    // NumberFormatException.
    if (showpin == null || !"1".equals(showpin.trim())) {
        return null;
    }
    // Cast to long: concatenating the double from Math.floor produces text such as
    // "1.2345678E7" or "123.0" instead of a plain integer.
    long r = (long) Math.floor(Math.random() * 100000000);
    return "https://login.sina.com.cn/cgi/pin.php?"
            + "r=" + r
            + "&s=0"
            + "&p=" + pubkeyMap.get("pcid");
}

/**
* Checks whether a captcha is still required after the login attempt.
* @return the captcha image URL when one is required, otherwise null
*/
private String isHasPinAgain(HashMap<String, String> pubkeyMap,
        HashMap<String, String> ticketMap) {
    if (pubkeyMap == null || ticketMap == null) {
        return null;
    }
    // Means "for the safety of your account, please enter the captcha". Compared in
    // its \\uXXXX-escaped form, i.e. as the text arrives in the response.
    String str = "\\u4e3a\\u4e86\\u60a8\\u7684\\u5e10\\u53f7\\u5b89" +
            "\\u5168\\uff0c\\u8bf7\\u8f93\\u5165\\u9a8c\\u8bc1\\u7801";
    // Constant-first equals also covers a missing or null "reason".
    if (!str.equals(ticketMap.get("reason"))) {
        return null;
    }
    // Cast to long: concatenating the double from Math.floor produces text such as
    // "1.2345678E7" or "123.0" instead of a plain integer.
    long r = (long) Math.floor(Math.random() * 100000000);
    return "https://login.sina.com.cn/cgi/pin.php?"
            + "r=" + r
            + "&s=0"
            + "&p=" + pubkeyMap.get("pcid");
}

/**
* Builds the captcha image URL.
*/
public String getVCode(String pcid) {
    // Returns null when pcid is null; otherwise a fresh URL with a random "r" value.
    if (pcid == null) {
        return null;
    }
    // Cast to long: concatenating the double from Math.floor produces text such as
    // "1.2345678E7" or "123.0" instead of a plain integer.
    long r = (long) Math.floor(Math.random() * 100000000);
    return "https://login.sina.com.cn/cgi/pin.php?"
            + "r=" + r
            + "&s=0"
            + "&p=" + pcid;
}

/**
* Saves the captcha image.
* @param url captcha image link
*/
public void saveVCodeImg(String url) {
    // Downloads the image and writes it to "vc.jpg" in the working directory.
    // Failures are printed to stderr and otherwise ignored, as before.
    GetMethod getImages = new GetMethod(url);
    try {
        int status = httpClient.executeMethod(getImages);
        if (status == HttpStatus.SC_OK) {
            byte[] image = getImages.getResponseBody();
            if (image != null) {
                FileOutputStream outputStream = new FileOutputStream("vc.jpg");
                try {
                    outputStream.write(image);
                } finally {
                    outputStream.close(); // also when the write fails
                }
            }
        }
    } catch (HttpException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always hand the pooled connection back.
        getImages.releaseConnection();
    }
}

/**
* Simulates the user login when no captcha is required and obtains the ticket.
* @param usernameBase64 Base64-encoded user name
* @param sp encrypted user password (the request declares pwencode=rsa)
* @return the parameters parsed from the login response
* @throws Exception
*/
private HashMap<String, String> getTicket(String usernameBase64,
        String sp, HashMap<String, String> pubkeyMap) throws Exception {
    if (pubkeyMap == null) {
        // Fail with a clear message instead of requesting a null URL.
        throw new IllegalArgumentException("pubkeyMap is null: prelogin data is required to build the login URL");
    }
    // Base64 text may contain '+', '/' and '='. Left raw in a query string, '+' is
    // read as a space, so the value is percent-encoded.
    String su = URLEncoder.encode(usernameBase64, "UTF-8");
    String url = "https://login.sina.com.cn/sso/login.php?"
            + "entry=openapi&"
            + "gateway=1&"
            + "from=&"
            + "savestate=0&"
            + "useticket=1&"
            + "pagerefer=&"
            + "ct=1800&"
            + "s=1&"
            + "vsnf=1&"
            + "vsnval=&"
            + "door=&"
            + "su=" + su + "&"
            + "service=miniblog&"
            + "servertime=" + pubkeyMap.get("servertime") + "&"
            + "nonce=" + pubkeyMap.get("nonce") + "&"
            + "pwencode=rsa&"
            + "rsakv=" + pubkeyMap.get("rsakv") + "&"
            + "sp=" + sp + "&"
            + "encoding=UTF-8&"
            + "callback=sinaSSOController.loginCallBack&"
            + "cdult=2&"
            + "domain=weibo.com&"
            + "prelt=37&"
            + "returntype=TEXT&"
            + "client=ssologin.js(v1.4.5)&"
            + "_=" + System.currentTimeMillis(); // current time in ms
    return getParaFromResult(get(url));
}

/**
* Simulates the user login when a captcha is required and obtains the ticket.
* @param usernameBase64 Base64-encoded user name
* @param sp encrypted user password
* @param pin captcha text
* @param pcid pcid value from the prelogin response
* @param servertime servertime value from the prelogin response
* @param nonce nonce value from the prelogin response
* @param rsakv rsakv value from the prelogin response
* @return the parameters parsed from the login response
* @throws Exception
*/
public HashMap<String, String> getTicket(String usernameBase64, String sp, String pin,
        String pcid, String servertime, String nonce, String rsakv) throws Exception {
    // Percent-encode the two free-form values. Base64 may contain '+', '/' and '=',
    // and the captcha text is typed by the user.
    String su = URLEncoder.encode(usernameBase64, "UTF-8");
    String door = (pin == null) ? "" : URLEncoder.encode(pin, "UTF-8");
    String url = "https://login.sina.com.cn/sso/login.php?"
            + "entry=openapi&"
            + "gateway=1&"
            + "from=&"
            + "savestate=0&"
            + "useticket=1&"
            + "pagerefer=&"
            + "pcid=" + pcid + "&"
            + "ct=1800&"
            + "s=1&"
            + "vsnf=1&"
            + "vsnval=&"
            + "door=" + door + "&"
            + "su=" + su + "&"
            + "service=miniblog&"
            + "servertime=" + servertime + "&"
            + "nonce=" + nonce + "&"
            + "pwencode=rsa&"
            + "rsakv=" + rsakv + "&"
            + "sp=" + sp + "&"
            + "encoding=UTF-8&"
            + "callback=sinaSSOController.loginCallBack&"
            + "cdult=2&"
            + "domain=weibo.com&"
            + "prelt=37&"
            + "returntype=TEXT&"
            + "client=ssologin.js(v1.4.5)&"
            + "_=" + System.currentTimeMillis(); // current time in ms
    return getParaFromResult(get(url));
}

/**
* Parses the result and extracts the required parameters.
* @param result page content
* @return the key/value pairs found between the first '{' and the following '}'
*/
private HashMap<String, String> getParaFromResult(String result) {
    // Lightweight parser for a flat {"key":value,...} object, not a JSON parser.
    // Values containing ',' or '}' are still split or cut, and escape sequences are
    // kept as they arrive. Callers compare against \\uXXXX-escaped text.
    HashMap<String, String> hm = new HashMap<String, String>();
    if (result == null) {
        return hm; // e.g. the request did not return 200
    }
    int open = result.indexOf("{");
    int close = (open < 0) ? -1 : result.indexOf("}", open + 1);
    if (close < 0) {
        return hm; // no object in the response
    }

    String[] pairs = result.substring(open + 1, close).split(",");
    for (int i = 0; i < pairs.length; i++) {
        // Limit 2: only the first ':' separates key from value, so values that
        // contain ':' (URLs, times) stay whole.
        String[] keyValue = pairs[i].split(":", 2);
        if (keyValue.length < 2) {
            continue; // fragment without a ':'
        }
        hm.put(stripJsonQuotes(keyValue[0]), stripJsonQuotes(keyValue[1]));
    }
    return hm;
}

// Trims the token and removes one pair of surrounding double quotes, if present.
private static String stripJsonQuotes(String token) {
    String text = token.trim();
    if (text.length() >= 2 && text.startsWith("\"") && text.endsWith("\"")) {
        return text.substring(1, text.length() - 1);
    }
    return text;
}

/**
* Requests the given URL and returns the page content sent back by the target.
* @param url address to request
* @return the response body decoded as UTF-8, or null when the status is not 200
* @throws IOException
*/
private String get(String url) throws IOException {
    GetMethod getMethod = new GetMethod(url);
    try {
        int status = httpClient.executeMethod(getMethod);
        if (status != HttpStatus.SC_OK) {
            return null;
        }
        byte[] body = getMethod.getResponseBody();
        return (body == null) ? null : new String(body, "UTF-8");
    } finally {
        // In finally so the pooled connection is returned even when the request throws.
        getMethod.releaseConnection();
    }
}

/**
* Initialises the configuration.
* @return the properties loaded from config.properties
*/
private Properties initProperties() {
    // Loads config.properties through the context class loader.
    // A missing resource is reported by name instead of failing with a
    // NullPointerException inside Properties.load.
    Properties prop = new Properties();
    java.io.InputStream in = Thread.currentThread().getContextClassLoader().
            getResourceAsStream("config.properties");
    if (in == null) {
        throw new IllegalStateException("config.properties not found on the classpath");
    }
    try {
        prop.load(in);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }
    return prop;
}

/**
* Command-line entry point.
* @param args command-line arguments
*/
public static void main(String[] args) {
    // Optional arguments: args[0] = user name, args[1] = password. Without them the
    // empty placeholders of the original demo are used.
    String username = args.length > 0 ? args[0] : "";
    String password = args.length > 1 ? args[1] : "";

    WeiboLoginer loginer = new WeiboLoginer();
    LoginParams loginParams = loginer.doLogin(username, password);

    // No captcha needed.
    if (loginParams.getCode() != null) {
        System.out.println(SinaWeiboOAuth.getToken(loginParams.getCode()));
        return;
    }

    // Captcha needed.
    String pcid = loginParams.getPcid();
    String nonce = loginParams.getNonce();
    String rsakv = loginParams.getRsakv();
    String servertime = loginParams.getServertime();
    String sp = loginParams.getSp();

    System.err.println(loginParams.getImgUrl());
    // Fetch the captcha URL once more.
    System.err.println(loginer.getVCode(pcid));

    // One Scanner for all console input: a second Scanner on System.in can miss
    // input the first one has already buffered.
    Scanner input = new Scanner(System.in);
    String pin = input.nextLine();
    LoginParams loginResult = loginer.doLoginByPin(username, sp, pin, pcid, servertime, nonce, rsakv);

    if (!loginResult.isLogin()) {
        System.err.println("验证码错误!重新录入");
        // Fetch and save a new captcha image (test aid).
        String imgUrl = loginer.getVCode(pcid);
        loginer.saveVCodeImg(imgUrl);
        String pin1 = input.nextLine();
        loginResult = loginer.doLoginByPin(username, sp, pin1, pcid, servertime, nonce, rsakv);
    }

    // Exchange the code for a token whichever attempt produced it.
    String code = loginResult.getCode();
    if (code != null) {
        System.out.println(SinaWeiboOAuth.getToken(code));
    } else {
        System.err.println("Login failed: no authorization code was returned");
    }
}
}

参考地址 http://www.cnblogs.com/zhengbing/p/3459249.html

新浪微博模拟登陆+数据抓取(java实现)的更多相关文章

  1. 腾讯微博模拟登陆+数据抓取(java实现)

    不多说,贴出相关代码. 参数实体: package token.def; import java.io.Serializable; import java.util.Properties; publi ...

  2. 【转】详解抓取网站,模拟登陆,抓取动态网页的原理和实现(Python,C#等)

    转自:http://www.crifan.com/files/doc/docbook/web_scrape_emulate_login/release/html/web_scrape_emulate_ ...

  3. Scrapy模拟登陆豆瓣抓取数据

    scrapy  startproject douban 其中douban是我们的项目名称 2创建爬虫文件 进入到douban 然后创建爬虫文件 scrapy genspider dou douban. ...

  4. 模拟登陆+数据爬取 (python+selenuim)

    以下代码是用来爬取LinkedIn网站一些学者的经历的,仅供参考,注意:不要一次性大量爬取会被封号,不要问我为什么知道 #-*- coding:utf-8 -*- from selenium impo ...

  5. 新浪微博数据抓取(java实现)

    多了不说,直接贴出相关部分的实现代码 加密部分实现: package token.exe; import java.math.BigInteger; import java.util.Random; ...

  6. 腾讯微博数据抓取(java实现)

    不多说,同样贴出相关代码 参数实体: package token.def; import java.io.Serializable; import java.util.Properties; publ ...

  7. Java实现多种方式的http数据抓取

    前言: 时下互联网第一波的浪潮已消逝,随着而来的基于万千数据的物联网时代,因而数据成为企业的重要战略资源之一.基于数据抓取技术,本文介绍了java相关抓取工具,并附上demo源码供感兴趣的朋友测试! ...

  8. scrapy 知乎的模拟登陆及抓取用户数据

    最近看了python的scrapy 框架并用其抓取了部分知乎用户数据,代码主要是集中在知乎登陆和抓取时候的逻辑处理上. 1. 首先进入知乎登陆页面zhihu.com/#sigin上, 用xpath提取 ...

  9. 大数据抓取采集框架(摘抄至http://blog.jobbole.com/46673/)

    摘抄至http://blog.jobbole.com/46673/ 随着BIG DATA大数据概念逐渐升温,如何搭建一个能够采集海量数据的架构体系摆在大家眼前.如何能够做到所见即所得的无阻拦式采集.如 ...

随机推荐

  1. T-SQL字符串相加之后被截断的那点事

    本文出处:http://www.cnblogs.com/wy123/p/6217772.html 字符串自身相加, 虽然赋值给了varchar(max)类型的变量,在某些特殊情况下仍然会被“截断”,这 ...

  2. [LeetCode] Strobogrammatic Number II 对称数之二

    A strobogrammatic number is a number that looks the same when rotated 180 degrees (looked at upside ...

  3. 重构alert,confirm

    最近写了一个重构的alert,confirm控件,调用时直接使用alert,confirm即可 //调用方法 alert("提示语") window.confirm('你确定要删除 ...

  4. scala中的Actor

    1.介绍 2.简单示例 3.第二个程序 4.通信程序 package day01 import scala.actors.Actor case class Message(content: Strin ...

  5. cocos2d-x CCScrollView和CCTableView的使用(转载)

    转载请注明来自:Alex Zhou的程序世界,本文链接:http://codingnow.cn/cocos2d-x/1024.html //============================== ...

  6. pes and ts stream, how to convert

    http://*.com/questions/4145575/transport-stream-mpeg-file-fromat What you are probably w ...

  7. Android 获取有规律资源Id解决方案

    在多个有规律的资源ID获取的时候,可以使用getIdentifier方法来获取,来获取. 用到场景:工具类打成.jar包的时候,有时候会需要引用到res中的资源,这时候不能将资源一起打包,只能通过反射 ...

  8. 用友金蝶SQL数据库误格式化恢复 SQL数据库修复 SQL数据库恢复 工具 方法

    用友金蝶SQL数据库误格式化恢复 SQL数据库修复 SQL数据库恢复 硬盘误格式化.重分区.重装操作系统覆盖 SQL数据解决方法 [客户名称]:贵州铜仁市开天驾驶人培训中心 [软件名称]:用友T3普及 ...

  9. Python包和版本管理的最好工具----pipenv

    pipenv 是Kenneth Reitz大神的作品,提供Python的各个版本间的管理,各种包管理.个人觉得是virtualenv pip等工具的合体. pipenv主要有以下特性: (1)以前我们 ...

  10. 深入理解css优先级

    为什么要写这篇文章是因为 <style type="text/css"> body h1 { color: green; } html h1 { color: purp ...