在写爬虫的时候,经常会遇到验证码识别的问题。目前遇到的验证码大多可以借助第三方服务来解决,但是谷歌的验证码尚未找到破解方法。
一般的验证码识别 未有造影、干扰线的 可以选择腾讯OCR通用识别 https://ai.qq.com/
复杂点的验证码 可以接入若快打码 http://www.ruokuai.com/
无论是腾讯OCR识别还是若快打码 都是需要将验证码的图片转成base64作为参数。
还记得自己使用第三方验证码识别时,明明识别结果都是正确的,却发现每次都提示验证码错误,后来在排查的时候发现:
虽然已将图片转成了base64,但是下载验证码图片时没有带上cookie值,导致失败。
/// <summary>
/// Downloads the image at <paramref name="url"/> with an HTTP GET and returns
/// its raw bytes encoded as a Base64 string.
/// </summary>
/// <param name="url">Absolute URL of the image to download.</param>
/// <param name="cookie">
/// Optional cookie to send with the request. When supplied it is attached to
/// the request for <paramref name="url"/>.
/// </param>
/// <returns>
/// The Base64-encoded response body, or an empty string when the URL is
/// invalid, the request fails, or the server does not answer with 200 OK.
/// </returns>
public static string ImgBase64(string url, Cookie cookie = null)
{
    var content = new byte[0];
    try
    {
        var uriObject = new Uri(url);

        // Make sure TLS 1.2 is allowed without overwriting whatever the process
        // already enabled. SSL 3.0 is deliberately not enabled: it is insecure and
        // rejected outright by newer runtimes. Done before the request is created
        // so the setting is in effect for this connection.
        ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

        var request = (HttpWebRequest)WebRequest.Create(uriObject);
        request.Method = "GET";
        if (cookie != null)
        {
            request.CookieContainer = new CookieContainer();
            request.CookieContainer.Add(uriObject, cookie);
        }

        using (var response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                using (var responseStream = response.GetResponseStream())
                {
                    if (responseStream != null)
                    {
                        // Read until end-of-stream instead of trusting ContentLength,
                        // which is -1 for chunked/unknown-length responses.
                        using (var buffer = new System.IO.MemoryStream())
                        {
                            responseStream.CopyTo(buffer);
                            content = buffer.ToArray();
                        }
                    }
                }
            }
        }
    }
    catch (WebException ex)
    {
        // Non-success status codes surface here; release the attached response
        // so the underlying connection is not leaked.
        if (ex.Response != null)
        {
            ex.Response.Close();
        }
        System.Diagnostics.Trace.TraceWarning("ImgBase64 request failed for '" + url + "': " + ex);
    }
    catch (Exception ex)
    {
        // Best-effort helper: callers get an empty string on failure, but the
        // cause is recorded instead of being silently discarded.
        System.Diagnostics.Trace.TraceWarning("ImgBase64 failed for '" + url + "': " + ex);
    }

    return Convert.ToBase64String(content);
}
对一些点击验证以及拖动的验证 可以使用极验验证
http://jiyan.c2567.com/index.html