New demo code:
新的演示代码:
I am trying to get the captcha image from AOL, and I keep getting an error 418.
我试图从AOL获得captcha图像,我一直得到一个错误418。
unit imageunit;
///
/// https://new.aol.com/productsweb/
///
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, IdIOHandler, IdIOHandlerSocket, IdIOHandlerStack, IdSSL,
IdSSLOpenSSL, IdIntercept, IdZLibCompressorBase, IdCompressorZLib,
IdCookieManager, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient,
IdHTTP,jpeg,GIFImg, ExtCtrls, PerlRegEx;
type
// Demo form: downloads the AOL sign-up page and then tries to fetch and
// display the captcha image referenced by that page.
// The component fields below are streamed from the form's .dfm, so their
// names must stay in sync with it.
TForm2 = class(TForm)
// HTTP client used for both requests; in the .dfm it is wired to the cookie
// manager, the zlib compressor, the SSL IO handler and the intercept below.
IdHTTP1: TIdHTTP;
IdCookieManager1: TIdCookieManager;
IdCompressorZLib1: TIdCompressorZLib;
IdConnectionIntercept1: TIdConnectionIntercept;
IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL;
Panel1: TPanel;
// Receives the downloaded captcha picture in Button1Click.
Image1: TImage;
Panel2: TPanel;
// Its OnClick is bound to Button1Click in the .dfm.
Button1: TButton;
// NOTE(review): not referenced by the visible code - getaimcaptchaimage
// creates its own TPerlRegEx instance.
PerlRegEx1: TPerlRegEx;
// Shows the HTML of the sign-up page after the first request.
Memo1: TMemo;
// Fetches the sign-up page, extracts the captcha URL and loads the image.
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form2: TForm2;
implementation
{$R *.dfm}
{ Extracts the query part of the captcha image URL from the sign-up page HTML.

  data   - HTML text to search.
  Result - the text that follows "/productsweb/WordVerImage" inside the first
           matching <img src="..."> attribute, INCLUDING the leading '?'
           (e.g. '?91868359'), so the caller can append it directly to the
           '.../WordVerImage' base URL. Returns '' when no such tag is found. }
function getaimcaptchaimage(data:string):string;
var
  Regex: TPerlRegEx;
begin
  // Always hand back a defined value, even when nothing matches.
  Result := '';
  Regex := TPerlRegEx.Create(nil);
  try
    // The '?' is escaped and kept inside the capture group: unescaped it only
    // made the preceding 'e' optional and the query separator ended up in the
    // capture by accident. The captured text is unchanged for real captcha tags.
    Regex.RegEx := '<img src="/productsweb/WordVerImage(\?.*?)"';
    Regex.Options := [preCaseless];
    Regex.Subject := data;
    if Regex.Match and (Regex.SubExpressionCount >= 1) then
      Result := Regex.SubExpressions[1];
  finally
    // Created without an owner, so it must be released here.
    Regex.Free;
  end;
end;
{ Downloads the AOL sign-up page, shows its HTML in Memo1, builds the captcha
  image URL from it and displays the downloaded image in Image1.

  Both requests go through IdHTTP1. Any exception raised while downloading or
  decoding is reported with a message box; the image is only replaced when the
  whole sequence succeeded. }
procedure TForm2.Button1Click(Sender: TObject);
var
  JPI: TJPEGImage;
  streamdata: TMemoryStream;
  SStream: TStringStream;
  page, website: string;
begin
  // Pre-nil so the finally block can free unconditionally.
  streamdata := nil;
  JPI := nil;
  SStream := TStringStream.Create('');
  try
    streamdata := TMemoryStream.Create;
    JPI := TJPEGImage.Create;
    try
      // First request: the sign-up page that embeds the captcha <img> tag.
      idhttp1.Get('https://new.aol.com/productsweb/', SStream);
      page := UTF8ToWideString(SStream.DataString);
      memo1.Text := page;

      // Example of the resulting URL:
      //   https://new.aol.com/productsweb/WordVerImage?91868359
      website := 'https://new.aol.com/productsweb/WordVerImage' + getaimcaptchaimage(page);
      Caption := website;

      // Second request: the captcha image itself.
      // NOTE(review): this is the request reported to fail with "418 unused".
      idhttp1.Get(website, streamdata);

      // Decode only after a successful download; previously an empty stream
      // was decoded (and assigned to Image1) even when the request had failed.
      streamdata.Position := 0;
      JPI.LoadFromStream(streamdata);
      Image1.Picture.Assign(JPI);
    except
      on E: Exception do
        MessageDlg('Exception: '+E.Message, mtError, [mbOK], 0);
    end;
  finally
    JPI.Free;
    streamdata.Free;
    SStream.Free; // was leaked before
  end;
end;
end.
Form:
形式:
object Form2: TForm2
Left = 0
Top = 0
Caption = 'Form2'
ClientHeight = 247
ClientWidth = 480
Color = clBtnFace
Font.Charset = DEFAULT_CHARSET
Font.Color = clWindowText
Font.Height = -11
Font.Name = 'Tahoma'
Font.Style = []
OldCreateOrder = False
PixelsPerInch = 96
TextHeight = 13
object Panel1: TPanel
Left = 0
Top = 41
Width = 480
Height = 206
Align = alClient
TabOrder = 0
object Image1: TImage
Left = 1
Top = 1
Width = 478
Height = 115
Align = alClient
ExplicitLeft = 5
ExplicitTop = 17
ExplicitWidth = 200
ExplicitHeight = 70
end
object Memo1: TMemo
Left = 1
Top = 116
Width = 478
Height = 89
Align = alBottom
TabOrder = 0
ExplicitLeft = 80
ExplicitTop = 152
ExplicitWidth = 185
end
end
object Panel2: TPanel
Left = 0
Top = 0
Width = 480
Height = 41
Align = alTop
TabOrder = 1
object Button1: TButton
Left = 239
Top = 6
Width = 75
Height = 25
Caption = 'Button1'
TabOrder = 0
OnClick = Button1Click
end
end
object IdHTTP1: TIdHTTP
Intercept = IdConnectionIntercept1
IOHandler = IdSSLIOHandlerSocketOpenSSL1
MaxAuthRetries = 100
AllowCookies = True
HandleRedirects = True
RedirectMaximum = 100
ProxyParams.BasicAuthentication = False
ProxyParams.ProxyPort = 0
Request.ContentLength = -1
Request.Accept =
'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-s' +
'hockwave-flash, application/cade, application/xaml+xml, applicat' +
'ion/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-' +
'application, */*'
Request.BasicAuthentication = False
Request.Referer = 'http://www.yahoo.com'
Request.UserAgent =
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/201001' +
'22 firefox/3.6.1'
HTTPOptions = [hoForceEncodeParams]
CookieManager = IdCookieManager1
Compressor = IdCompressorZLib1
Left = 40
Top = 160
end
object IdCookieManager1: TIdCookieManager
Left = 360
Top = 136
end
object IdCompressorZLib1: TIdCompressorZLib
Left = 408
Top = 56
end
object IdConnectionIntercept1: TIdConnectionIntercept
Left = 304
Top = 72
end
object IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL
Intercept = IdConnectionIntercept1
MaxLineAction = maException
Port = 0
DefaultPort = 0
SSLOptions.Mode = sslmUnassigned
SSLOptions.VerifyMode = []
SSLOptions.VerifyDepth = 0
Left = 192
Top = 136
end
object PerlRegEx1: TPerlRegEx
Options = []
Left = 120
Top = 56
end
end
If you go to https://new.aol.com/productsweb/ you will notice the captcha image has a url like https://new.aol.com/productsweb/WordVerImage?91868359
如果你访问https://new.aol.com/productsweb/你会注意到captcha图像有一个url,比如https://new.aol.com/productsweb/WordVerImage?91868359。
I put that url in the edit box and get an error.
我把那个url放在编辑框中,然后得到一个错误。
What is wrong with this code?
这段代码有什么问题?
4 个解决方案
#1
0
In my old projects I read the captcha from the web. I did this with the cache procedures of the embedded web browser component. So if you can read the temporary internet files from your code, you can read the image data from the cache. I have added some simple code below:
在我以前的项目中,我从web上读取了captcha。我在嵌入式web浏览器组件缓存过程中完成了这个任务。因此,如果您可以从您的代码中读取临时internet文件,您可以从缓存中读取图像数据。我在下面添加了一个简单的代码,
The GetCachedFileFromURL and ClearAllEntries functions are declared in the TEmbeddedWebBrowser unit. In my solution I used only the code I copied from it, to keep the exe size down, but you can use the component's up-to-date source instead. The component is open source.
GetCachedFileFromURL和ClearAllEntries函数在TEmbeddedWebBrowser单元中声明。在我的解决方案中,我只使用了我的复制代码来降低exe大小。但是您可以使用组件更新的源代码。组件是开源的。
uses
WinInet;
{ Looks up a URL in the WinINet (Internet Explorer) cache.

  strUL        - source URL to search for; compared for exact equality with
                 each entry's lpszSourceUrlName.
  strLocalFile - receives the entry's lpszLocalFileName when a match is found;
                 left untouched otherwise.
  Result       - True when a matching cache entry was found, False when the
                 cache holds no such entry or the enumeration failed. }
function GetCachedFileFromURL(strUL: string; var strLocalFile: string): Boolean;
var
  lpEntryInfo: PInternetCacheEntryInfo;
  hCacheDir: THandle; // enumeration handle; THandle rather than LongWord so it is not truncated on Win64
  dwEntrySize: LongWord;
begin
  Result := False;

  // Probe with an empty buffer to learn the size needed for the first entry.
  dwEntrySize := 0;
  FindFirstUrlCacheEntry(nil, TInternetCacheEntryInfo(nil^), dwEntrySize);
  if (GetLastError <> ERROR_INSUFFICIENT_BUFFER) or (dwEntrySize = 0) then
    Exit; // empty cache or a real failure: nothing to enumerate

  GetMem(lpEntryInfo, dwEntrySize);
  try
    // Begin the enumeration of the Internet cache.
    hCacheDir := FindFirstUrlCacheEntry(nil, lpEntryInfo^, dwEntrySize);
    if hCacheDir = 0 then
      Exit; // without a valid handle the old code looped forever

    try
      repeat
        if strUL = lpEntryInfo^.lpszSourceUrlName then
        begin
          strLocalFile := lpEntryInfo^.lpszLocalFileName;
          Result := True;
          Break;
        end;

        // Entries vary in size: drop the buffer and size the next one.
        FreeMem(lpEntryInfo);
        lpEntryInfo := nil;
        dwEntrySize := 0;
        FindNextUrlCacheEntry(hCacheDir, TInternetCacheEntryInfo(nil^), dwEntrySize);
        if GetLastError <> ERROR_INSUFFICIENT_BUFFER then
          Break; // ERROR_NO_MORE_ITEMS or any other failure ends the search

        GetMem(lpEntryInfo, dwEntrySize);
        if not FindNextUrlCacheEntry(hCacheDir, lpEntryInfo^, dwEntrySize) then
          Break;
      until False;
    finally
      // The enumeration handle was never closed before.
      FindCloseUrlCache(hCacheDir);
    end;
  finally
    // Runs on every path, including the early Break on a match, which
    // previously leaked the buffer.
    if lpEntryInfo <> nil then
      FreeMem(lpEntryInfo);
  end;
end;
// Sets SearchPattern to spAll and then calls ClearAllEntries.
// NOTE(review): SearchPattern, spAll and ClearAllEntries are not declared in
// the visible code; according to the accompanying text ClearAllEntries comes
// from the TEmbeddedWebBrowser unit. Presumably this wipes every cached
// entry - confirm against that unit.
procedure TForm1.ClearCache();
begin
SearchPattern := spAll;
ClearAllEntries;
end;
usage
使用
{ Usage example: clears the cache, looks up the captcha URL in the WinINet
  cache and, when a cached file exists, loads it as a JPEG. }
procedure TForm1.Button1Click(Sender: TObject);
var
  fname: string;
  jpImg: TJPEGImage;
begin
  ClearCache;
  // Create before the try so the finally never frees an uninitialised variable.
  jpImg := TJPEGImage.Create;
  try
    // Only load when the lookup succeeded; otherwise fname is empty and
    // LoadFromFile would fail on a non-existent file.
    if GetCachedFileFromURL('https://ebildirge.ssk.gov.tr/WPEB/PG', fname) then
      jpImg.LoadFromFile(fname);
  finally
    // Was FreeAndNil(jpgImg): an undeclared identifier that does not compile.
    FreeAndNil(jpImg);
  end;
end;
#2
7
There is a cookie involved. If you go straight to the captcha URL https://new.aol.com/productsweb/WordVerImage?91868359 in a browser that has not visited https://new.aol.com/productsweb/ then you get (after a refresh):
这里涉及到一个cookie。如果您在一个没有访问过https://new.aol.com/productsweb/的浏览器中直接访问captcha URL https://new.aol.com/productsweb/WordVerImage?91868359,您会得到(刷新后):
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>418 unused</title>
</head><body>
<h1>unused</h1>
<p>The server encountered an internal error or
misconfiguration and was unable to complete
your request.</p>
<p>Please contact the server administrator,
null and inform them of the time the error occurred,
and anything you might have done that may have
caused the error.</p>
<p>More information about this error may be available
in the server error log.</p>
</body></html>
But if you visit https://new.aol.com/productsweb/ first, then you will get an image. Clear the cookie and you get the error again (although as Francois pointed out, you get no response first, and then on refresh you get the image.)
但是如果你访问https://new.aol.com/productsweb/首先,你会得到一个图像。清除cookie并再次得到错误(尽管正如Francois指出的那样,您首先没有得到响应,然后刷新您得到图像)。
Indy supports cookies, so you will need to add support for cookies, and then either get the cookie by visiting the productsweb first, or simulate it from a known value.
Indy支持cookie,因此您需要添加对cookie的支持,然后通过访问productsweb获得cookie,或者从已知的值进行模拟。
You will notice that the image generated is not based on the number passed as a parameter alone, but also the cookie. Have two different browsers (Chrome and Firefox) that each have a different cookie, and then visit the same captcha URL and you will get two different images.
您会注意到生成的图像并不是基于仅作为参数传递的数字,而是基于cookie。有两个不同的浏览器(Chrome和Firefox),每个浏览器都有一个不同的cookie,然后访问相同的captcha URL,你会得到两个不同的图像。
Curious what you are trying to accomplish with this.
好奇你想用这个做什么。
#3
4
I think it means "go away". Somehow, through headers or something in your request, it's determining that you smell like a bot. Maybe because you're asking for an image that it knows it didn't just make for you. Yeah, that's probably it. If I go to your URL in my browser, I get a 418 too.
我想它的意思是“走开”。不知何故,通过标题或你的请求,它决定你闻起来像一个机器人。也许是因为你想要一张照片,它知道它并不仅仅是为了你。是的,这可能是它。如果我在浏览器中访问你的URL,我也会得到418。
#4
2
It's not your code. try in a browser....
(You obviously need to remove the blanks from 'h t t p s'...)
这不是你的代码的问题。请在浏览器中试试……(显然,你需要去掉“h t t p s”中的空格……)
This URL https://new.aol.com/productsweb/
apparently needs to be called before you can get a captcha image. Otherwise you get an (improper) error 418 Unused
.
Sometimes I had to try twice with the image # as I first got a 420 Unused
error...
这个URL https://new.aol.com/productsweb/ 显然需要在你获取captcha图像之前先被调用。否则,你会得到一个(不恰当的)418 Unused 错误。有时候我需要用图像编号尝试两次,因为第一次得到的是 420 Unused 错误……
You better ask them, as their API does not really seem stable...
你最好问他们,因为他们的API看起来并不稳定……
RE: the http 418 joke. If you want some fun with the http error codes read on A Web Developer and His Girlfriend(s)
RE: http 418笑话。如果你想要一些有趣的http错误代码,请阅读Web开发人员和他的女朋友(s)
#1
0
In my old projects I read the captcha from the web. I did this with the cache procedures of the embedded web browser component. So if you can read the temporary internet files from your code, you can read the image data from the cache. I have added some simple code below:
在我以前的项目中,我从web上读取了captcha。我在嵌入式web浏览器组件缓存过程中完成了这个任务。因此,如果您可以从您的代码中读取临时internet文件,您可以从缓存中读取图像数据。我在下面添加了一个简单的代码,
The GetCachedFileFromURL and ClearAllEntries functions are declared in the TEmbeddedWebBrowser unit. In my solution I used only the code I copied from it, to keep the exe size down, but you can use the component's up-to-date source instead. The component is open source.
GetCachedFileFromURL和ClearAllEntries函数在TEmbeddedWebBrowser单元中声明。在我的解决方案中,我只使用了我的复制代码来降低exe大小。但是您可以使用组件更新的源代码。组件是开源的。
uses
WinInet;
{ Looks up a URL in the WinINet (Internet Explorer) cache.

  strUL        - source URL to search for; compared for exact equality with
                 each entry's lpszSourceUrlName.
  strLocalFile - receives the entry's lpszLocalFileName when a match is found;
                 left untouched otherwise.
  Result       - True when a matching cache entry was found, False when the
                 cache holds no such entry or the enumeration failed. }
function GetCachedFileFromURL(strUL: string; var strLocalFile: string): Boolean;
var
  lpEntryInfo: PInternetCacheEntryInfo;
  hCacheDir: THandle; // enumeration handle; THandle rather than LongWord so it is not truncated on Win64
  dwEntrySize: LongWord;
begin
  Result := False;

  // Probe with an empty buffer to learn the size needed for the first entry.
  dwEntrySize := 0;
  FindFirstUrlCacheEntry(nil, TInternetCacheEntryInfo(nil^), dwEntrySize);
  if (GetLastError <> ERROR_INSUFFICIENT_BUFFER) or (dwEntrySize = 0) then
    Exit; // empty cache or a real failure: nothing to enumerate

  GetMem(lpEntryInfo, dwEntrySize);
  try
    // Begin the enumeration of the Internet cache.
    hCacheDir := FindFirstUrlCacheEntry(nil, lpEntryInfo^, dwEntrySize);
    if hCacheDir = 0 then
      Exit; // without a valid handle the old code looped forever

    try
      repeat
        if strUL = lpEntryInfo^.lpszSourceUrlName then
        begin
          strLocalFile := lpEntryInfo^.lpszLocalFileName;
          Result := True;
          Break;
        end;

        // Entries vary in size: drop the buffer and size the next one.
        FreeMem(lpEntryInfo);
        lpEntryInfo := nil;
        dwEntrySize := 0;
        FindNextUrlCacheEntry(hCacheDir, TInternetCacheEntryInfo(nil^), dwEntrySize);
        if GetLastError <> ERROR_INSUFFICIENT_BUFFER then
          Break; // ERROR_NO_MORE_ITEMS or any other failure ends the search

        GetMem(lpEntryInfo, dwEntrySize);
        if not FindNextUrlCacheEntry(hCacheDir, lpEntryInfo^, dwEntrySize) then
          Break;
      until False;
    finally
      // The enumeration handle was never closed before.
      FindCloseUrlCache(hCacheDir);
    end;
  finally
    // Runs on every path, including the early Break on a match, which
    // previously leaked the buffer.
    if lpEntryInfo <> nil then
      FreeMem(lpEntryInfo);
  end;
end;
// Sets SearchPattern to spAll and then calls ClearAllEntries.
// NOTE(review): SearchPattern, spAll and ClearAllEntries are not declared in
// the visible code; according to the accompanying text ClearAllEntries comes
// from the TEmbeddedWebBrowser unit. Presumably this wipes every cached
// entry - confirm against that unit.
procedure TForm1.ClearCache();
begin
SearchPattern := spAll;
ClearAllEntries;
end;
usage
使用
{ Usage example: clears the cache, looks up the captcha URL in the WinINet
  cache and, when a cached file exists, loads it as a JPEG. }
procedure TForm1.Button1Click(Sender: TObject);
var
  fname: string;
  jpImg: TJPEGImage;
begin
  ClearCache;
  // Create before the try so the finally never frees an uninitialised variable.
  jpImg := TJPEGImage.Create;
  try
    // Only load when the lookup succeeded; otherwise fname is empty and
    // LoadFromFile would fail on a non-existent file.
    if GetCachedFileFromURL('https://ebildirge.ssk.gov.tr/WPEB/PG', fname) then
      jpImg.LoadFromFile(fname);
  finally
    // Was FreeAndNil(jpgImg): an undeclared identifier that does not compile.
    FreeAndNil(jpImg);
  end;
end;
#2
7
There is a cookie involved. If you go straight to the captcha URL https://new.aol.com/productsweb/WordVerImage?91868359 in a browser that has not visited https://new.aol.com/productsweb/ then you get (after a refresh):
这里涉及到一个cookie。如果您在一个没有访问过https://new.aol.com/productsweb/的浏览器中直接访问captcha URL https://new.aol.com/productsweb/WordVerImage?91868359,您会得到(刷新后):
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>418 unused</title>
</head><body>
<h1>unused</h1>
<p>The server encountered an internal error or
misconfiguration and was unable to complete
your request.</p>
<p>Please contact the server administrator,
null and inform them of the time the error occurred,
and anything you might have done that may have
caused the error.</p>
<p>More information about this error may be available
in the server error log.</p>
</body></html>
But if you visit https://new.aol.com/productsweb/ first, then you will get an image. Clear the cookie and you get the error again (although as Francois pointed out, you get no response first, and then on refresh you get the image.)
但是如果你访问https://new.aol.com/productsweb/首先,你会得到一个图像。清除cookie并再次得到错误(尽管正如Francois指出的那样,您首先没有得到响应,然后刷新您得到图像)。
Indy supports cookies, so you will need to add support for cookies, and then either get the cookie by visiting the productsweb first, or simulate it from a known value.
Indy支持cookie,因此您需要添加对cookie的支持,然后通过访问productsweb获得cookie,或者从已知的值进行模拟。
You will notice that the image generated is not based on the number passed as a parameter alone, but also the cookie. Have two different browsers (Chrome and Firefox) that each have a different cookie, and then visit the same captcha URL and you will get two different images.
您会注意到生成的图像并不是基于仅作为参数传递的数字,而是基于cookie。有两个不同的浏览器(Chrome和Firefox),每个浏览器都有一个不同的cookie,然后访问相同的captcha URL,你会得到两个不同的图像。
Curious what you are trying to accomplish with this.
好奇你想用这个做什么。
#3
4
I think it means "go away". Somehow, through headers or something in your request, it's determining that you smell like a bot. Maybe because you're asking for an image that it knows it didn't just make for you. Yeah, that's probably it. If I go to your URL in my browser, I get a 418 too.
我想它的意思是“走开”。不知何故,通过标题或你的请求,它决定你闻起来像一个机器人。也许是因为你想要一张照片,它知道它并不仅仅是为了你。是的,这可能是它。如果我在浏览器中访问你的URL,我也会得到418。
#4
2
It's not your code. try in a browser....
(You obviously need to remove the blanks from 'h t t p s'...)
这不是你的代码的问题。请在浏览器中试试……(显然,你需要去掉“h t t p s”中的空格……)
This URL https://new.aol.com/productsweb/
apparently needs to be called before you can get a captcha image. Otherwise you get an (improper) error 418 Unused
.
Sometimes I had to try twice with the image # as I first got a 420 Unused
error...
这个URL https://new.aol.com/productsweb/ 显然需要在你获取captcha图像之前先被调用。否则,你会得到一个(不恰当的)418 Unused 错误。有时候我需要用图像编号尝试两次,因为第一次得到的是 420 Unused 错误……
You better ask them, as their API does not really seem stable...
你最好问他们,因为他们的API看起来并不稳定……
RE: the http 418 joke. If you want some fun with the http error codes read on A Web Developer and His Girlfriend(s)
RE: http 418笑话。如果你想要一些有趣的http错误代码,请阅读Web开发人员和他的女朋友(s)