/*
 * C语言实现抓取网页源代码
 * (Fetching a web page's source code in C)
 *
 * 时间 (date): 2021-12-22 01:43:49
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>

/* TCP port the request is sent to; main() converts it with htons() when
 * filling in the server address. */
int port = 80;

/*
 * Fetch "/" from the HTTP server named on the command line and print the
 * response body to stdout.
 *
 * Usage: <program> <hostname>
 *
 * Steps:
 *   1. Resolve the host with gethostbyname() and print its official name,
 *      aliases and addresses.
 *   2. Connect to the first address on TCP port `port` and send a GET request.
 *   3. Discard the response headers (everything up to the first empty line).
 *   4. Copy the remaining bytes to stdout until the peer closes the
 *      connection, recv() fails, or no data arrives for one second.
 *
 * Returns 0 once reading stops (including when the connection ends before
 * the headers are complete) and -1 on a usage error. Exits with status 1 if
 * resolution fails, the host has no usable IPv4 address, the request does
 * not fit in the buffer, or socket/connect/send fail.
 */
int main(int argc,char **argv)
{
    char buffer[512];
    char message[512];
    /* Large enough for the text form of either an IPv4 or an IPv6 address. */
    char str[INET6_ADDRSTRLEN];
    struct sockaddr_in pin;
    struct hostent *hptr;
    struct timeval timeout = {1, 0};   /* 1 second, 0 microseconds */
    char **pptr;
    const char *text;
    int isock;
    int msg_len;
    size_t sent;
    ssize_t l;
    int headers_done = 0;
    int line_has_text = 0;

    if (argc != 2) {
        printf("%s url\n", (argc > 0) ? argv[0] : "program");
        return -1;
    }

    if ((hptr = gethostbyname(argv[1])) == NULL) {
        printf("gethostbyname is fail\n");
        exit(1);
    }

#if 1
    printf("official hostname:%s\n", hptr->h_name);
    for (pptr = hptr->h_aliases; *pptr != NULL; pptr++) {
        printf(" alias:%s\n", *pptr);
    }

    switch (hptr->h_addrtype) {
    case AF_INET:
    case AF_INET6:
        for (pptr = hptr->h_addr_list; *pptr != NULL; pptr++) {
            /* inet_ntop() returns NULL on failure; never hand that to %s. */
            text = inet_ntop(hptr->h_addrtype, *pptr, str, sizeof(str));
            printf(" address:%s\n", (text != NULL) ? text : "?");
        }
        if (hptr->h_addr_list[0] != NULL) {
            text = inet_ntop(hptr->h_addrtype, hptr->h_addr_list[0], str, sizeof(str));
            printf(" first address: %s\n", (text != NULL) ? text : "?");
        }
        break;
    default:
        printf("unknown address type\n");
        break;
    }
#endif

    /* struct sockaddr_in can only hold an IPv4 address, so refuse anything
     * else instead of copying bytes of the wrong size into it. */
    if (hptr->h_addrtype != AF_INET ||
        hptr->h_addr_list[0] == NULL ||
        (size_t)hptr->h_length != sizeof(pin.sin_addr)) {
        printf("no IPv4 address for host\n");
        exit(1);
    }

    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_port = htons((in_port_t)port);
    /* memcpy avoids assuming the resolver's buffer is aligned for in_addr. */
    memcpy(&pin.sin_addr, hptr->h_addr_list[0], sizeof(pin.sin_addr));

    /* Build the whole request with one bounded call so an over-long host
     * name is rejected rather than overflowing `message`. The header block
     * ends with exactly one empty line. */
    msg_len = snprintf(message, sizeof(message),
                       "GET / HTTP/1.1\r\n"
                       "Host:%s\r\n"
                       "Accept: */*\r\n"
                       "User-Agent: Mozilla/4.0(compatible)\r\n"
                       "connection:Keep-Alive\r\n"
                       "\r\n",
                       argv[1]);
    if (msg_len < 0 || (size_t)msg_len >= sizeof(message)) {
        printf("host name is too long\n");
        exit(1);
    }

    if ((isock = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        printf("Error opening socket!\n");
        exit(1);
    }

    if (connect(isock, (const struct sockaddr *)&pin, sizeof(pin)) == -1) {
        printf("Error connecting to socket\n");
        close(isock);
        exit(1);
    }

    /* send() may accept fewer bytes than requested; keep going until the
     * entire request has been handed to the kernel. */
    sent = 0;
    while (sent < (size_t)msg_len) {
        ssize_t n = send(isock, message + sent, (size_t)msg_len - sent, 0);
        if (n == -1) {
            printf("Error in send\n");
            close(isock);
            exit(1);
        }
        sent += (size_t)n;
    }

    /* Receive timeout. The request asks for a kept-alive connection, so the
     * server may leave it open after the response; in that case this timeout
     * is what ends the read loops below. */
    if (setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) == -1) {
        fprintf(stderr, "warning: could not set receive timeout\n");
    }

    /* Skip the response headers one byte at a time: they end at the first
     * line that contains nothing but its line terminator. */
    while (!headers_done) {
        l = recv(isock, buffer, 1, 0);
        if (l <= 0) {
            /* Error, timeout, or the peer closed before the headers ended:
             * there is no body to print. */
            close(isock);
            return 0;
        }
        switch (buffer[0]) {
        case '\r':
            break;
        case '\n':
            if (!line_has_text) {
                headers_done = 1;
            }
            line_has_text = 0;
            break;
        default:
            line_has_text = 1;
            break;
        }
    }

    /* Copy the body to stdout. fwrite() with an explicit length keeps any
     * NUL bytes in the payload instead of truncating at them. */
    for (;;) {
        l = recv(isock, buffer, sizeof(buffer), 0);
        if (l <= 0) {
            break;
        }
        if (fwrite(buffer, 1, (size_t)l, stdout) != (size_t)l) {
            break;
        }
    }

    close(isock);
    return 0;
}