C++ 获取URL内容的实例

时间:2021-10-15 05:21:20

我就废话不多说了,大家还是直接看代码吧~

以下内容摘自* 链接

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#ifndef HTTPUTIL_H
#define HTTPUTIL_H
#include <windows.h>
#include <string>
#include <stdio.h>
using std::string;
#pragma comment(lib,"ws2_32.lib")
 
void mParseUrl(char *mUrl, string &serverName, string &filepath, string &filename);
SOCKET connectToServer(char *szServerName, WORD portNum);
int getHeaderLength(char *content);
char *readUrl2(char *szUrl, long &bytesReturnedOut, char **headerOut);
 
char *sendRequest(char szUrl[]) {
 WSADATA wsaData;
 //char szUrl[] = "http://api.m.taobao.com/rest/api3.do?api=mtop.common.getTimestamp";
 long fileSize;
 char *memBuffer, *headerBuffer;
 
 memBuffer = headerBuffer = nullptr;
 
 if (WSAStartup(0x101, &wsaData) != 0)
  return nullptr;
 
 memBuffer = readUrl2(szUrl, fileSize, &headerBuffer);
 printf("returned from readUrl\n");
 printf("data returned:\n%s", memBuffer);
 if (fileSize != 0) {
  //delete (memBuffer);
  delete (headerBuffer);
 }
 WSACleanup();
 return memBuffer;
}
 
/**
 * Splits a URL into host, request target and file name.
 *
 * A leading "http://" and/or "https://" prefix is removed first.
 *
 * @param mUrl        Null-terminated URL; nullptr is treated as an empty URL.
 * @param serverName  Receives everything before the first '/', '?' or '#'
 *                    (the host, including a ":port" suffix if present).
 * @param filepath    Receives the rest of the URL, always starting with '/'
 *                    ("/" when the URL has no path). The query string is kept
 *                    because it has to be sent to the server.
 * @param filename    Receives the last path segment without query string or
 *                    fragment; empty when the path ends in '/'.
 */
void mParseUrl(char *mUrl, std::string &serverName, std::string &filepath, std::string &filename) {
 std::string url = (mUrl != nullptr) ? mUrl : "";

 if (url.compare(0, 7, "http://") == 0)
  url.erase(0, 7);

 if (url.compare(0, 8, "https://") == 0)
  url.erase(0, 8);

 // The host ends at the first path, query or fragment delimiter. Looking
 // only for '/' would leave "?x=1" glued to the host in "host?x=1".
 const std::string::size_type hostEnd = url.find_first_of("/?#");
 if (hostEnd == std::string::npos) {
  serverName = url;
  filepath = "/";
  filename = "";
  return;
 }

 serverName = url.substr(0, hostEnd);
 filepath = url.substr(hostEnd);
 if (filepath[0] != '/')
  filepath.insert(0, 1, '/'); // "host?x=1" -> "/?x=1"

 // Search for the last '/' in the path part only, so that a '/' inside the
 // query string (e.g. "?next=/home") cannot be mistaken for a separator.
 const std::string pathOnly = filepath.substr(0, filepath.find_first_of("?#"));
 filename = pathOnly.substr(pathOnly.rfind('/') + 1);
}
 
/**
 * Opens a TCP connection to an IPv4 host.
 *
 * @param szServerName  Host name or dotted-decimal IPv4 address.
 * @param portNum       TCP port in host byte order.
 * @return The connected socket, or 0 on any failure (null name, name
 *         resolution, socket creation or connect).
 *
 * NOTE(review): failure is reported as 0 rather than INVALID_SOCKET to stay
 * compatible with the value this function has always returned; callers
 * should treat both values as "no connection".
 */
SOCKET connectToServer(char *szServerName, WORD portNum) {
 if (szServerName == nullptr)
  return 0;

 struct sockaddr_in server;
 memset(&server, 0, sizeof(server)); // also clears the sin_zero padding
 server.sin_family = AF_INET;
 server.sin_port = htons(portNum);

 const unsigned long literalAddr = inet_addr(szServerName);
 if (literalAddr != INADDR_NONE) {
  // Already a numeric address: use it directly. A reverse lookup through
  // gethostbyaddr() is unnecessary and fails for addresses that have no
  // reverse DNS entry, which made such hosts unreachable.
  server.sin_addr.s_addr = literalAddr;
 } else {
  struct hostent *hp = gethostbyname(szServerName);
  if (hp == nullptr || hp->h_addr_list[0] == nullptr)
   return 0;
  memcpy(&server.sin_addr.s_addr, hp->h_addr_list[0], sizeof(server.sin_addr.s_addr));
 }

 // Create the socket only after resolution succeeded, so the failure paths
 // above have nothing to clean up.
 SOCKET conn = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 if (conn == INVALID_SOCKET)
  return 0;

 if (connect(conn, (struct sockaddr *)&server, sizeof(server)) != 0) {
  closesocket(conn);
  return 0;
 }
 return conn;
}
 
/**
 * Returns the length of the header part of a raw HTTP response.
 *
 * The header ends at the first "\r\n\r\n"; if that sequence does not occur
 * anywhere, "\n\r\n\r" is tried as a fallback.
 *
 * @param content  Null-terminated response text; may be nullptr.
 * @return Offset of the first byte after the terminator (i.e. where the body
 *         starts), or -1 if content is nullptr or contains no terminator.
 */
int getHeaderLength(char *content) {
 if (content == nullptr)
  return -1;

 // Tried in order: the first entry wins even if the second one occurs
 // earlier in the text.
 const char *const terminators[] = {"\r\n\r\n", "\n\r\n\r"};
 for (const char *terminator : terminators) {
  const char *hit = strstr(content, terminator);
  if (hit != nullptr)
   return static_cast<int>((hit - content) + strlen(terminator));
 }
 return -1;
}
 
char *readUrl2(char *szUrl, long &bytesReturnedOut, char **headerOut) {
 const int bufSize = 512;
 char readBuffer[bufSize], sendBuffer[bufSize], tmpBuffer[bufSize];
 char *tmpResult = nullptr, *result;
 SOCKET conn;
 string server, filepath, filename;
 long totalBytesRead, thisReadSize, headerLen;
 
 mParseUrl(szUrl, server, filepath, filename);
 
 / step 1, connect //
 conn = connectToServer((char *)server.c_str(), 80);
 
 / step 2, send GET request /
 sprintf(tmpBuffer, "GET %s HTTP/1.0", filepath.c_str());
 strcpy(sendBuffer, tmpBuffer);
 strcat(sendBuffer, "\r\n");
 sprintf(tmpBuffer, "Host: %s", server.c_str());
 strcat(sendBuffer, tmpBuffer);
 strcat(sendBuffer, "\r\n");
 strcat(sendBuffer, "\r\n");
 send(conn, sendBuffer, strlen(sendBuffer), 0);
 
 // SetWindowText(edit3Hwnd, sendBuffer);
 printf("Buffer being sent:\n%s", sendBuffer);
 
 / step 3 - get received bytes
 // Receive until the peer closes the connection
 totalBytesRead = 0;
 while (1) {
  memset(readBuffer, 0, bufSize);
  thisReadSize = recv (conn, readBuffer, bufSize, 0);
 
  if ( thisReadSize <= 0 )
   break;
 
  tmpResult = (char *)realloc(tmpResult, thisReadSize + totalBytesRead);
 
  memcpy(tmpResult + totalBytesRead, readBuffer, thisReadSize);
  totalBytesRead += thisReadSize;
 }
 
 headerLen = getHeaderLength(tmpResult);
 long contenLen = totalBytesRead - headerLen;
 result = new char[contenLen + 1];
 memcpy(result, tmpResult + headerLen, contenLen);
 result[contenLen] = 0x0;
 char *myTmp;
 
 myTmp = new char[headerLen + 1];
 strncpy(myTmp, tmpResult, headerLen);
 myTmp[headerLen] = NULL;
 delete (tmpResult);
 *headerOut = myTmp;
 
 bytesReturnedOut = contenLen;
 closesocket(conn);
 return (result);
}
#endif // HTTPUTIL_H

测试代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
#include <string>
#include <stdio.h>
#include "HttpUtil.h"
#include <iostream>
using std::string;
using namespace std;
 
int main() {
 char *resData = sendRequest("http://api.m.taobao.com/rest/api3.do?api=mtop.common.getTimestamp");
 string str = resData;
 cout << endl << str << endl;
 delete resData;
 return 0;
}

补充知识:C++处理URL的方法,项目有用到,过程记录如下

由于这里需要把字符串转换成 Unicode 码,因此要先对输入的字符串做处理。处理分为中文和非中文两种情况,需要区别对待,所以首先要对输入的字符串进行识别:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Reports whether a byte string contains a double-byte character, detected
// as two consecutive bytes that both have the high bit (0x80) set.
// NOTE(review): presumably meant for ANSI/GBK text, where Chinese characters
// are encoded that way - confirm the expected encoding with the caller.
//
// str: null-terminated byte string; nullptr is treated as empty.
// Returns 1 if such a byte pair exists anywhere in the string, otherwise 0.
//
// Plain ASCII bytes are skipped. Returning "not Chinese" on the first ASCII
// byte would make strings such as "a<Chinese>" go undetected.
int IncludeChinese(char *str)
{
    if (str == nullptr)
    {
        return 0;
    }

    // str[1] is always readable here: when str[0] is not the terminator,
    // str[1] is at worst the terminator itself.
    for (; *str != 0; ++str)
    {
        if ((str[0] & 0x80) && (str[1] & 0x80))
        {
            return 1;
        }
    }
    return 0;
}

// Original name of this function, kept so existing callers keep compiling.
// New code should call IncludeChinese(), which is the name ToHex() uses.
int 是不是中文(char *str)
{
    return IncludeChinese(str);
}

然后要进行相应转换

?
1
2
3
4
5
6
7
8
//---------------------------------------------------------------------
// Function: W2C
// Purpose : split one wchar_t into two bytes.
// Params  : w_cn  - the wide character to split
//           c_cn  - output array with room for at least two chars
// Remarks : the high byte (bits 8-15) goes to c_cn[0] and the low byte
//           (bits 0-7) to c_cn[1], i.e. big-endian order.
//---------------------------------------------------------------------
void W2C(wchar_t w_cn, char c_cn[])
{
    const char highByte = static_cast<char>(w_cn >> 8);
    const char lowByte = static_cast<char>(w_cn);
    c_cn[0] = highByte;
    c_cn[1] = lowByte;
}

然后主体转换代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
//-------------------------------------------------------------------
// Function: ToHex
// Purpose : builds a copy of Data in which every character recognised by
//           IncludeChinese() is replaced by the four upper-case hex digits
//           of its 16-bit code unit (high byte first); all other
//           characters are copied unchanged.
// Params  : Data        - the text to convert
//           nDataLength - number of characters of Data to process
//                         NOTE(review): assumed to be <= Data.GetLength();
//                         it is passed straight to GetAt() - confirm
// Returns : the converted string
//-------------------------------------------------------------------
CString ToHex(CString Data, long nDataLength)
{
    static const char *hex = "0123456789ABCDEF";

    CString sResult;
    for (long nLoop = 0; nLoop < nDataLength; nLoop++)
    {
        wchar_t ch = Data.GetAt(nLoop);

        // Convert exactly one wide character. A length of -1 would tell
        // WideCharToMultiByte that the input is null-terminated, which a
        // single wchar_t variable is not, so it would read past 'ch'.
        // With an explicit length no terminator is written; buff is
        // zero-filled and one byte is held back, so it stays terminated.
        CHAR buff[MAX_PATH] = {0};
        WideCharToMultiByte(CP_ACP, 0, &ch, 1, buff, MAX_PATH - 1, 0, 0);

        if (IncludeChinese(buff))
        {
            char c_cn[2] = {0, 0};
            W2C(ch, c_cn);
            for (int i = 0; i < 2; i++)
            {
                const unsigned char byteValue = (unsigned char)(c_cn[i]);
                sResult += (char)hex[(byteValue >> 4) & 0x0f];
                sResult += (char)hex[byteValue & 0x0f];
            }
        }
        else
        {
            sResult += ch;
        }
    }

    return sResult;
}

到这里基本上结束了~也查看了一些资料,虽说这个功能简单,但是过程有点曲折。但总算完成了,留个纪念吧~希望对其他人有帮助~希望大家多多支持服务器之家。

原文链接:https://blog.csdn.net/u014416260/article/details/100540873