一、socket介绍
- 参考链接一:https://blog.csdn.net/Pk_zsq/article/details/6087367
- 参考链接二:https://blog.csdn.net/JenMinZhang/article/details/47017741
二、python socket编程
- 参考链接一:https://www.cnblogs.com/nulige/p/6235531.html
- 参考链接二:https://www.liaoxuefeng.com/wiki/0014316089557264a6b348958f449949df42a6d3a2e542c000/001432004374523e495f640612f4b08975398796939ec3c000
三、socket 模拟 HTTP请求
import socket
from urllib.parse import urlparse
class ParserUrl(object):
    """Split a URL into the host and path needed to build an HTTP request."""

    def __init__(self, url):
        self.url = url

    def get_host_path(self):
        """Return ``(host, path)`` for the stored URL.

        ``host`` is the URL's network-location part and ``path`` is its path
        component (the query string is not included). An empty path is
        reported as ``'/'`` so the result can be used directly in a request
        line.
        """
        parts = urlparse(self.url)
        return parts.netloc, parts.path or '/'
class SocketHttp(object):
    """Fetch a page over a raw TCP socket using a hand-written HTTP GET.

    The constructor takes any object exposing ``get_host_path()`` that
    returns a ``(host, path)`` pair. Every read of :attr:`data` opens a new
    connection to port 80 of that host, sends the request and reads until the
    peer closes the connection (the request asks for ``Connection:close``).

    HTTP travels as bytes, so the request is encoded to UTF-8 before sending
    and the response is decoded from UTF-8 after it has been fully received.

    Note: only the host and path are used; the URL scheme is never consulted,
    and the response body is returned as received (no transfer-encoding
    handling is performed).
    """

    def __init__(self, parser_url):
        self.host, self.path = parser_url.get_host_path()
        # No connection yet; lets close_socket() be called safely at any time.
        self.client = None

    def set_socket(self):
        """Open the TCP connection and send the GET request.

        Raises whatever the socket layer raises (``OSError`` subclasses); the
        socket is closed before the error propagates.
        """
        request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(self.path, self.host)
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.client.connect((self.host, 80))
            # sendall() keeps writing until the whole request is out;
            # send() may legally transmit only a prefix of the buffer.
            self.client.sendall(request.encode("utf8"))
        except Exception:
            self.close_socket()
            raise

    @property
    def data(self):
        """Return the complete raw response (status line, headers, body) as str.

        Raises:
            OSError: if connecting, sending or receiving fails.
            UnicodeDecodeError: if the response is not valid UTF-8.
        """
        self.set_socket()
        chunks = []
        try:
            while True:
                chunk = self.client.recv(1024)
                if not chunk:
                    # Empty read means the peer closed the connection.
                    break
                chunks.append(chunk)
        finally:
            # Always release the socket, even when recv() fails midway.
            self.close_socket()
        # Join once and decode once, so multi-byte characters split across
        # recv() boundaries are decoded correctly.
        return b"".join(chunks).decode("utf8")

    @property
    def html_content(self):
        """Return everything after the first blank line of the response.

        Only the first ``\\r\\n\\r\\n`` separates headers from body, so blank
        lines inside the body are preserved. If the response contains no
        header terminator, an empty string is returned.
        """
        _headers, _separator, body = self.data.partition("\r\n\r\n")
        return body

    def close_socket(self):
        """Close the current socket, if one has been opened."""
        if self.client is not None:
            self.client.close()
if __name__ == "__main__":
    # Demo: parse the target URL, fetch it over a raw socket and print
    # the part of the response that follows the headers.
    url = ParserUrl('https://www.baidu.com/')
    print(SocketHttp(url).html_content)
运行结果:
程序正常打印出响应正文(字符串)