这部分回顾Socket Programming Assignment 4: ProxyServer。

参考资料:

简介

此次作业的目的是构建代理服务器,主要任务如下:

  • 实现基本的代理服务器
  • 实现带缓存的代理服务器

备注:没有支持POST方法。

文件路径结构如下:

Socket4_ProxyServer/
|-- proxy_server.py
`-- proxy_server_cache.py

带缓存的文件路径结构:

|-- cache
|   |-- www.baidu.com
|   `-- www.edx.org
|-- proxy_server.py
`-- proxy_server_cache.py

实现基本的代理服务器

没有使用作业的代码框架,因为感觉不太好理解,自己重构了一版,代码如下:

# coding=utf-8

from socket import *
import sys

# Blank line that ends the header section of an HTTP message.
HEADER_END = b"\r\n\r\n"
# Complete error response, including the blank line that terminates it.
NOT_FOUND = b"HTTP/1.0 404 Not Found\r\n\r\n"


def extract_host(message):
    """Return the host named by the request target in *message*.

    The proxy is reached as ``http://<proxy>/<host>``, so the second token
    of the request is ``/<host>``; whatever follows the first ``/`` is
    taken as the host.  Returns ``None`` when the request has no second
    token or the target names no host.
    """
    tokens = message.split()
    if len(tokens) < 2:
        return None
    return tokens[1].partition("/")[2] or None


def split_body(response):
    """Return the body of the raw HTTP *response*.

    Only the first blank line ends the header section, so a body that
    itself contains a blank line is returned whole.  Returns ``None`` when
    the response contains no header terminator at all.
    """
    _, separator, body = response.partition(HEADER_END)
    return body if separator else None


def fetch_page(host, port=80):
    """Request ``/`` from *host* over HTTP/1.0 and return the raw response.

    Raises OSError when the host cannot be resolved or connected to.
    """
    chunks = []
    # The context manager closes the upstream socket even if connect fails.
    with socket(AF_INET, SOCK_STREAM) as clientSocket:
        clientSocket.connect((host, port))
        clientSocket.sendall(f"GET / HTTP/1.0\r\nHost: {host}\r\n\r\n".encode())
        while True:
            try:
                msg = clientSocket.recv(1024)
            except OSError:
                # Keep whatever arrived before the connection broke.
                print("No message available!")
                break
            if not msg:
                break
            chunks.append(msg)
    return b"".join(chunks)


def send_page(tcpCliSock, data):
    """Send *data* to the client as an HTTP/1.0 200 text/html response."""
    tcpCliSock.sendall(b"HTTP/1.0 200 OK\r\nContent-Type:text/html\r\n\r\n" + data)


def handle_client(tcpCliSock):
    """Serve a single request on *tcpCliSock*; the caller closes the socket."""
    # Replace undecodable bytes instead of letting a bad request raise.
    message = tcpCliSock.recv(1024).decode(errors="replace")
    if message == "":
        return
    print(message)
    # Extract the target host from the given message
    filename = extract_host(message)
    if filename is None:
        tcpCliSock.sendall(NOT_FOUND)
        return
    print(message.split()[1])
    try:
        # Drop the upstream headers; only the body is relayed.
        data = split_body(fetch_page(filename))
    except (OSError, ValueError):
        # ValueError covers host names that cannot be encoded for lookup
        # (UnicodeError is a ValueError subclass).
        data = None
    if data is None:
        tcpCliSock.sendall(NOT_FOUND)
        return
    send_page(tcpCliSock, data)


def main():
    """Run the proxy: ``python proxy_server.py server_ip server_port``."""
    if len(sys.argv) <= 2:
        print('Usage : "python ProxyServer.py server_ip server_port"\n[server_ip : It is the IP Address Of Proxy Server')
        sys.exit(2)

    serverName = sys.argv[1]
    serverPort = int(sys.argv[2])

    # Create a server socket, bind it to a port and start listening.
    # The context manager closes it when the loop is interrupted.
    with socket(AF_INET, SOCK_STREAM) as tcpSerSock:
        tcpSerSock.bind((serverName, serverPort))
        tcpSerSock.listen(1)
        while True:
            # Start receiving data from the client
            print('Ready to serve...')
            tcpCliSock, addr = tcpSerSock.accept()
            print('Received a connection from:', addr)
            # Close the client socket on every path, including empty requests.
            with tcpCliSock:
                try:
                    handle_client(tcpCliSock)
                except OSError as err:
                    # A client that hangs up early must not stop the server.
                    print('Connection error:', err)


if __name__ == "__main__":
    main()

启动服务器:

python proxy_server.py localhost 8888

在浏览器上访问如下页面:

http://localhost:8888/www.baidu.com
http://localhost:8888/www.edx.org
http://localhost:8888/123

实验结果如下:

实现带缓存的代理服务器

对前一版增加了缓存功能,具体区别在于当访问之前访问过的网站时,会直接从缓存读取数据,代码如下:

# coding=utf-8

from socket import *
import sys
import os

CACHE_NAME = "cache"
# Blank line that ends the header section of an HTTP message.
HEADER_END = b"\r\n\r\n"
# Complete error response, including the blank line that terminates it.
NOT_FOUND = b"HTTP/1.0 404 Not Found\r\n\r\n"


def extract_host(message):
    """Return the host named by the request target in *message*.

    The proxy is reached as ``http://<proxy>/<host>``, so the second token
    of the request is ``/<host>``; whatever follows the first ``/`` is
    taken as the host.  Returns ``None`` when the request has no second
    token or the target names no host.
    """
    tokens = message.split()
    if len(tokens) < 2:
        return None
    return tokens[1].partition("/")[2] or None


def split_body(response):
    """Return the body of the raw HTTP *response*.

    Only the first blank line ends the header section, so a body that
    itself contains a blank line is returned whole.  Returns ``None`` when
    the response contains no header terminator at all.
    """
    _, separator, body = response.partition(HEADER_END)
    return body if separator else None


def cache_path(host):
    """Return the cache file path for *host*, or ``None`` if it is unsafe.

    *host* comes straight from the client's request, so only a single plain
    path component is accepted.  Names with directory separators, absolute
    paths, ``.`` and ``..`` are refused so a request cannot read or write
    files outside the cache directory.
    """
    if host in ("", ".", "..") or os.path.basename(host) != host:
        return None
    return os.path.join(os.getcwd(), CACHE_NAME, host)


def fetch_page(host, port=80):
    """Request ``/`` from *host* over HTTP/1.0 and return the raw response.

    Raises OSError when the host cannot be resolved or connected to.
    """
    chunks = []
    # The context manager closes the upstream socket even if connect fails.
    with socket(AF_INET, SOCK_STREAM) as clientSocket:
        clientSocket.connect((host, port))
        clientSocket.sendall(f"GET / HTTP/1.0\r\nHost: {host}\r\n\r\n".encode())
        while True:
            try:
                msg = clientSocket.recv(1024)
            except OSError:
                # Keep whatever arrived before the connection broke.
                print("No message available!")
                break
            if not msg:
                break
            chunks.append(msg)
    return b"".join(chunks)


def send_page(tcpCliSock, data):
    """Send *data* to the client as an HTTP/1.0 200 text/html response."""
    tcpCliSock.sendall(b"HTTP/1.0 200 OK\r\nContent-Type:text/html\r\n\r\n" + data)


def handle_client(tcpCliSock):
    """Serve a single request on *tcpCliSock*; the caller closes the socket."""
    # Replace undecodable bytes instead of letting a bad request raise.
    message = tcpCliSock.recv(1024).decode(errors="replace")
    if message == "":
        return
    print(message)
    # Extract the target host from the given message
    filename = extract_host(message)
    filePath = cache_path(filename) if filename is not None else None
    if filePath is None:
        tcpCliSock.sendall(NOT_FOUND)
        return
    print(message.split()[1])
    print(f"filename: {filename}")
    print(f"filetouse: /{filename}")
    # Create the cache directory on first use
    os.makedirs(CACHE_NAME, exist_ok=True)

    if os.path.isfile(filePath):
        print("Read from cache")
        # The cache holds the raw body bytes, so read them back unchanged;
        # decoding as text would fail on pages that are not UTF-8.
        with open(filePath, "rb") as f:
            data = f.read()
        # ProxyServer finds a cache hit and generates a response message
        send_page(tcpCliSock, data)
        print('Read from cache')
        return

    try:
        # Drop the upstream headers; only the body is cached and relayed.
        data = split_body(fetch_page(filename))
    except (OSError, ValueError):
        # ValueError covers host names that cannot be encoded for lookup
        # (UnicodeError is a ValueError subclass).
        data = None
    if data is None:
        tcpCliSock.sendall(NOT_FOUND)
        return

    # Write to the cache; a failed write must not block the response.
    try:
        with open(filePath, "wb") as f:
            f.write(data)
    except OSError as err:
        print('Could not write cache file:', err)
    send_page(tcpCliSock, data)


def main():
    """Run the proxy: ``python proxy_server_cache.py server_ip server_port``."""
    if len(sys.argv) <= 2:
        print('Usage : "python ProxyServer.py server_ip server_port"\n[server_ip : It is the IP Address Of Proxy Server')
        sys.exit(2)

    serverName = sys.argv[1]
    serverPort = int(sys.argv[2])

    # Create a server socket, bind it to a port and start listening.
    # The context manager closes it when the loop is interrupted.
    with socket(AF_INET, SOCK_STREAM) as tcpSerSock:
        tcpSerSock.bind((serverName, serverPort))
        tcpSerSock.listen(1)
        while True:
            # Start receiving data from the client
            print('Ready to serve...')
            tcpCliSock, addr = tcpSerSock.accept()
            print('Received a connection from:', addr)
            # Close the client socket on every path, cache hits included.
            with tcpCliSock:
                try:
                    handle_client(tcpCliSock)
                except OSError as err:
                    # A client that hangs up early must not stop the server.
                    print('Connection error:', err)


if __name__ == "__main__":
    main()

启动服务器:

python proxy_server_cache.py localhost 8888

实验结果和之前一致,唯一区别是从缓存读取时终端会显示如下内容:

/www.baidu.com
filename: www.baidu.com
filetouse: /www.baidu.com
Read from cache
Read from cache