CMU 15-213 Lab6 Proxy Lab
课程主页:http://www.cs.cmu.edu/afs/cs/academic/class/15213-f15/www/schedule.html
课程资料:https://github.com/EugeneLiu/translationCSAPP
课程视频:https://www.bilibili.com/video/av31289365/
这一部分回顾CSAPP的Proxy Lab。
参考资料:
- https://www.jianshu.com/p/a501d0c2f131
- https://blog.csdn.net/weixin_44520881/article/details/109518057
- https://blog.csdn.net/qq_26440803/article/details/83795122
- https://home.baidu.com/home/index
- https://www.runoob.com/cprogramming/c-function-malloc.html
- https://blog.csdn.net/jazzsoldier/article/details/104258903
- https://www.cnblogs.com/shihaoyu/p/9999949.html
说明
这次lab是实现一个代理服务器,代码主体来自于:
- 11.5 Web服务器;
- 12.3 基于线程的并发编程;
- 12.5.4 共享变量中的读写问题;
运行测试方式
启动代理服务器:
cd proxylab-handout
make && ./proxy 15214
启动服务器:
cd proxylab-handout/tiny
./tiny 15213
测试代理服务器:
curl -v --proxy http://localhost:15214 http://localhost:15213/home.html
curl -v --proxy http://localhost:15214 http://localhost:15213/csapp.c
curl -v --proxy http://localhost:15214 http://localhost:15213/godzilla.jpg
完整测试:
cd proxylab-handout
make proxy && ./driver.sh
报错相关
报错1
Timeout waiting for the server to grab the port reserved for it
修改nop-server.py中
#!/usr/bin/python
为
#!/usr/bin/python3
参考资料:
https://blog.csdn.net/weixin_44520881/article/details/109518057
报错2
The futex facility returned an unexpected error code.Aborted
该报错的含义为不要在struct中包含信号量。
参考资料:
https://blog.csdn.net/jazzsoldier/article/details/104258903
Part I: Implementing a sequential web proxy
一个基本的Web服务器,代码修改自:
/csapp/Code-all/code/netp/tiny/tiny.c
代码:
#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *http_version = "HTTP/1.0";
void doit(int fd);
void get_path(char uri[], char host[], char port[], char path[]);
void read_requesthdrs(rio_t *rp);
/*
 * Entry point of the sequential proxy.
 *
 * Usage: ./proxy <port>
 *
 * Validates the port argument, opens a listening socket on it and then
 * serves clients one at a time: accept, log the peer, run doit(), close.
 * The loop never terminates on its own.
 */
int main(int argc, char **argv)
{
    int listenfd, connfd;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    /* sockaddr_storage is large enough for every address family, whereas a
     * plain struct sockaddr cannot hold an IPv6 peer address. */
    struct sockaddr_storage clientaddr;
    char *end = NULL;
    long port_num;

    /* Exactly one argument (the port) is required. */
    if (argc != 2) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }
    /* strtol reports non-numeric input through `end`; atoi cannot signal an
     * error at all, so the range advertised in the message is checked here. */
    port_num = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || port_num <= 1024 || port_num >= 65536) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        clientlen = sizeof(clientaddr);
        /* Accept returns the connected descriptor for the next client. */
        connfd = Accept(listenfd, (struct sockaddr *)&clientaddr, &clientlen);
        /* Convert the peer's socket address into host and service strings. */
        Getnameinfo((struct sockaddr *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        doit(connfd);
        Close(connfd);
    }
    return 0;
}
/*
 * Serve one client connection as a forward proxy.
 *
 * Reads the request line from `fd`, accepts only GET, splits the absolute
 * URI into host / port / path with get_path(), connects to that server,
 * sends a rewritten HTTP/1.0 request and relays the raw response bytes back
 * to `fd`.  The caller owns `fd` and closes it.
 *
 * The client's remaining request headers are neither read nor forwarded.
 */
void doit(int fd) {
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char port[MAXLINE];
    char host[MAXLINE];
    char path[MAXLINE];
    rio_t rio;
    rio_t rio_output;
    int clientfd;
    int n = 0;

    Rio_readinitb(&rio, fd);
    /* Client closed the connection without sending anything. */
    if (!Rio_readlineb(&rio, buf, MAXLINE)) {
        return;
    }
    /* Expected shape: GET http://www.cmu.edu/hub/index.html HTTP/1.1
     * A line with fewer than three fields would leave the buffers
     * uninitialised, so it is rejected. */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        return;
    }
    /* Only GET is supported; everything else is silently dropped. */
    if (strcasecmp(method, "GET")) {
        return;
    }

    get_path(uri, host, port, path);
    printf("%s %s %s %s %s\n", method, host, port, path, version);

    /* Connect to the origin server and attach a read buffer to the socket. */
    clientfd = Open_clientfd(host, port);
    Rio_readinitb(&rio_output, clientfd);

    /* Request line, e.g. "GET /hub/index.html HTTP/1.0". */
    snprintf(buf, sizeof buf, "%s %s %s\r\n", method, path, http_version);
    Rio_writen(clientfd, buf, strlen(buf));
    /* Host: www.cmu.edu:80 */
    snprintf(buf, sizeof buf, "Host: %s:%s\r\n", host, port);
    Rio_writen(clientfd, buf, strlen(buf));
    /* user_agent_hdr already ends in CRLF; adding another one here would
     * terminate the header block before the Connection headers are sent. */
    snprintf(buf, sizeof buf, "%s", user_agent_hdr);
    Rio_writen(clientfd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Connection: close\r\n");
    Rio_writen(clientfd, buf, strlen(buf));
    /* Last header, followed by the empty line that ends the header block. */
    snprintf(buf, sizeof buf, "Proxy-Connection: close\r\n\r\n");
    Rio_writen(clientfd, buf, strlen(buf));

    /* Relay the response.  It may be binary, so it is read with the
     * byte-oriented Rio_readnb and written with the exact byte count
     * instead of strlen(). */
    while ((n = Rio_readnb(&rio_output, buf, MAXLINE))) {
        Rio_writen(fd, buf, n);
    }
    Close(clientfd);
    return;
}
/*
 * Read and echo request headers from `rp` up to and including the empty
 * line that ends the header block.
 *
 * Stops as well when the read returns 0 or less (peer closed the
 * connection), so an incomplete header block cannot cause an endless loop
 * re-printing the last line.
 */
void read_requesthdrs(rio_t *rp) {
    char buf[MAXLINE];

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        printf("%s", buf);
        /* An empty line (CRLF, or a bare LF from lenient clients) ends the headers. */
        if (strcmp(buf, "\r\n") == 0 || strcmp(buf, "\n") == 0) {
            break;
        }
    }
    return;
}
/*
 * Split a request URI of the form
 *     [scheme://]host[:port][/path]
 * into its components.
 *
 * uri  - NUL-terminated request URI (not modified).
 * host - receives the host name.
 * port - receives the port digits, or "80" when the URI names none.
 * path - receives everything from the first '/' after the authority,
 *        colons included, or "/" when the URI has no path.
 *
 * Each output buffer must be able to hold strlen(uri) + 1 bytes and at
 * least 3 bytes.
 */
void get_path(char uri[], char host[], char port[], char path[]) {
    const char *authority = uri;
    const char *sep = strstr(uri, "://");

    /* Only treat "://" as a scheme separator when it precedes any '/',
     * so a URL embedded later in the path is not mistaken for it. */
    if (sep != NULL && memchr(uri, '/', (size_t)(sep - uri)) == NULL) {
        authority = sep + 3;
    }

    /* The authority (host[:port]) runs up to the first '/' or end of string. */
    size_t auth_len = strcspn(authority, "/");
    const char *colon = memchr(authority, ':', auth_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - authority) : auth_len;

    memcpy(host, authority, host_len);
    host[host_len] = '\0';

    /* Explicit port if one follows the colon, otherwise the HTTP default. */
    size_t port_len = (colon != NULL) ? auth_len - host_len - 1 : 0;
    if (port_len > 0) {
        memcpy(port, colon + 1, port_len);
        port[port_len] = '\0';
    } else {
        strcpy(port, "80");
    }

    /* The path is copied verbatim; an absent path becomes "/". */
    if (authority[auth_len] == '/') {
        strcpy(path, authority + auth_len);
    } else {
        strcpy(path, "/");
    }
    return;
}
测试结果:
make proxy && ./driver.sh
totalScore: 40/70
Part II: Dealing with multiple concurrent requests
一个基本的并发Web服务器,代码修改自:
/csapp/Code-all/code/conc/echoservert.c
代码:
#include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *http_version = "HTTP/1.0";
void doit(int fd);
void get_path(char uri[], char host[], char port[], char path[]);
void read_requesthdrs(rio_t *rp);
void *thread(void *vargp);
/*
 * Entry point of the thread-per-connection proxy.
 *
 * Usage: ./proxy <port>
 *
 * Validates the port argument, opens a listening socket on it and, for
 * every accepted client, starts a thread running thread().  The connected
 * descriptor is passed through a heap-allocated int that the new thread
 * frees.  The loop never terminates on its own.
 */
int main(int argc, char **argv)
{
    int listenfd, *connfdp;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    /* sockaddr_storage is large enough for every address family, whereas a
     * plain struct sockaddr cannot hold an IPv6 peer address. */
    struct sockaddr_storage clientaddr;
    pthread_t tid;
    char *end = NULL;
    long port_num;

    /* Exactly one argument (the port) is required. */
    if (argc != 2) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }
    /* strtol reports non-numeric input through `end`; atoi cannot signal an
     * error at all, so the range advertised in the message is checked here. */
    port_num = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || port_num <= 1024 || port_num >= 65536) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        clientlen = sizeof(clientaddr);
        /* Give each connection its own heap slot so the next Accept cannot
         * overwrite the descriptor before the new thread has read it. */
        connfdp = Malloc(sizeof(int));
        *connfdp = Accept(listenfd, (struct sockaddr *)&clientaddr, &clientlen);
        /* Convert the peer's socket address into host and service strings. */
        Getnameinfo((struct sockaddr *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfdp);
    }
    return 0;
}
/*
 * Thread routine for one client connection.
 *
 * vargp points to a heap-allocated int holding the connected descriptor.
 * The routine copies the descriptor, detaches itself, frees the slot,
 * serves the request with doit() and closes the descriptor.
 * Always returns NULL.
 */
void *thread(void *vargp) {
    int *fd_slot = (int *)vargp;
    int client_fd = *fd_slot;

    Pthread_detach(pthread_self());
    Free(fd_slot);

    doit(client_fd);
    Close(client_fd);
    return NULL;
}
/*
 * Serve one client connection as a forward proxy.
 *
 * Reads the request line from `fd`, accepts only GET, splits the absolute
 * URI into host / port / path with get_path(), connects to that server,
 * sends a rewritten HTTP/1.0 request and relays the raw response bytes back
 * to `fd`.  The caller owns `fd` and closes it.
 *
 * The client's remaining request headers are neither read nor forwarded.
 */
void doit(int fd) {
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char port[MAXLINE];
    char host[MAXLINE];
    char path[MAXLINE];
    rio_t rio;
    rio_t rio_output;
    int clientfd;
    int n = 0;

    Rio_readinitb(&rio, fd);
    /* Client closed the connection without sending anything. */
    if (!Rio_readlineb(&rio, buf, MAXLINE)) {
        return;
    }
    /* Expected shape: GET http://www.cmu.edu/hub/index.html HTTP/1.1
     * A line with fewer than three fields would leave the buffers
     * uninitialised, so it is rejected. */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        return;
    }
    /* Only GET is supported; everything else is silently dropped. */
    if (strcasecmp(method, "GET")) {
        return;
    }

    get_path(uri, host, port, path);
    printf("%s %s %s %s %s\n", method, host, port, path, version);

    /* Connect to the origin server and attach a read buffer to the socket. */
    clientfd = Open_clientfd(host, port);
    Rio_readinitb(&rio_output, clientfd);

    /* Request line, e.g. "GET /hub/index.html HTTP/1.0". */
    snprintf(buf, sizeof buf, "%s %s %s\r\n", method, path, http_version);
    Rio_writen(clientfd, buf, strlen(buf));
    /* Host: www.cmu.edu:80 */
    snprintf(buf, sizeof buf, "Host: %s:%s\r\n", host, port);
    Rio_writen(clientfd, buf, strlen(buf));
    /* user_agent_hdr already ends in CRLF; adding another one here would
     * terminate the header block before the Connection headers are sent. */
    snprintf(buf, sizeof buf, "%s", user_agent_hdr);
    Rio_writen(clientfd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Connection: close\r\n");
    Rio_writen(clientfd, buf, strlen(buf));
    /* Last header, followed by the empty line that ends the header block. */
    snprintf(buf, sizeof buf, "Proxy-Connection: close\r\n\r\n");
    Rio_writen(clientfd, buf, strlen(buf));

    /* Relay the response.  It may be binary, so it is read with the
     * byte-oriented Rio_readnb and written with the exact byte count
     * instead of strlen(). */
    while ((n = Rio_readnb(&rio_output, buf, MAXLINE))) {
        Rio_writen(fd, buf, n);
    }
    Close(clientfd);
    return;
}
/*
 * Read and echo request headers from `rp` up to and including the empty
 * line that ends the header block.
 *
 * Stops as well when the read returns 0 or less (peer closed the
 * connection), so an incomplete header block cannot cause an endless loop
 * re-printing the last line.
 */
void read_requesthdrs(rio_t *rp) {
    char buf[MAXLINE];

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        printf("%s", buf);
        /* An empty line (CRLF, or a bare LF from lenient clients) ends the headers. */
        if (strcmp(buf, "\r\n") == 0 || strcmp(buf, "\n") == 0) {
            break;
        }
    }
    return;
}
/*
 * Split a request URI of the form
 *     [scheme://]host[:port][/path]
 * into its components.
 *
 * uri  - NUL-terminated request URI (not modified).
 * host - receives the host name.
 * port - receives the port digits, or "80" when the URI names none.
 * path - receives everything from the first '/' after the authority,
 *        colons included, or "/" when the URI has no path.
 *
 * Each output buffer must be able to hold strlen(uri) + 1 bytes and at
 * least 3 bytes.
 */
void get_path(char uri[], char host[], char port[], char path[]) {
    const char *authority = uri;
    const char *sep = strstr(uri, "://");

    /* Only treat "://" as a scheme separator when it precedes any '/',
     * so a URL embedded later in the path is not mistaken for it. */
    if (sep != NULL && memchr(uri, '/', (size_t)(sep - uri)) == NULL) {
        authority = sep + 3;
    }

    /* The authority (host[:port]) runs up to the first '/' or end of string. */
    size_t auth_len = strcspn(authority, "/");
    const char *colon = memchr(authority, ':', auth_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - authority) : auth_len;

    memcpy(host, authority, host_len);
    host[host_len] = '\0';

    /* Explicit port if one follows the colon, otherwise the HTTP default. */
    size_t port_len = (colon != NULL) ? auth_len - host_len - 1 : 0;
    if (port_len > 0) {
        memcpy(port, colon + 1, port_len);
        port[port_len] = '\0';
    } else {
        strcpy(port, "80");
    }

    /* The path is copied verbatim; an absent path becomes "/". */
    if (authority[auth_len] == '/') {
        strcpy(path, authority + auth_len);
    } else {
        strcpy(path, "/");
    }
    return;
}
测试结果:
make proxy && ./driver.sh
totalScore: 55/70
Part III: Caching web objects
说明:
- 读写部分的代码参考了12.5.4;缓存部分使用双向链表实现,该链表的首尾部分都增加了哨兵,方便处理;
- 当收到请求时,从头开始检查缓存中是否存在相同内容,
- 如果存在相同内容,则直接返回,将对应的节点移动到链表头部;
- 否则进行缓存;
- 如果容量足够,则直接在头部插入节点;
- 否则从尾部(最久未使用的节点)开始释放节点,直至容量足够,此时在头部插入节点;
代理服务器部分代码:
#include <stdio.h>
#include "csapp.h"
#include "cache.h"
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
static const char *http_version = "HTTP/1.0";
static const char *default_port = "80";
Cache* cache;
void doit(int fd);
void get_path(char uri[], char host[], char port[], char path[]);
void read_requesthdrs(rio_t *rp);
void *thread(void *vargp);
/* No-op signal handler: receives the signal number and does nothing with it. */
void sig_handler(int sig) {
    (void)sig;
}
/*
 * Entry point of the caching, thread-per-connection proxy.
 *
 * Usage: ./proxy <port>
 *
 * Validates the port argument, allocates and initialises the global cache,
 * ignores SIGPIPE, opens a listening socket and starts one thread running
 * thread() per accepted client.  The accept loop never terminates on its
 * own, so the cleanup after it is not reached.
 */
int main(int argc, char **argv)
{
    int listenfd, *connfdp;
    char hostname[MAXLINE], port[MAXLINE];
    socklen_t clientlen;
    /* sockaddr_storage is large enough for every address family, whereas a
     * plain struct sockaddr cannot hold an IPv6 peer address. */
    struct sockaddr_storage clientaddr;
    pthread_t tid;
    char *end = NULL;
    long port_num;

    /* Exactly one argument (the port) is required. */
    if (argc != 2) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }
    /* strtol reports non-numeric input through `end`; atoi cannot signal an
     * error at all, so the range advertised in the message is checked here. */
    port_num = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || port_num <= 1024 || port_num >= 65536) {
        fprintf(stderr, "you should enter a port number greater than 1,024 and less than 65,536!\n");
        exit(1);
    }

    /* Use the checking wrapper, as for connfdp below, so a failed allocation
     * is not dereferenced by init_cache. */
    cache = Malloc(sizeof(Cache));
    /* Ignore SIGPIPE so that writing to a socket the peer has already
     * closed does not kill the whole process. */
    signal(SIGPIPE, SIG_IGN);
    init_cache(cache, MAX_CACHE_SIZE, MAX_OBJECT_SIZE);

    listenfd = Open_listenfd(argv[1]);
    while (1) {
        clientlen = sizeof(clientaddr);
        /* Give each connection its own heap slot so the next Accept cannot
         * overwrite the descriptor before the new thread has read it. */
        connfdp = Malloc(sizeof(int));
        *connfdp = Accept(listenfd, (struct sockaddr *)&clientaddr, &clientlen);
        /* Convert the peer's socket address into host and service strings. */
        Getnameinfo((struct sockaddr *)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfdp);
    }
    /* Not reached: the loop above has no exit. */
    free_cache(cache);
    return 0;
}
/*
 * Thread routine for one client connection.
 *
 * vargp points to a heap-allocated int holding the connected descriptor.
 * The routine copies the descriptor, detaches itself, frees the slot,
 * serves the request with doit() and closes the descriptor.
 * Always returns NULL.
 */
void *thread(void *vargp) {
    int *fd_slot = (int *)vargp;
    int client_fd = *fd_slot;

    Pthread_detach(pthread_self());
    Free(fd_slot);

    doit(client_fd);
    Close(client_fd);
    return NULL;
}
/*
 * Serve one client connection as a caching forward proxy.
 *
 * Reads the request line from `fd`, accepts only GET, asks the cache for
 * the URI via reader(), then splits the URI with get_path(), connects to
 * that server, sends a rewritten HTTP/1.0 request and relays the raw
 * response bytes back to `fd`.  A response that fits in MAX_OBJECT_SIZE
 * and contains no NUL byte is stored in the cache afterwards.
 * The caller owns `fd` and closes it.
 *
 * The client's remaining request headers are neither read nor forwarded.
 */
void doit(int fd) {
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char port[MAXLINE];
    char host[MAXLINE];
    char path[MAXLINE];
    rio_t rio;
    rio_t rio_output;
    int clientfd;
    int n = 0;
    /* Bytes of the response collected in data so far. */
    int size = 0;
    /* Cleared as soon as the response no longer fits in data. */
    int cacheable = 1;
    /* One extra byte so the collected object can be NUL-terminated. */
    char data[MAX_OBJECT_SIZE + 1];

    Rio_readinitb(&rio, fd);
    /* Client closed the connection without sending anything. */
    if (!Rio_readlineb(&rio, buf, MAXLINE)) {
        return;
    }
    /* Expected shape: GET http://www.cmu.edu/hub/index.html HTTP/1.1
     * A line with fewer than three fields would leave the buffers
     * uninitialised, so it is rejected. */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        return;
    }
    /* Only GET is supported; report and drop everything else. */
    if (strcasecmp(method, "GET")) {
        printf("Not implement!\n");
        return;
    }

    /* NOTE(review): deliberately no early return here.  Returning is only
     * safe if reader() reports 1 exclusively for real hits; verify that
     * against cache.c before adding one. */
    if (reader(cache, uri, fd) == 1) {
        printf("read from cache!\n");
    }

    get_path(uri, host, port, path);
    printf("%s %s %s %s %s\n", method, host, port, path, version);

    /* Connect to the origin server and attach a read buffer to the socket. */
    clientfd = Open_clientfd(host, port);
    Rio_readinitb(&rio_output, clientfd);

    /* Request line, e.g. "GET /hub/index.html HTTP/1.0". */
    snprintf(buf, sizeof buf, "%s %s %s\r\n", method, path, http_version);
    Rio_writen(clientfd, buf, strlen(buf));
    /* Host: www.cmu.edu:80 */
    snprintf(buf, sizeof buf, "Host: %s:%s\r\n", host, port);
    Rio_writen(clientfd, buf, strlen(buf));
    /* user_agent_hdr already ends in CRLF; adding another one here would
     * terminate the header block before the Connection headers are sent. */
    snprintf(buf, sizeof buf, "%s", user_agent_hdr);
    Rio_writen(clientfd, buf, strlen(buf));
    snprintf(buf, sizeof buf, "Connection: close\r\n");
    Rio_writen(clientfd, buf, strlen(buf));
    /* Last header, followed by the empty line that ends the header block. */
    snprintf(buf, sizeof buf, "Proxy-Connection: close\r\n\r\n");
    Rio_writen(clientfd, buf, strlen(buf));

    /* Relay the response.  It may be binary, so it is read with the
     * byte-oriented Rio_readnb and written with the exact byte count.
     * Every chunk is forwarded; errno is not consulted because it may
     * still hold a value left by an earlier, unrelated call. */
    while ((n = Rio_readnb(&rio_output, buf, MAXLINE))) {
        /* Collect a copy only while the whole chunk still fits. */
        if (cacheable && size + n <= MAX_OBJECT_SIZE) {
            memcpy(data + size, buf, (size_t)n);
            size += n;
        } else {
            cacheable = 0;
        }
        Rio_writen(fd, buf, n);
    }

    /* insert_cache() measures its input with strlen(), so only complete
     * objects without an embedded NUL byte can be stored faithfully.
     * NOTE(review): insert_cache() is called directly, outside writer()'s
     * lock; switch to writer() only after confirming that reader() releases
     * its lock on every path. */
    data[size] = '\0';
    if (cacheable && size > 0 && strlen(data) == (size_t)size) {
        insert_cache(cache, uri, data);
    }
    Close(clientfd);
    return;
}
/*
 * Read and echo request headers from `rp` up to and including the empty
 * line that ends the header block.
 *
 * Stops as well when the read returns 0 or less (peer closed the
 * connection), so an incomplete header block cannot cause an endless loop
 * re-printing the last line.
 */
void read_requesthdrs(rio_t *rp) {
    char buf[MAXLINE];

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        printf("%s", buf);
        /* An empty line (CRLF, or a bare LF from lenient clients) ends the headers. */
        if (strcmp(buf, "\r\n") == 0 || strcmp(buf, "\n") == 0) {
            break;
        }
    }
    return;
}
/*
 * Split a request URI of the form
 *     [scheme://]host[:port][/path]
 * into its components.
 *
 * uri  - NUL-terminated request URI (not modified).
 * host - receives the host name.
 * port - receives the port digits, or default_port when the URI names none.
 * path - receives everything from the first '/' after the authority,
 *        colons included, or "/" when the URI has no path.
 *
 * Each output buffer must be able to hold strlen(uri) + 1 bytes and at
 * least strlen(default_port) + 1 bytes.
 */
void get_path(char uri[], char host[], char port[], char path[]) {
    const char *authority = uri;
    const char *sep = strstr(uri, "://");

    /* Only treat "://" as a scheme separator when it precedes any '/',
     * so a URL embedded later in the path is not mistaken for it. */
    if (sep != NULL && memchr(uri, '/', (size_t)(sep - uri)) == NULL) {
        authority = sep + 3;
    }

    /* The authority (host[:port]) runs up to the first '/' or end of string. */
    size_t auth_len = strcspn(authority, "/");
    const char *colon = memchr(authority, ':', auth_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - authority) : auth_len;

    memcpy(host, authority, host_len);
    host[host_len] = '\0';

    /* Explicit port if one follows the colon, otherwise the default port. */
    size_t port_len = (colon != NULL) ? auth_len - host_len - 1 : 0;
    if (port_len > 0) {
        memcpy(port, colon + 1, port_len);
        port[port_len] = '\0';
    } else {
        strcpy(port, default_port);
    }

    /* The path is copied verbatim; an absent path becomes "/". */
    if (authority[auth_len] == '/') {
        strcpy(path, authority + auth_len);
    } else {
        strcpy(path, "/");
    }
    return;
}
缓存部分代码:
cache.h:
#ifndef __CACHE__
#define __CACHE__
#include <stdio.h>
#include "csapp.h"
// #include <csapp.h>
// #define MAX_URL_LEN 512;
// One cached object; an element of the cache's doubly linked list.
typedef struct Node {
// Heap-allocated key string; lookups compare it with strcmp.
char *url;
// Heap-allocated copy of the cached object's bytes.
char *data;
// Neighbouring nodes in the list (NULL while the node is not linked).
struct Node *prev;
struct Node *next;
// Number of bytes accounted for this object in the cache's used_size.
int size;
} Node;
// List layout: head sentinel -> p0 -> p1 -> ... -> tail sentinel
// New and recently found nodes are placed right after head; eviction
// removes the node right before tail.
typedef struct Cache {
// Upper bound for used_size; nodes are evicted until a new object fits.
int max_cache_size;
// Objects larger than this are rejected by insert_cache.
int max_object_size;
// Sum of the size fields of all nodes currently stored.
int used_size;
// Sentinel nodes allocated by init_cache; they carry no url or data.
struct Node *head;
struct Node *tail;
} Cache;
// Node
void init_node(Node *node);
void init_node_with_data(Node *node, char *url, char *data, int n);
void free_node(Node *node);
void link_node(Node *node1, Node *node2);
void change_link(Node *node);
void insert_node(Node *node1, Node *node2);
void print_node(Node *node);
// Cache
void init_cache(Cache *cache, int max_cache_size, int max_object_size);
Node* find_cache(Cache *cache, char *url, Node *ptr);
void free_cache_block(Cache *cache);
void free_cache(Cache *cache);
int insert_cache(Cache *cache, char *url, char *data);
void print_cache(Cache *cache);
int reader(Cache *cache, char *url, int fd);
void writer(Cache *cache, char *url, char *data);
#endif
cache.c:
#include "cache.h"
// Number of reader() calls that currently hold the shared side of the lock.
static int readcnt;
// mutex protects readcnt; w is held by writer() and by the first active reader.
static sem_t mutex, w;
// Node
/* Reset `node` to an empty, unlinked state: no url, no data, no neighbours, size 0. */
void init_node(Node *node) {
    *node = (Node){
        .url = NULL,
        .data = NULL,
        .prev = NULL,
        .next = NULL,
        .size = 0,
    };
}
/*
 * Fill an unlinked node with private copies of `url` and `data`.
 *
 * node - node to initialise; its previous contents are overwritten.
 * url  - NUL-terminated key; copied including the terminator.
 * data - object bytes; exactly `n` bytes are copied.
 * n    - number of bytes in `data`, stored as the node's size.
 *
 * The copies are owned by the node and released by free_node().
 * Terminates the process if memory cannot be allocated.
 */
void init_node_with_data(Node *node, char *url, char *data, int n) {
    /* +1 for the terminator that strcpy writes after the url. */
    size_t url_len = strlen(url);
    node->url = malloc(url_len + 1);
    /* +1 so the stored object is also NUL-terminated for code that prints it. */
    node->data = malloc((size_t)n + 1);
    if (node->url == NULL || node->data == NULL) {
        fprintf(stderr, "init_node_with_data: out of memory\n");
        exit(1);
    }
    strcpy(node->url, url);
    /* Copy by length rather than strcpy so exactly n bytes are stored. */
    memcpy(node->data, data, (size_t)n);
    node->data[n] = '\0';
    node->prev = NULL;
    node->next = NULL;
    node->size = n;
}
/* Release a node together with the url and data buffers it owns. */
void free_node(Node *node) {
    char *key = node->url;
    char *payload = node->data;

    free(key);
    free(payload);
    free(node);
}
/* Make node2 the direct successor of node1 by setting both link fields. */
void link_node(Node *node1, Node *node2) {
    node2->prev = node1;
    node1->next = node2;
}
/* Unlink `node` from its list by joining its predecessor to its successor.
 * The node's own prev/next fields are left untouched. */
void change_link(Node *node) {
    Node *before = node->prev;
    Node *after = node->next;

    link_node(before, after);
}
/* Insert node2 directly after node1; node1 must already be part of a list. */
void insert_node(Node *node1, Node *node2) {
    Node *successor = node1->next;

    link_node(node2, successor);
    link_node(node1, node2);
}
/* Print the node's url and data to stdout as strings (debugging aid). */
void print_node(Node *node) {
    const char *key = node->url;
    const char *payload = node->data;

    printf("url is %s, data is %s\n", key, payload);
}
// Cache
/*
 * Initialise an empty cache and the file-level reader/writer lock state.
 *
 * cache           - caller-allocated cache structure to set up.
 * max_cache_size  - upper bound on the total bytes stored.
 * max_object_size - upper bound on a single stored object.
 *
 * Allocates the head and tail sentinel nodes and links them to each other.
 * Terminates the process if the sentinels cannot be allocated.
 */
void init_cache(Cache *cache, int max_cache_size, int max_object_size) {
    cache->max_cache_size = max_cache_size;
    cache->max_object_size = max_object_size;
    cache->used_size = 0;

    /* No readers yet; both semaphores start unlocked. */
    readcnt = 0;
    Sem_init(&mutex, 0, 1);
    Sem_init(&w, 0, 1);

    cache->head = malloc(sizeof *cache->head);
    cache->tail = malloc(sizeof *cache->tail);
    if (cache->head == NULL || cache->tail == NULL) {
        fprintf(stderr, "init_cache: out of memory\n");
        exit(1);
    }
    init_node(cache->head);
    init_node(cache->tail);
    link_node(cache->head, cache->tail);
}
/*
 * Look up `url` in the cache.
 *
 * cache - cache to search.
 * url   - NUL-terminated key to compare against each node's url.
 * ptr   - ignored on entry; used only as the loop cursor.
 *
 * Returns the matching node after moving it to the front of the list, or
 * NULL when no node matches.
 */
Node* find_cache(Cache *cache, char *url, Node *ptr) {
    for (ptr = cache->head->next; ptr != cache->tail; ptr = ptr->next) {
        printf("%s %s\n", ptr->url, url);
        if (strcmp(ptr->url, url) == 0) {
            printf("write\n");
            /* Detach the node from its current position first; re-inserting
             * a node that is still linked would leave its old neighbours
             * pointing at it and create a cycle in the list. */
            ptr->prev->next = ptr->next;
            ptr->next->prev = ptr->prev;
            /* Move it to the first position, right after the head sentinel. */
            insert_node(cache->head, ptr);
            return ptr;
        }
    }
    /* The cursor stopped on the tail sentinel: report a miss, not the sentinel. */
    return NULL;
}
/* Evict the node right before the tail sentinel, if the list holds one,
 * and subtract its size from used_size. */
void free_cache_block(Cache *cache) {
    Node *victim = cache->tail->prev;

    if (victim == cache->head || !victim) {
        return;
    }
    link_node(victim->prev, victim->next);
    cache->used_size -= victim->size;
    free_node(victim);
}
/* Free every node of the list, sentinels included, starting at head.
 * The Cache structure itself is not freed. */
void free_cache(Cache *cache) {
    for (Node *cur = cache->head, *following; cur; cur = following) {
        following = cur->next;
        free_node(cur);
    }
}
/*
 * Store a copy of `data` under `url` at the front of the cache.
 *
 * cache - cache to insert into.
 * url   - NUL-terminated key.
 * data  - NUL-terminated object; its length is taken with strlen(), so
 *         bytes after an embedded NUL are not stored.
 *
 * Evicts nodes from the tail end until the new object fits.
 * Returns 0 on success and -1 when the object exceeds the size limits or
 * the node cannot be allocated.
 */
int insert_cache(Cache *cache, char *url, char *data) {
    int n = strlen(data);
    if (n > cache->max_object_size) {
        printf("This file exceeds the maximum file size!\n");
        return -1;
    }
    /* An object larger than the whole cache can never fit, however much is evicted. */
    if (n > cache->max_cache_size) {
        return -1;
    }
    /* Stop once the list is empty: free_cache_block() does nothing then,
     * and looping on would never end. */
    while (n + cache->used_size > cache->max_cache_size
           && cache->head->next != cache->tail) {
        free_cache_block(cache);
    }

    Node *node = malloc(sizeof *node);
    if (node == NULL) {
        return -1;
    }
    init_node_with_data(node, url, data, n);
    cache->used_size += n;
    insert_node(cache->head, node);
    return 0;
}
/* Print every stored node, from the one after head up to (not including) tail. */
void print_cache(Cache *cache) {
    Node *cur = cache->head->next;

    while (cur != cache->tail && cur) {
        print_node(cur);
        cur = cur->next;
    }
}
/*
 * Serve `url` from the cache if it is stored there.
 *
 * cache - cache to search.
 * url   - NUL-terminated key.
 * fd    - descriptor the cached bytes are written to on a hit.
 *
 * Takes the shared side of the reader/writer lock for the duration of the
 * lookup and the write, and releases it on every path.
 * Returns 1 when the object was found and written, -1 otherwise.
 *
 * NOTE(review): find_cache() relinks the matched node while only the
 * shared lock is held, so two simultaneous hits can modify the list at
 * the same time.
 */
int reader(Cache *cache, char *url, int fd) {
    int result = -1;
    Node *ptr = NULL;

    /* Entry section: the first reader locks out writers. */
    P(&mutex);
    readcnt++;
    if (readcnt == 1) {
        P(&w);
    }
    V(&mutex);

    ptr = find_cache(cache, url, ptr);
    /* Treat the tail sentinel as a miss as well, so a lookup that ran off
     * the end of the list is never reported as a hit. */
    if (ptr != NULL && ptr != cache->tail) {
        rio_writen(fd, ptr->data, ptr->size);
        result = 1;
    }

    /* Exit section: runs for hits and misses alike; the last reader lets
     * writers in again. */
    P(&mutex);
    readcnt--;
    if (readcnt == 0) {
        V(&w);
    }
    V(&mutex);
    return result;
}
/* Insert `data` under `url` while holding the exclusive lock `w`.
 * The result of insert_cache() is not reported to the caller. */
void writer(Cache *cache, char *url, char *data) {
    P(&w);

    printf("write to cache!\n");
    (void)insert_cache(cache, url, data);

    V(&w);
}
测试结果:
make proxy && ./driver.sh
totalScore: 70/70
综合测试
启动代理服务器,然后设置电脑的代理服务器,访问:
https://home.baidu.com/
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Doraemonzzz!
评论
ValineLivere