CMU 15-213 Lab5 Performance Lab

课程主页:http://www.cs.cmu.edu/afs/cs/academic/class/15213-f15/www/schedule.html

课程资料:https://github.com/EugeneLiu/translationCSAPP

课程视频:https://www.bilibili.com/video/av31289365/

这一部分回顾CSAPP的Performance Lab。

参考资料:

https://blog.csdn.net/kangyupl/article/details/107169906

备注:由于硬件不同,所以我们的结果应该和Naive baseline implementation比较,而不是代码中固定的结果,例如

1
2
3
4
5
Rotate: Version = naive_rotate: Naive baseline implementation:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 2.2 4.0 8.0 7.2
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.8 18.1 11.7 8.2 13.1 11.7

Baseline CPEs是测试代码中固定的结果,所以baseline实际的CPE为

1
1.5	2.2	4.0	8.0	7.2

baseline的加速为11.7,所以我们的代码至少要比11.7好才算提供了优化。

Optimizing Rotate

尝试了多种方法,最后如下代码的结果较好,使用了两种优化

  1. 代码复用

    1
    2
    s = (dim - 1) * dim;
    t = 0;
  2. 以及循环顺序调换(i,j互换)

代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
char rotate_v7_descr[] = "rotate_v7";
void rotate_v7(int dim, pixel *src, pixel *dst){
int i, j, s, t, a, b;
//a, b用于缓存
s = (dim - 1) * dim;
t = 0;
for (j = 0; j < dim; j++){
a = s;
b = t;
for (i = 0; i < dim; i++){
dst[s] = src[t];
s++;
t += dim;
}
s = a - dim;
t = b + 1;
}
}

结果如下

1
2
3
4
5
6
7
8
9
10
11
Rotate: Version = naive_rotate: Naive baseline implementation:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 2.2 4.0 8.0 7.2
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.8 18.1 11.7 8.2 13.1 11.7

Rotate: Version = rotate: Current working version:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 1.6 2.1 3.1 4.3
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.9 25.2 21.8 21.3 22.1 19.2

Optimizing Smooth

主要思路是减少min,max的使用次数,所以将正方形分为4个顶点,4条边以及内部3个部分,分别设置对应的avg函数,代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
//初始化像素
static void initialize_k(pixel_sum *sum, int k) {
sum->red = sum->green = sum->blue = 0;
sum->num = k;
return;
}

//累加
static void accumulate(pixel_sum *sum, pixel p) {
sum->red += (int) p.red;
sum->green += (int) p.green;
sum->blue += (int) p.blue;
return;
}

//赋值
static void assign(pixel *current_pixel, pixel_sum sum) {
current_pixel->red = (unsigned short) (sum.red/sum.num);
current_pixel->green = (unsigned short) (sum.green/sum.num);
current_pixel->blue = (unsigned short) (sum.blue/sum.num);
return;
}

//8个平均,用于内部(dim - 1) * (dim - 1)个元素
static pixel avg_9(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 9);

for(ii = i-1; ii <= i+1; ii++){
for(jj = j-1; jj <= j+1; jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}

assign(&current_pixel, sum);
return current_pixel;
}

//4个平均,用于4个角
static pixel avg_4(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 4);

for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}

assign(&current_pixel, sum);
return current_pixel;
}

//6个平均,上下两行
static pixel avg_6_v1(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 6);

for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = j-1; jj <= j+1; jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}

assign(&current_pixel, sum);
return current_pixel;
}

//6个平均,左右两列
static pixel avg_6_v2(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 6);

for(ii = i-1; ii <= i+1; ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}

assign(&current_pixel, sum);
return current_pixel;
}

char smooth_v5_descr[] = "smooth_v5";
void smooth_v5(int dim, pixel *src, pixel *dst) {
int i, j;

int x[4] = {0, 0, dim - 1, dim - 1};
int y[4] = {0, dim - 1, 0, dim - 1};
//4个角落
for (i = 0; i < 4; i++){
for (j = 0; j < 4; j++){
dst[RIDX(x[i], y[j], dim)] = avg_4(dim, x[i], y[j], src);
}
}

//第一行
i = 0;
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_6_v1(dim, i, j, src);
}

//最后一行
i = dim - 1;
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_6_v1(dim, i, j, src);
}


//第一列
j = 0;
for (i = 1; i < dim - 1; i++){
dst[RIDX(i, j, dim)] = avg_6_v2(dim, i, j, src);
}

//最后一列
j = dim - 1;
for (i = 1; i < dim - 1; i++){
dst[RIDX(i, j, dim)] = avg_6_v2(dim, i, j, src);
}

//内部
for (i = 1; i < dim - 1; i++){
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_9(dim, i, j, src);
}
}
}

效果如下:

1
2
3
4
5
6
7
8
9
10
11
Smooth: Version = naive_smooth: Naive baseline implementation:
Dim 32 64 128 256 512 Mean
Your CPEs 38.7 39.0 39.1 39.5 40.0
Baseline CPEs 695.0 698.0 702.0 717.0 722.0
Speedup 18.0 17.9 18.0 18.2 18.1 18.0

Smooth: Version = smooth: Current working version:
Dim 32 64 128 256 512 Mean
Your CPEs 20.7 20.5 20.1 19.8 20.3
Baseline CPEs 695.0 698.0 702.0 717.0 722.0
Speedup 33.6 34.0 34.9 36.3 35.6 34.9

最终两个实验的结果

1
2
3
Summary of Your Best Scores:
Rotate: 19.2 (rotate: Current working version)
Smooth: 34.9 (smooth: Current working version)

本文标题:CMU 15-213 Lab5 Performance Lab

文章作者:Doraemonzzz

发布时间:2020年07月22日 - 21:20:00

最后更新:2020年07月22日 - 21:03:30

原始链接:http://doraemonzzz.com/2020/07/22/CMU 15-213 Lab5 Performance Lab/

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。