CMU 15-213 Lab5 Performance Lab
课程主页:http://www.cs.cmu.edu/afs/cs/academic/class/15213-f15/www/schedule.html
课程资料:https://github.com/EugeneLiu/translationCSAPP
课程视频:https://www.bilibili.com/video/av31289365/
这一部分回顾CSAPP的Performance Lab。
参考资料:
https://blog.csdn.net/kangyupl/article/details/107169906
备注:由于硬件不同,所以我们的结果应该和Naive baseline implementation比较,而不是代码中固定的结果,例如
Rotate: Version = naive_rotate: Naive baseline implementation:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 2.2 4.0 8.0 7.2
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.8 18.1 11.7 8.2 13.1 11.7
Baseline CPEs是测试代码中固定的结果,所以baseline实际的CPE为
1.5 2.2 4.0 8.0 7.2
baseline的加速为11.7,所以我们的代码至少要比11.7好才算提供了优化。
Optimizing Rotate
尝试了多种方法,最后如下代码的结果较好,使用了两种优化
代码复用
s = (dim - 1) * dim; t = 0;
以及循环顺序调换(i,j互换)
代码如下
char rotate_v7_descr[] = "rotate_v7";
void rotate_v7(int dim, pixel *src, pixel *dst){
int i, j, s, t, a, b;
//a, b用于缓存
s = (dim - 1) * dim;
t = 0;
for (j = 0; j < dim; j++){
a = s;
b = t;
for (i = 0; i < dim; i++){
dst[s] = src[t];
s++;
t += dim;
}
s = a - dim;
t = b + 1;
}
}
结果如下
Rotate: Version = naive_rotate: Naive baseline implementation:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 2.2 4.0 8.0 7.2
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.8 18.1 11.7 8.2 13.1 11.7
Rotate: Version = rotate: Current working version:
Dim 64 128 256 512 1024 Mean
Your CPEs 1.5 1.6 2.1 3.1 4.3
Baseline CPEs 14.7 40.1 46.4 65.9 94.5
Speedup 9.9 25.2 21.8 21.3 22.1 19.2
Optimizing Smooth
主要思路是减少min,max的使用次数,所以将正方形分为4个顶点,4条边以及内部3个部分,分别设置对应的avg函数,代码如下
//初始化像素
static void initialize_k(pixel_sum *sum, int k) {
sum->red = sum->green = sum->blue = 0;
sum->num = k;
return;
}
//累加
static void accumulate(pixel_sum *sum, pixel p) {
sum->red += (int) p.red;
sum->green += (int) p.green;
sum->blue += (int) p.blue;
return;
}
//赋值
static void assign(pixel *current_pixel, pixel_sum sum) {
current_pixel->red = (unsigned short) (sum.red/sum.num);
current_pixel->green = (unsigned short) (sum.green/sum.num);
current_pixel->blue = (unsigned short) (sum.blue/sum.num);
return;
}
//8个平均,用于内部(dim - 1) * (dim - 1)个元素
static pixel avg_9(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 9);
for(ii = i-1; ii <= i+1; ii++){
for(jj = j-1; jj <= j+1; jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}
assign(¤t_pixel, sum);
return current_pixel;
}
//4个平均,用于4个角
static pixel avg_4(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 4);
for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}
assign(¤t_pixel, sum);
return current_pixel;
}
//6个平均,上下两行
static pixel avg_6_v1(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 6);
for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = j-1; jj <= j+1; jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}
assign(¤t_pixel, sum);
return current_pixel;
}
//6个平均,左右两列
static pixel avg_6_v2(int dim, int i, int j, pixel *src) {
int ii, jj;
pixel_sum sum;
pixel current_pixel;
initialize_k(&sum, 6);
for(ii = i-1; ii <= i+1; ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate(&sum, src[RIDX(ii, jj, dim)]);
}
}
assign(¤t_pixel, sum);
return current_pixel;
}
char smooth_v5_descr[] = "smooth_v5";
void smooth_v5(int dim, pixel *src, pixel *dst) {
int i, j;
int x[4] = {0, 0, dim - 1, dim - 1};
int y[4] = {0, dim - 1, 0, dim - 1};
//4个角落
for (i = 0; i < 4; i++){
for (j = 0; j < 4; j++){
dst[RIDX(x[i], y[j], dim)] = avg_4(dim, x[i], y[j], src);
}
}
//第一行
i = 0;
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_6_v1(dim, i, j, src);
}
//最后一行
i = dim - 1;
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_6_v1(dim, i, j, src);
}
//第一列
j = 0;
for (i = 1; i < dim - 1; i++){
dst[RIDX(i, j, dim)] = avg_6_v2(dim, i, j, src);
}
//最后一列
j = dim - 1;
for (i = 1; i < dim - 1; i++){
dst[RIDX(i, j, dim)] = avg_6_v2(dim, i, j, src);
}
//内部
for (i = 1; i < dim - 1; i++){
for (j = 1; j < dim - 1; j++){
dst[RIDX(i, j, dim)] = avg_9(dim, i, j, src);
}
}
}
效果如下:
Smooth: Version = naive_smooth: Naive baseline implementation:
Dim 32 64 128 256 512 Mean
Your CPEs 38.7 39.0 39.1 39.5 40.0
Baseline CPEs 695.0 698.0 702.0 717.0 722.0
Speedup 18.0 17.9 18.0 18.2 18.1 18.0
Smooth: Version = smooth: Current working version:
Dim 32 64 128 256 512 Mean
Your CPEs 20.7 20.5 20.1 19.8 20.3
Baseline CPEs 695.0 698.0 702.0 717.0 722.0
Speedup 33.6 34.0 34.9 36.3 35.6 34.9
最终两个实验的结果
Summary of Your Best Scores:
Rotate: 19.2 (rotate: Current working version)
Smooth: 34.9 (smooth: Current working version)
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Doraemonzzz!
评论
ValineLivere