深入理解计算机系统 第5章 习题解析
这次更新第5章习题。
参考资料:
https://dreamanddead.github.io/CSAPP-3e-Solutions/chapter5/
5.13
基本参数:
汇编代码
.L5
vmovsd 0(%rbp,%rcx,8), %xmm1
vmulsd (%rax,%rcx,8), %xmm1, %xmm1
vaddsd %xmm1, %xmm0, %xmm0
addq $1, %rcx
cmpq %rbx, %rcx
jne .L15
(a)
关键路径的分析:
原图:
简化后:
关键路径:
因此关键路径为加法。
(b)
此时数据类型为浮点数,所以下界为浮点数加法周期3.0。
(c)
此时数据类型为整数,所以下界为浮点数加法周期1.0。
(d)
因为关键路径上的操作为加法。
5.14
代码:
#include <stdio.h>
/*Inner product. Accumulate in temporary */
void inner5(vec_ptr u, vec_ptr v, data_t *dest){
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum = (data_t) 0;
for (i = 0; i + 6 < length; i += 6){
sum = ((((((sum + udata[i] * vdata[i]) + udata[i + 1] * vdata[i + 1]) +
udata[i + 2] * vdata[i + 2]) + udata[i + 3] * vdata[i + 3]) +
udata[i + 4] * vdata[i + 4]) + udata[i + 5] * vdata[i + 5]);
}
for (; i < length; i++){
sum = sum + udata[i] * vdata[i];
}
*dest = sum;
}
int main(){
return 0;
}
后续讨论参考课本368。
(a)
(b)
5.15
代码:
#include <stdio.h>
/*Inner product. Accumulate in temporary */
void inner6(vec_ptr u, vec_ptr v, data_t *dest){
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum1 = (data_t) 0;
data_t sum2 = (data_t) 0;
data_t sum3 = (data_t) 0;
data_t sum4 = (data_t) 0;
data_t sum5 = (data_t) 0;
data_t sum6 = (data_t) 0;
for (i = 0; i + 6 < length; i += 6){
sum1 = sum1 + udata[i] * vdata[i];
sum2 = sum2 + udata[i + 1] * vdata[i + 1];
sum3 = sum3 + udata[i + 2] * vdata[i + 2];
sum4 = sum4 + udata[i + 3] * vdata[i + 3];
sum5 = sum5 + udata[i + 4] * vdata[i + 4];;
sum6 = sum6 + udata[i + 5] * vdata[i + 5];;
}
for (; i < length; i++){
sum3 = sum3 + udata[i] * vdata[i];
}
*dest = sum1 + sum2 + sum3 + sum4 + sum5 + sum6;
}
int main(){
return 0;
}
制约因素是容量,因为计算地址时需要整数加法单元。
5.16
代码如下:
#include <stdio.h>
/*Inner product. Accumulate in temporary */
void inner5(vec_ptr u, vec_ptr v, data_t *dest){
long i;
long length = vec_length(u);
data_t *udata = get_vec_start(u);
data_t *vdata = get_vec_start(v);
data_t sum = (data_t) 0;
for (i = 0; i + 6 < length; i += 6){
sum = sum + (udata[i] * vdata[i] + udata[i + 1] * vdata[i + 1] +
udata[i + 2] * vdata[i + 2] + udata[i + 3] * vdata[i + 3] +
udata[i + 4] * vdata[i + 4] + udata[i + 5] * vdata[i + 5]);
}
for (; i < length; i++){
sum = sum + udata[i] * vdata[i];
}
*dest = sum;
}
int main(){
return 0;
}
5.17
#include <stdio.h>
void *memset1(void *s, int c, size_t n){
size_t cnt = 0;
unsigned char *schar = s;
unsigned long slong = 0;
unsigned char *addr = &slong;
int k = sizeof(unsigned long);
// 填充k个
for (int i = 0; i < k; i++){
*addr++ = c;
}
// 循环
while (cnt + k < n) {
*schar = slong;
cnt += k;
schar += k;
}
// 处理剩余部分
while (cnt < n){
*schar++ = (unsigned char) c;
cnt++;
}
return s;
}
int main(){
return 0;
}
5.18
#include <stdio.h>
double poly(double a[], double x, long degree){
long i;
double result = a[0];
double xpwr = x; /* Equals x^i at start of loop */
for (i = 1; i <= degree; i++) {
result += a[i] * xpwr;
xpwr = x * xpwr;
}
return result;
}
// 循环展开
double poly1(double a[], double x, long degree){
long i;
double result = a[0];
double xpwr = x; /* Equals x^i at start of loop */
double xpwr1 = xpwr * x;
for (i = 1; i <= degree; i += 2) {
result = result + a[i] * xpwr + a[i + 1] * xpwr1;
xpwr = xpwr1;
xpwr1 = x * xpwr;
}
for (i = 1; i <= degree; i++) {
result += a[i] * xpwr;
xpwr = x * xpwr;
}
return result;
}
// 并行累积
double poly2(double a[], double x, long degree){
long i;
double result0 = 0;
double result1 = 0;
double xpwr0 = x; /* Equals x^i at start of loop */
double y = x * x;
double xpwr1 = y;
for (i = 1; i + 1 <= degree; i += 2) {
result0 = result0 + a[i] * xpwr0;
result1 = result1 + a[i + 1] * xpwr1;
xpwr0 = xpwr0 * y;
xpwr1 = xpwr1 * y;
}
for (; i <= degree; i++) {
result0 = result0 + a[i] * xpwr0;
xpwr0 *= x;
}
return result0 + result1;
}
// 重新结合
double poly3(double a[], double x, long degree){
long i;
double result = a[0];
double xpwr = x; /* Equals x^i at start of loop */
double xpwr1 = xpwr * x;
for (i = 1; i <= degree; i += 2) {
result = result + (a[i] * xpwr + a[i + 1] * xpwr1);
xpwr = xpwr1;
xpwr1 = x * xpwr;
}
for (i = 1; i <= degree; i++) {
result += a[i] * xpwr;
xpwr = x * xpwr;
}
return result;
}
int main(){
return 0;
}
5.19
#include <stdio.h>
void psum1a(float a[], float p[], long n){
long i;
float last_val, val;
last_val = 0;
for (i = 0; i + 4 < n; i++){
val = last_val + a[i];
p[i] = val;
val = val + a[i + 1];
p[i + 1] = val;
val = val + a[i + 2];
p[i + 2] = val;
val = val + a[i + 3];
p[i + 3] = val;
// update
last_val = val;
}
for (; i < n; i++){
val = last_val + a[i];
p[i] = val;
last_val = val;
}
}
int main(){
return 0;
}
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Doraemonzzz!
评论
ValineLivere