深入理解计算机系统 第3章 习题解析 Part1
这次更新第3章习题。
参考资料:
https://dreamanddead.github.io/CSAPP-3e-Solutions/chapter3/
https://cs.nyu.edu/wies/teaching/cso-fa19/class13_machine.pdf
Chapter 3
难题:3.59
3.58
汇编代码:
decode2:
subq %rdx, %rsi # y = y - z
imulq %rsi, %rdi # x = x * y
movq %rsi, %rax # tmp = y
salq $63, %rax # tmp = tmp << 63
sarq $63, %rax # tmp = tmp >> 63
xorq %rdi, %rax # tmp = tmp ^ x
ret
对于移位操作,如果tmp为偶数,那么
(tmp << 63) >> 63 = 0...0
否则,
(tmp << 63) >> 63 = 1...1
因此C代码如下:
long decode2(long x, long y, long z){
y = y - z;
x = x * y;
if (y & 1){
return ~x;
}else{
return x;
}
}
3.59
C代码:
typedef __int128 int128_t;
void store_prod(int128_t *dest, int64_t x, int64_t y) {
*dest = x * (int128_t) y;
}
首先看数学关系:
因此:
注意到如果$x < 0$,那么
如果$x\ge 0$,那么
接着看汇编代码(注意参数分别存储在%rdi, %rsi, %rdx):
store_prod:
movq %rdx, %rax # t1 = y
cqto # y, t1 = t1的128位符号拓展 (y = yh, t1 = yl)
movq %rsi, %rcx # t2 = x
sarq $63, %rcx # t2 = t2 >> 63 (if xh < 0, t2 = -1, else t2 = 0, 即t2 = xh)
imulq %rax, %rcx # t2 = (t2 * t1) mod 2^64 (t2 = xh * yl mod 2^64)
imulq %rsi, %rdx # y = (y * x) mod 2^64 (y = yh * xl mod 2^64)
addq %rdx, %rcx # t2 = t2 + y (t2 = xh * yl + yh * xl)
mulq %rsi # y, t1 = t1 * x = yl * xl (y = yl * xl / 2^64, t1 = yl * xl mod 2^64)
addq %rcx, %rdx # y = y + t2 (y = (yh * xl + xh * yl + yl * xl / 2^64) mode 2^64)
movq %rax, (%rdi) # *dest = t1 (yl * xl mode 2^64)
movq %rdx, 8(%rdi) # *(dest + 8) = y ((yh * xl + xh * yl + yl * xl / 2^64) mode 2^64)
ret
3.60
汇编代码:
long loop(long x, int n)
x in %rdi, n in %esi
loop:
movl %esi, %ecx #t1 = n
movl $1, %edx #t2 = 1
movl $0, %eax #t3 = 0
jmp .L2 #goto .L2
.L3:
movq %rdi, %r8 #t4 = x
andq %rdx, %r8 #t4 = t4 & t2
orq %r8, %rax #t3 = t3 | t4
salq %cl, %rdx #t2 = t2 << (low 8 bit of t1)
.L2:
testq %rdx, %rdx #t2
jne .L3 #if t2 != 0, then goto .L3
rep: ret #return t3
对应C代码:
long loop(long x, int n)
{
long result = 0;
long mask;
for (mask = 1; mask != 0; mask = (mask << (x & 0xFF))){
result |= (x & mask) ;
}
return result;
}
3.61
C代码:
long cread_alt(long *xp){
return ((!xp) ? 0 : *xp);
}
对应的汇编代码:
cread_alt:
movq $0, %rax #v = 0
testq %rdi, %rdi #test xp
cmovne (%rdi), %rax #if xp != NULL, v = *xp
ret
3.62
汇编代码:
p1 in %rdi, p2 in %rsi, action in %edx
.L8: MODE_E
movl $27, %eax #t1 = 27
ret
.L3: MODE_A
movq (%rsi), %rax #t1 = *p2
movq (%rdi), %rdx #action = *p1
movq %rdx, (%rsi) #*p2 = action
ret
.L5: MODE_B
movq (%rdi), %rax #t1 = *p1
addq (%rsi), %rax #t1 = t1 + *p2
movq %rax, (%rdi) #*p1 = result
ret
.L6: MODE_C
movq $59, (%rdi) #*p1 = 59
movq (%rsi), %rax #t1 = *p2
ret
.L7: MODE_D
movq (%rsi), %rax #t1 = *p2
movq %rax, (%rdi) #*p1 = t1
movl $27, %eax #t1 = 27
ret
.L7: default
movl $12, %eax #t1 = 12
ret
对应的C代码:
/* Enumerated type creates set of constants numbered 0 and upward */
typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;
long switch3(long *p1, long *p2, mode_t action)
{
long result = 0;
switch (action) {
case MODE_A:
result = *p2;
action = *p1;
*p2 = action;
break;
case MODE_B:
result = *p1;
result += *p2;
*p1 = result;
break;
case MODE_C:
*p1 = 59;
result = *p2;
break;
case MODE_D:
result = *p2;
*p1 = result;
result = 27;
break;
case MODE_E:
result = 27;
break;
default:
result = 12;
}
return result;
}
3.63
C代码:
long switch_prob(long x, long n ) {
long result = x;
switch(n) {
case 60:
case 62:
result = 8 * x;
break;
case 63:
result = (x >> 3);
break;
case 64:
result = x;
result <<= 4;
result -= x;
x = result;
case 65:
x *= x;
default:
result = 75 + x;
break;
}
return result;
}
gdb结果:
(gdb) x/6gx 0x4006f8
0x4006f8: 0x00000000004005a1(n=60) 0x00000000004005c3(n=61, default)
0x400708: 0x00000000004005a1(n=62) 0x00000000004005aa(n=63)
0x400718: 0x00000000004005b2(n=64) 0x00000000004005bf(n=65)
汇编代码
long switch. prob(long x, 1ong n)
x in %rdi, n in %rsi
0000000000400590 <switch_prob>:
400590: 48 83 ee 3c sub $0x3c,%rsi #n -= 60
400594: 48 83 fe 05 cmp $0x5,%rsi #tmp = n - 5
400598: 77 29 ja 4005c3 <switch_ prob+0x33> #if tmp > 0, goto 4005c3
40059a: ff 24 f5 f8 06 40 00 jmpq *0x4006f8(,%rsi,8) #goto *[0x4006f8 + 8n]
4005a1: 48 8d 04 fd 00 00 00 lea 0x0(,%rdi,8),%rax #t1 = 8 * x
4005a8: 00
4005a9: c3 retq #return
4005aa: 48 89 f8 mov %rdi,%rax #t1 = x
4005ad: 48 c1 f8 03 sar $0x3,%rax #t1 >>= 3
4005b1: c3 retq #return
4005b2: 48 89 f8 mov %rdi,%rax #t1 = x
4005b5: 48 c1 e0 04 shl $0x4,%rax #t1 <<= 4
4005b9: 48 29 f8 sub %rdi,%rax #t1 -= x
4005bc: 48 89 c7 mov %rax,%rdi #x = t1
4005bf: 48 0f af ff imul %rdi,%rdi #x *= x
4005c3: 48 8d 47 4b lea 0x4b(%rdi),%rax #t1 = 0x4b + x
4005c7: c3 retq #return
3.64
C代码:
long A[R][S][T];
long store_ele(long i, long j, long k, long *dest)
{
*dest = A[i] [j] [k];
return sizeof (A) ;
}
汇编代码:
long store_ele(long i, 1ong j, 1ong k, long *dest)
i in %rdi, j in %rsi, k in %rdx, dest in %rcx
store_ele:
leaq (%rsi,%rsi,2), %rax #t1 = 3 * j
leaq (%rsi,%rax,4), %rax #t1 = j + 4 * t1; t1 = 13j
movq %rdi, %rsi #j = i
salq $6, %rsi #j <<= 6; j = 64j = 64i
addq %rsi, %rdi #i = i + j; i = 65i
addq %rax, %rdi #i = i + t1; i = 65i + 13j
addq %rdi, %rdx #k = k + i; k = 65i + 13j + k
movq A(,%rdx,8), %rax #t1 = *(A + 8 * k)
movq %rax, (%rcx) #*dest = rax
movl $3640, %eax #t1 = 3640
ret
推广214页的等式3.1可得:
对于
D A[R][S][T]
其地址为
对于此例
因此
结合
sizeof(A) = 3640
可得
3.65
C代码:
void transpose(long A[M] [M]) {
long i, j;
for(i = 0; i < M; i++)
for(j = O; j < i; j++){
long t = A[i][j];
A[i][j] = A[j][i] ;
A[j][i] = t;
}
}
汇编代码:
#%rcx: t1, %rdx: t2, %rsi: t3, %rax: t4, %rdi: t5
.L6:
movq (%rdx), %rcx #t1 = *t2
movq (%rax), %rsi #t3 = *t4
movq %rsi, (%rdx) #*t2 = t3
movq %rcx, (%rax) #*t4 = t1
addq $8, %rdx #t2 += 8
addq $120, %rax #t4 += 120
cmpq %rdi, %rax #t4 - t5
jne .L6 #if t4 - t5 != 0, goto .L6
注意到如下更新方式
t2 += 8
这说明$\%\text{rdx}$保存着指向$\text{A[i][j]}$的指针,因此$\text{t4}(\%\text{rax})$保存着指向$\text{A[j][i]}$的指针。
最后注意如下更新方式
t4 += 120
因此
3.66
C代码:
long sum_co1(long n, long A[NR(n)][NC(n)], long j) {
long i;
long result = 0;
for (i = 0; i < NR(n); i++)
result += A[i][j];
return result;
}
汇编代码:
long sum_col(long n, long A[NR(n)][CNC(n)], long j)
n in %rdi, A in %rsi, j in %rdx
sum_col:
leaq 1(,%rdi,4), %r8 #r8 = 1 + 4 * n
leaq (%rdi,%rdi,2), %rax #rax = 3 * n
movq %rax, %rdi #rdi = rax = 3 * n
testq %rax, %rax #test rax
jle .L4 #if rax <= 0, goto L4
salq $3, %r8 #r8 = r8 << 3 = 8 * r8
leaq (%rsi,%rdx,8), %rcx #rcx = rsi + 8 * rdx = A + 8 * j
movl $0, %eax #rax = 0 (result = 0)
movl $0, %edx #rdx = 0 (i = 0)
.L3:
addq (%rcx), %rax #rax = rax + *rcx
addq $1, %rdx #rdx = rdx + 1
addq %r8, %rcx #rcx = rcx + r8 = rcx + 8 * r8
cmpq %rdi, %rdx #cmp rdx - rdi (cmp i - 3 * n)
jne .L3 #if rdx - rdi != 0
rep; ret
.L4:
movl $0, %eax #rax = 0
ret
根据注释可得
NR(n) = 3 * n
CNC(n) = 1 + 4 * n
3.67
参考资料:
https://dreamanddead.github.io/CSAPP-3e-Solutions/chapter3/3.67/
C代码:
typedef struct {
long a[2];
long* p;
}strA;
typedef struct {
long u[2];
long q;
}strB;
strB process(strA s) {
strB r;
r.u[0] = s.a[1];
r.u[1] = s.a[0];
r.q = *s.P;
return r;
}
long eval (1ong x, long y, long z){
strA s;
s.a[0] = x;
s.a[1] = y;
s.p = &z;
strB r = process(s);
return r.u[0] + r.u[1] + r.q;
)
内存中的布局:
strA:
[a[0]][a[1]][*p]
[8][8][8]
strB:
[u[0]][u[1]][q]
[8][8][8]
汇编代码:
strB process(strA s)
process:
movq %rdi, %rax #t1 = r
movq 24(%rsp), %rdx #t2 = *(rsp + 24) (s.p)
movq (%rdx), %rdx #t2 = *t2 = *(s.p)
movq 16(%rsp), %rcx #t3 = *(rsp + 16) (s.a[1])
movq %rcx, (%rdi) #r.u[0] = t3 = s.a[1]
movq 8(%rsp), %rcx #t3 = *(rsp + 8) (s.a[0])
movq %rcx, 8(%rdi) #r.u[1] = t3 = s.a[0]
movg %rdx, 16(%rdi) #r.q = t2 = *(s.p)
ret
long eval(long x,long y, long z)
x in %rdi, y in %rsi, z in %rdx
eval:
subq $104, %rsp #rsp = rsp - 104 (分配空间)
movq %rdx, 24(%rsp) #*(rsp + 24) = z
leaq 24(%rsp), %rax #t1 = rsp + 24 (t1 = &z)
movq %rdi, (%rsp) #*rsp = x (s.a[0] = x)
movq %rsi, 8(%rsp) #*(rsp + 8) = y (s.a[1] = y)
movq %rax, 16(%rsp) #*(rsp + 16) = z (s.p = &z)
leaq 64(%rsp), %rdi #rdi = *(rsp + 64) (参数一)
call process
movq 72(%rsp), %rax #t1 = *(rsp + 72) (t1 = r.u[1])
addq 64(%rsp), %rax #t1 = t1 + *(rsp + 64) (t1 = t1 + r.u[0])
addq 80(%rsp), %rax #t1 = t1 + *(rsp + 80) (t1 = t1 + r.q)
addq $104, %rsp #rsp = rsp + 104
ret
(a)
栈 | 值 |
---|---|
rsp + 104 | |
… | |
rsp + 64 | rdi |
… | |
rsp + 24 | z |
rsp + 16 | s.p = &z |
rsp + 8 | s.a[1] = y |
rsp | s.a[0] = x |
(b)
%rdi(*(rsp + 64))
(c)
通过%rsp + 8, +16, + 24
(d)
通过%rsp + 64, +72, +80
(e)
栈 | 值 |
---|---|
rsp + 104 | |
… | |
rsp + 80 | r.q |
rsp + 72 | r.u[1] |
rsp + 64 | r.u[0] |
… | |
rsp + 24 | z |
rsp + 16 | s.p = &z |
rsp + 8 | s.a[1] = y |
rsp(eval) | s.a[0] = x |
rsp - 8(rsp in process) |
(f)
结构的值是在栈中传递。
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 Doraemonzzz!
评论
ValineLivere