继续更新习题。

参考资料:

https://dreamanddead.gitbooks.io/csapp-3e-solutions/content/

Chapter 2

91

A.

浮点数三个部分:

0 10000000 10010010000111111011011

所以二进制表示为

B.

C.

所以从小数点第9位后即不相等。

92

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

/* Compute -f. If f is NaN, then return f. */
float_bits float_negate(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    
    if (exp == 0xFF && frac != 0){
        //NaN
    }else{
        sign = 1 - sign;
        
    }
    return (sign << 31) | (exp << 23) | frac;
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

float_bits float_negate_(float f){
    if (!isnan(f)){
        f = -f;
    }

    return f2u(f);
}

void printInfo(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    printf("sign: %x, exp: %x, frac: %x\n", sign, exp, frac);

    return;
}

void test(){
    float_bits fMax = -1;
    float_bits f = 0;
    
    while (1){
        float_bits f1 = float_negate(f);
        float_bits f2 = float_negate_(u2f(f));
        if (f1 != f2){
            printf("Wrong answer for %x, %f!\n", f, u2f(f));
            printInfo(f);
            printf("Answer1 is %x, %f, answer2 is %x, %f\n", f1, u2f(f1), f2, u2f(f2));
            printInfo(f1);
            printInfo(f2);
            return;
        }
        f++;
        if (f == fMax){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    test();

    return 0;
}

93

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

float_bits float_absval(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    
    if (exp == 0xFF && frac != 0){
        //NaN
    }else{
        //注意+-0的处理
        if (sign == 1){
            sign = 0;
        }
    }

    return (sign << 31) | (exp << 23) | frac;
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

float_bits float_absval_(float f){
    if (!isnan(f)){
        f = fabs(f);
    }

    return f2u(f);
}

void printInfo(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    printf("sign: %x, exp: %x, frac: %x\n", sign, exp, frac);

    return;
}

void test(){
    float_bits fMax = -1;
    float_bits f = 0;
    
    while (1){
        float_bits f1 = float_absval(f);
        float_bits f2 = float_absval_(u2f(f));
        if (f1 != f2){
            printf("Wrong answer for %x, %f!\n", f, u2f(f));
            printInfo(f);
            printf("Answer1 is %x, %f, answer2 is %x, %f\n", f1, u2f(f1), f2, u2f(f2));
            printInfo(f1);
            printInfo(f2);
            return;
        }
        f++;
        if (f == fMax){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    test();
    
    return 0;
}

94

对于$\exp=0$的数,

如果$f_{n-1}=0$,那么

如果$f_{n-1}=1$,那么

所以无法使用$\exp=0$的情形表示该数,此时用规格化的情形表示,取

那么

注意这两部分对应的操作都为

frac <<= 1;

代码:

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

float_bits float_twice(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    
    if (exp == 0xFF){
        //正无穷, NaN
    }else if (exp == 0){
        //非规格化的
        frac <<= 1;
    }else if (exp == 0xFF - 1){
        //成2之后变成非规格化的数
        exp++;
        //设置frac为0
        frac = 0;
    }else{
        //乘2之后依然为规格化的数
        exp++;
    }

    return (sign << 31) | (exp << 23) | frac;
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

float_bits float_twice_(float f){
    if ((!isnan(f)) && (!isinf(f))){
        f *= 2;
    }

    return f2u(f);
}

void printInfo(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    printf("sign: %x, exp: %x, frac: %x\n", sign, exp, frac);

    return;
}

void test(){
    float_bits fMax = -1;
    float_bits f = 0;
    
    while (1){
        float_bits f1 = float_twice(f);
        float_bits f2 = float_twice_(u2f(f));
        if (f1 != f2){
            printf("Wrong answer for %x, %f!\n", f, u2f(f));
            printInfo(f);
            printf("Answer1 is %x, %f, answer2 is %x, %f\n", f1, u2f(f1), f2, u2f(f2));
            printInfo(f1);
            printInfo(f2);
            return;
        }
        f++;
        if (f == fMax){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    test();
    return 0;
}

95

两个需要注意的地方:下面分别陈述。

首先是exp=1的情形,此时乘以$0.5$化为非规格化的情形,frac的解释由1.f变成了0.f,对应的代码为:

exp--;
frac = (1 << 22) | (frac >> 1);

其次是exp=0的情形,此时要考虑向偶数舍入,舍入方式如下:

//向偶数舍入
//00 -> 0
//01 -> 0
//10 -> 1
//11 -> 10

完整代码如下:

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

/* Compute 0.5*f. If f is NaN, then return f. */
float_bits float_half(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    unsigned last = frac & 0x3;
    
    if (exp == 0xFF){
        //正无穷, NaN
    }else if (exp <= 1){
        if (exp == 1){
            exp--;
            frac = (1 << 22) | (frac >> 1);
        }else{
            frac >>= 1;
        }
        //非规格化的数向偶数舍入
        //00 -> 0
        //01 -> 0
        //10 -> 1
        //11 -> 10
        if (last == 0x3){
            frac++;
        }
    }else{
        //规格化的数
        exp--;
    }

    return (sign << 31) | (exp << 23) | frac;
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

float_bits float_half_(float f){
    if ((!isnan(f)) && (!isinf(f))){
        f *= 0.5;
    }

    return f2u(f);
}

void printInfo(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    printf("sign: %x, exp: %x, frac: %x\n", sign, exp, frac);

    return;
}

void test(){
    float_bits fMax = -1;
    float_bits f = 0;

    while (1){
        float_bits f1 = float_half(f);
        float_bits f2 = float_half_(u2f(f));
        if (f1 != f2){
            printf("Wrong answer for %x, %f!\n", f, u2f(f));
            printInfo(f);
            printf("Answer1 is %x, %f, answer2 is %x, %f\n", f1, u2f(f1), f2, u2f(f2));
            printInfo(f1);
            printInfo(f2);
            return;
        }
        f++;
        if (f == fMax){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    test();
    
    return 0;
}

96

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

/*
 * Compute (int) f.
 * If conversion causes overflow or f is NaN,return 0x80000000
 */
int float_f2i(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;
    
    if (exp == 0xFF){
        //正无穷, NaN
        return 0x80000000;
    }else if (exp == 0){
        //绝对值小于1
        return 0;
    }else{
        //规格化的
        int E = exp - 127;
        if (E >= 32){
            //绝对值大于等于2 ^ 32, 超过int范围
            return 0x80000000;
        }else if (E < 0){
            //绝对值小于1
            return 0;
        }else if (E == 0){
            //绝对值大于1, 小于2的数
            int res = 1;
            if (sign){
                res *= -1;
            }

            return res;
        }else if (E == 31){
            //指数部分为2 ^ 31, 绝对值大于等于2 ^ 31
            if ((frac == 0) && (sign)){
                //INT_MIN
                return 1 << 31;
            }else{
                //绝对值小于2 ^ 31, 超过范围
                return 0x80000000;
            }
        }else{
            //0 < E < 30
            //先计算|i|
            //化成31位int形式
            //(1fn-1fn-2...f0 0...0)共31位
            int res = (1 << 30) | (frac << 7);
            //右移的位数
            int k = 30 - E;
            res >>= k;
            //考虑符号
            if (sign){
                res *= (-1);
            }

            return res;
        }
    }
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

int float_f2i_(float f){

    return (int) f;
}

void test(){
    float_bits fMax = -1;
    float_bits f = 0;
    
    while (1){
        float_bits f1 = float_f2i(f);
        float_bits f2 = float_f2i_(u2f(f));
        if (f1 != f2){
            printf("Wrong answer for %x!\n", f);
            printf("Answer1 is %x, answer2 is %x\n", f1, f2);
            return;
        }
        f++;
        if (f == fMax){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    test();
    
    return 0;
}

97

超过frac表示的范围时存在舍入问题,具体如下。

假设我们考虑的int为正数,记为$d$,最高有效位为$d_{k-1}=1$,那么

如果$k\ge 25$,那么该数无法用如下形式的数表示(最多表示24位有效数字)

将$d$表示为类似上述情形

直接右移得到

注意要向偶数舍入,所以如果

那么结果为

如果

那么结果也为

其余情形的结果都为$d’$。

另一个问题是舍入之后可能带来的进位问题。

如果舍入之后的结果为

那么此时就要进位。

完整代码如下:

#include <stdio.h>
#include <limits.h>
#include <math.h>

typedef unsigned float_bits;

int leftmost_index(int i){
    int k = 0;
    while (i != 0){
        i >>= 1;
        k++;
    }

    return k;
}

/* Compute (float) i */
float_bits float_i2f(int i){
    unsigned sign;
    unsigned exp;
    unsigned frac;

    if (i == INT_MIN){
        sign = 1;
        exp = 31 + 127;
        frac = 0;
    }else{
        //符号位
        if (i < 0){
            i = -i;
            sign = 1;
        }else{
            sign = 0;
        }
        //0
        if (i == 0){
            exp = 0;
            frac = 0;
        }else{
            //exp
            //找到最高位, 位置从1开始
            //dkdk-1...d1
            //1dk-1...d1
            //1.dk-1...d1 * 2 ^ k
            int k = leftmost_index(i);
            exp = k - 1 + 127;

            if (k <= 24){
                //没有超过frac表示的范围
                //消除最高位
                frac = (1 << (k - 1)) ^ i;
                //转换成标准的frac形式
                frac <<= (24 - k);
            }else{
                //超过frac表示的范围
                //首先转换成23位, 加上首位共24位
                frac = i >> (k - 24);
                //取末k - 24位, 由此判断进位
                int mask = (1 << (k - 24)) - 1;
                int last = i & mask;
                //阈值
                int threshold = 1 << (k - 25);
                //舍入
                if (last == threshold){
                    if (frac & 1){
                        frac++;
                    }
                }else if (last > threshold){
                    frac++;
                }
                //找到最高有效位
                int k = leftmost_index(frac);
                //考虑进位的情形
                if (k == 25){
                    exp++;
                }
                //消除最高有效位
                frac = (1 << (k - 1)) ^ frac;
            }
        }
    }

    return (sign << 31) | (exp << 23) | frac;
}

float u2f(float_bits f){
    return *(float *) &f;
}

float_bits f2u(float f){
    return *(float_bits *) &f;
}

float_bits float_i2f_(int i){

    return f2u((float) i);
}

void printInfo(float_bits f){
    unsigned sign = (f >> 31);
    unsigned exp = (f >> 23) & 0xFF;
    unsigned frac = f & 0x7FFFFF;

    printf("sign: %x, exp: %x, frac: %x\n", sign, exp, frac);
    printf("sign: %d, exp: %d, frac: %d\n", sign, exp, frac);

    return;
}

void test(){
    int f = INT_MIN;
    while (1){
        float_bits f1 = float_i2f(f);
        float_bits f2 = float_i2f_(f);
        if (f1 != f2){
            printf("Wrong answer for %x, %d!\n", f, f);
            printf("Answer1 is %x, %f, answer2 is %x, %f\n", f1, u2f(f1), f2, u2f(f2));
            printInfo(f1);
            printInfo(f2);
            return;
        }
        f++;
        if (f == INT_MAX){
            break;
        }
    }

    printf("Pass test!\n");
}

int main(){
    printf("%d\n", INT_MIN);
    test();

    return 0;
}