这一部分将带来Course1理论习题的实现。

思考题1

现在有$n$个元素且大小不相同的无序数组,$n$为$2$的幂,给出一个最多用$n+\log_2n-2$次比较的算法来找出第二大的元素。

算法伪代码:传送门

import numpy as np

def Find_second_largest(array):
    n = len(array)
    #构造Hash,值为和键比较过且小于键的元素
    mp = dict()
    for i in range(n):
        mp[i] = []
    #索引集合
    index = range(n)
    #比较次数
    Cnt = 0
    
    while (len(index) != 1):
        new_index, cnt = Compare(array, index, mp)
        index = new_index[:]
        Cnt += cnt
        
    #和最大值比较过的元素
    index1 = mp[index[0]]
    array1 = [array[i] for i in index1]
    m = len(index1)

    #找到array1中的最大值
    res = array1[0]
    for i in range(1, m):
        Cnt += 1
        if (array1[i] > res):
            res = array1[i]
    
    return res, Cnt

#进行一轮比较
def Compare(array, index, mp):
    n = len(index)
    #记录下一轮的索引
    new_index = []
    for i in range(n // 2):
        i1 = index[2 * i]
        i2 = index[2 * i + 1]
        x = array[i1]
        y = array[i2]
        if (x > y):
            new_index.append(i1)
            mp[i1].append(i2)
        else:
            new_index.append(i2)
            mp[i2].append(i1)

    #判断奇偶性
    if n % 2 == 1:
        new_index.append(index[-1])
    
    return new_index, n // 2

#测试
factor = [2, 5, 10, 15]
for n in factor:
    N = 2 ** n
    array = np.random.rand(N)
    #理论比较次数
    m = N + np.log2(N) - 2
    #计算结果
    res, cnt = Find_second_largest(array)
    #计算第二大的元素
    ans = np.sort(array)[-2]
    #验证
    print(res == ans, cnt == m)

思考题2

给定一个由$n$个不同元素组成的单峰数组,这意味着它的元素按递增顺序排列直到其最大元素,之后其元素按递减顺序排列。给出一个算法在$O(\log n)$时间内找到最大元素。

算法伪代码:传送门

import numpy as np

def Generate_data(max_n=1000):
    n = np.random.randint(1, max_n)
    m = np.random.randint(1, max_n)

    d1 = np.random.randn(n)
    d2 = np.random.randn(m)
    
    d1 = np.sort(d1)
    d2 = np.sort(d2)[::-1]
    data = np.append(d1, d2)
    
    return data

def Find_max(array):
    n = len(array)
    if (n <= 3):
        return np.max(array)

    n1 = n // 3
    n2 = 2 * n1
    
    #端点元素
    d1 = array[0]
    d2 = array[n1]
    d3 = array[n2]
    
    if ((d1 <= d2) and (d2 <= d3)):
        return Find_max(array[n1:])
    else:
        return Find_max(array[:n2])

#测试
for i in range(10):
    array = Generate_data()
    res = Find_max(array)
    print(res == np.max(array))

思考题3

现在有一个从最小到最大排好序的$n$个不同整数的数组$A$,它们可以是正数,负数或零。现在要判断是否存在索引$i$,使得$A [i] = i$。 设计最快的算法来解决这个问题。

算法伪代码:传送门

import numpy as np

def Generate_data(m):
    N = int(1e6)
    data = np.arange(N)
    array = np.random.choice(data, size=m, replace=False)
    
    return np.sort(array)

#判断函数
def Judge_(array, l, r):
    if (l > r):
        return False
    n = (l + r) // 2
    if (array[n] == n):
        return True
    elif (array[n] > n):
        r = n - 1
        return Judge_(array, l, r)
    else:
        l = n + 1
        return Judge_(array, l, r)

def Judge(array):
    n = array.shape[0]
    
    return Judge_(array, 0, n - 1)
    
#蛮力搜索
def Judge_BF(array):
    n = len(array)
    for i in range(n):
        if (array[i] == i):
            return True
    return False

#测试
M = range(1, 5)
for m in M:
    n = 10 ** m
    array = Generate_data(n)
    r1 = Judge(array)
    r2 = Judge_BF(array)
    
    print(r1 == r2)

思考题4

给你一个$n\times n$个不同数字的网格。 如果数字小于其所有邻居,则该数字是局部最小值。 (一个数字的邻居是上方,下方,左侧或右侧的一个。大多数数字有四个邻居;侧面的数字有三个;四个角有两个。)使用分而治之的算法计算局部最小值,仅对数字对进行$O(n)$次比较。(注意:因为有$n ^ 2$个元素,所以不能查看全部元素。 提示:考虑哪种类型的递归会给你所需的上限。)

算法伪代码:传送门

import numpy as np

#判断是否越界
def Judge(i, n):
    return (i >= 0) and (i < n) 

def Judge_node(i, j, n):
    return Judge(i, n) and Judge(j, n)

#判断是否为局部最小
def Judge_local_minimum(data, i, j):
    n = data.shape[0]
    dx = [0, 0, 1, -1]
    dy = [1, -1, 0, 0]
    d = data[i][j]
    for s in range(4):
        i1 = i + dx[s]
        j1 = j + dy[s]
        if (Judge_node(i1, j1, n) and (data[i1][j1] < d)):
            return False
    
    return True

def Find_local_minimum_(data, x1, y1, x2, y2):
    '''
    搜索区域的左上角为(x1, y1),右下角为(x2, y2)
    '''
    if ((x1 == x2) and (y1 == y2)):
        return x1, y1
    
    x0 = (x1 + x2) // 2
    y0 = (y1 + y2) // 2
    #记录当前最小值
    local_minimum = data[x1][y1]
    #记录索引
    i = x1
    j = y1
    #找到最小值
    x = [x0, x1, x2]
    y = [y0, y1, y2]
    for s in x:
        for t in range(y1, y2 + 1):
            if (data[s][t] < local_minimum):
                i = s
                j = t
                local_minimum = data[s][t]
    for t in y:
        for s in range(x1, x2 + 1):
            if (data[s][t] < local_minimum):
                i = s
                j = t
                local_minimum = data[s][t]
    #判断是否为局部最优
    dx = [0, 0, 1, -1]
    dy = [1, -1, 0, 0]
    i0 = i
    j0 = j
    for s in range(4):
        i1 = i + dx[s]
        j1 = j + dy[s]
        if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
            i0 = i1
            j0 = j1
    
    #如果为局部最优则直接返回
    if ((i == i0) and (j == j0)):
        return i, j

    #中心点
    if ((i == x0) and (j == y0)):
        return i, j
    elif ((i == x1) or (i == x2) or (j == y1) or (j == y2)):#边界区域
        #确定属于哪个区域
        u = int(i <= x0)
        v = int(j <= y0)
        x3 = x[u]
        y3 = y[v]
        if u:
            x4 = x[u - 1]
        else:
            x4 = x[u + 1]
        if v:
            y4 = y[v - 1]
        else:
            y4 = y[v + 1]
        
        return Find_local_minimum_(data, x3, y3, x4, y4)
    else:
        #找到局部最小值
        dx = [0, 0, 1, -1]
        dy = [1, -1, 0, 0]
        i0 = i
        j0 = j
        for s in range(4):
            i1 = i + dx[s]
            j1 = j + dy[s]
            if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
                i0 = i1
                j0 = j1
        #如果为局部最小值则直接返回
        if ((i0 == i) and (j0 == j)):
            return i, j
        else:
            #确定属于哪个区域
            u = int(i <= x0)
            v = int(j <= y0)
            x3 = x[u]
            y3 = y[v]
            if u:
                x4 = x[u - 1]
            else:
                x4 = x[u + 1]
            if v:
                y4 = y[v - 1]
            else:
                y4 = y[v + 1]
            
            return Find_local_minimum_(data, x3, y3, x4, y4)
        
def Find_local_minimum(data):
    n = data.shape[0]
    i, j = Find_local_minimum_(data, 0, 0, n - 1, n - 1)
    print(Judge_local_minimum(data, i, j))

for i in range(1, 5):
    n = 10 ** i
    data = np.random.rand(n, n)
    Find_local_minimum(data)

思考题5

Given an array of $n$ distinct (but unsorted) elements $x_1,x_2,\ldots,x_n$ with positive weights $w_1,w_2,\ldots,w_n$ such that $\sum_{i=1}^n w_i = W$, a weighted median is an element $x_k$ for which the total weight of all elements with value less than $x_k$(i.e., $\sum_{x_i \lt x_k} w_i$) is at most $W/2$, and also the total weight of elements with value larger than $x_k$ (i.e., $\sum_{x_i \gt x_k} w_i$) is at most $W/2$. Observe that there are at most two weighted medians. Show how to compute all weighted medians in $O(n)$ worst-case time.

算法伪代码:传送门(第三题)

import numpy as np

def Generate_data(m):
    N = int(1e6)
    data = np.arange(N)
    array1 = np.random.choice(data, size=m, replace=False)
    array2 = np.random.choice(data, size=m, replace=False)
    
    return np.c_[array1, array2]

def Exchange(A, i, j):
    #交换元素的值
    p = np.array(A[i])
    A[i] = np.array(A[j])
    A[j] = p

#1维Partion
def Partion1(A, l, r, p):
    #找到p的位置
    index = l
    for i in range(l, r + 1):
        if (A[i] == p):
            index = i
            break
    #交换位置
    Exchange(A, index, l)
    i = l + 1
    for j in range(l + 1, r + 1):
        if (A[j] < p):
            Exchange(A, i, j)
            i += 1
    Exchange(A, l, i - 1)
    
    return i - 1

#2维Partion 
def Partion2(A, l, r, p):
    #找到p的位置
    index = l
    for i in range(l, r + 1):
        if (A[i][0] == p):
            index = i
            break
    #交换位置
    Exchange(A, index, l)
    i = l + 1
    for j in range(l + 1, r + 1):
        #按第一个维度partion
        if (A[j][0] < p):
            Exchange(A, i, j)
            i += 1
    Exchange(A, l, i - 1)
    
    return i - 1

#找到A[l,...,r]中小于p的最大数
def Find_max(A, l, r, p):
    index = l
    for i in range(l, r + 1):
        if (A[i][0] > A[index][0]):
            index = i
            
    return A[index][0]

#找到A[l,...,r]中大于p的最小数
def Find_min(A, l, r, p):
    index = l
    for i in range(l, r + 1):
        if (A[i][0] < A[index][0]):
            index = i
            
    return A[index][0]

#判断是否满足条件
def Judge(A, p, W):
    W1 = 0
    W2 = 0
    for i in A:
        if (i[0] < p):
            W1 += i[1]
        elif (i[0] > p):
            W2 += i[1]
    return (W1 <= W / 2) and (W2 <= W / 2)
    

def Deterministic_Selection(A, i):
    n = len(A)
    if (n == 1):
        return A[0]
    C = []
    for s in range(0, n, 5):
        t = min(n, s + 5)
        A[s: t].sort()
        #找到中间元素
        index = min(s + 2, n - 1)
        C.append(A[index])
    p = Deterministic_Selection(C, len(C) // 2)
    #返回位置
    j = Partion1(A, 0, n - 1, p)
    #次序统计量
    k = j + 1
    if (i == k):
        return p
    elif (k > i):
        return Deterministic_Selection(A[: j], i)
    else:
        return Deterministic_Selection(A[j + 1:], i - k)

def Weighted_median(A, B, W, Hash, res):
    '''
    A为原始序列(2维),B为当前序列(1维), Hash记录是否访问过该位置的元素
    '''
    if (len(B) == 1):
        return B[0]
    #找到中位数
    p = Deterministic_Selection(B, len(B) // 2)
    n = len(A)
    #返回位置
    j = Partion2(A, 0, n - 1, p)
    #W1
    W1 = np.sum(A[: j, 1])
    #W2
    W2 = np.sum(A[j + 1:, 1])
    if (Hash[j] == 1):
        p1 = Find_max(A, 0, j - 1, p)
        if (Judge(A, p1, W) and (Hash[j - 1] == 0)):
            Hash[j - 1] = 1
            res.append(p1)
        p2 = Find_min(A, j + 1, n - 1, p)
        if (Judge(A, p2, W) and (Hash[j + 1] == 0)):
            Hash[j + 1] = 1
            res.append(p2)
        return
    else:
        Hash[j] = 1
    if ((W1 <= W / 2) and (W2 <= W / 2)):
        res.append(A[j][0])
    elif (W1 > W / 2):
        B = [i for i in B if i <= p]
        Weighted_median(A, B, W, Hash, res)
    else:
        B = [i for i in B if i >= p]
        Weighted_median(A, B, W, Hash, res)

#蛮力法计算Weighted_median
def Weighted_median_BF(A, W):
    res = []
    for i in A[:, 0]:
        index1 = (A[:, 0] < i)
        index2 = (A[:, 0] > i)
        W1 = np.sum(A[index1][:, 1])
        W2 = np.sum(A[index2][:, 1])
        
        if (W1 <= W / 2) and (W2 <= W / 2):
            res.append(i)
    
    return res

#判断列表是否相同
def JudgeList(a, b):
    n = len(a)
    m = len(b)
    if (n != m):
        return False
    else:
        for i in range(n):
            if (a[i] != b[i]):
                return False
        return True

tmp = []
N = 100
for k in range(N):
    for i in range(1, 4):
        #生成数据
        d = 10 ** i
        data = np.random.uniform(size=[d, 2])
        #计算权重
        W = np.sum(data[:, 1])
        #Hash表
        Hash = np.zeros(d)
        #计算结果
        p1 = []
        Weighted_median(data, np.array(data[:, 0]), W, Hash, p1)
        p2 = Weighted_median_BF(data, W)
        #保存比较结果
        tmp.append(JudgeList(p1, p2))

print(np.sum(tmp) == N * 3)