斯坦福算法专项课程Course1理论习题算法实现
这一部分将带来Course1理论习题的实现。
思考题1
现在有$n$个元素且大小不相同的无序数组,$n$为$2$的幂,给出一个最多用$n+\log_2n-2$次比较的算法来找出第二大的元素。
算法伪代码:传送门
import numpy as np
def Find_second_largest(array):
n = len(array)
#构造Hash,值为和键比较过且小于键的元素
mp = dict()
for i in range(n):
mp[i] = []
#索引集合
index = range(n)
#比较次数
Cnt = 0
while (len(index) != 1):
new_index, cnt = Compare(array, index, mp)
index = new_index[:]
Cnt += cnt
#和最大值比较过的元素
index1 = mp[index[0]]
array1 = [array[i] for i in index1]
m = len(index1)
#找到array1中的最大值
res = array1[0]
for i in range(1, m):
Cnt += 1
if (array1[i] > res):
res = array1[i]
return res, Cnt
#进行一轮比较
def Compare(array, index, mp):
n = len(index)
#记录下一轮的索引
new_index = []
for i in range(n // 2):
i1 = index[2 * i]
i2 = index[2 * i + 1]
x = array[i1]
y = array[i2]
if (x > y):
new_index.append(i1)
mp[i1].append(i2)
else:
new_index.append(i2)
mp[i2].append(i1)
#判断奇偶性
if n % 2 == 1:
new_index.append(index[-1])
return new_index, n // 2
#测试
factor = [2, 5, 10, 15]
for n in factor:
N = 2 ** n
array = np.random.rand(N)
#理论比较次数
m = N + np.log2(N) - 2
#计算结果
res, cnt = Find_second_largest(array)
#计算第二大的元素
ans = np.sort(array)[-2]
#验证
print(res == ans, cnt == m)
思考题2
给定一个由$n$个不同元素组成的单峰数组,这意味着它的元素按递增顺序排列直到其最大元素,之后其元素按递减顺序排列。给出一个算法在$O(\log n)$时间内找到最大元素。
算法伪代码:传送门
import numpy as np
def Generate_data(max_n=1000):
n = np.random.randint(1, max_n)
m = np.random.randint(1, max_n)
d1 = np.random.randn(n)
d2 = np.random.randn(m)
d1 = np.sort(d1)
d2 = np.sort(d2)[::-1]
data = np.append(d1, d2)
return data
def Find_max(array):
n = len(array)
if (n <= 3):
return np.max(array)
n1 = n // 3
n2 = 2 * n1
#端点元素
d1 = array[0]
d2 = array[n1]
d3 = array[n2]
if ((d1 <= d2) and (d2 <= d3)):
return Find_max(array[n1:])
else:
return Find_max(array[:n2])
#测试
for i in range(10):
array = Generate_data()
res = Find_max(array)
print(res == np.max(array))
思考题3
现在有一个从最小到最大排好序的$n$个不同整数的数组$A$,它们可以是正数,负数或零。现在要判断是否存在索引$i$,使得$A [i] = i$。 设计最快的算法来解决这个问题。
算法伪代码:传送门
import numpy as np
def Generate_data(m):
N = int(1e6)
data = np.arange(N)
array = np.random.choice(data, size=m, replace=False)
return np.sort(array)
#判断函数
def Judge_(array, l, r):
if (l > r):
return False
n = (l + r) // 2
if (array[n] == n):
return True
elif (array[n] > n):
r = n - 1
return Judge_(array, l, r)
else:
l = n + 1
return Judge_(array, l, r)
def Judge(array):
n = array.shape[0]
return Judge_(array, 0, n - 1)
#蛮力搜索
def Judge_BF(array):
n = len(array)
for i in range(n):
if (array[i] == i):
return True
return False
#测试
M = range(1, 5)
for m in M:
n = 10 ** m
array = Generate_data(n)
r1 = Judge(array)
r2 = Judge_BF(array)
print(r1 == r2)
思考题4
给你一个$n\times n$个不同数字的网格。 如果数字小于其所有邻居,则该数字是局部最小值。 (一个数字的邻居是上方,下方,左侧或右侧的一个。大多数数字有四个邻居;侧面的数字有三个;四个角有两个。)使用分而治之的算法计算局部最小值,仅对数字对进行$O(n)$次比较。(注意:因为有$n ^ 2$个元素,所以不能查看全部元素。 提示:考虑哪种类型的递归会给你所需的上限。)
算法伪代码:传送门
import numpy as np
#判断是否越界
def Judge(i, n):
return (i >= 0) and (i < n)
def Judge_node(i, j, n):
return Judge(i, n) and Judge(j, n)
#判断是否为局部最小
def Judge_local_minimum(data, i, j):
n = data.shape[0]
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
d = data[i][j]
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < d)):
return False
return True
def Find_local_minimum_(data, x1, y1, x2, y2):
'''
搜索区域的左上角为(x1, y1),右下角为(x2, y2)
'''
if ((x1 == x2) and (y1 == y2)):
return x1, y1
x0 = (x1 + x2) // 2
y0 = (y1 + y2) // 2
#记录当前最小值
local_minimum = data[x1][y1]
#记录索引
i = x1
j = y1
#找到最小值
x = [x0, x1, x2]
y = [y0, y1, y2]
for s in x:
for t in range(y1, y2 + 1):
if (data[s][t] < local_minimum):
i = s
j = t
local_minimum = data[s][t]
for t in y:
for s in range(x1, x2 + 1):
if (data[s][t] < local_minimum):
i = s
j = t
local_minimum = data[s][t]
#判断是否为局部最优
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
i0 = i
j0 = j
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
i0 = i1
j0 = j1
#如果为局部最优则直接返回
if ((i == i0) and (j == j0)):
return i, j
#中心点
if ((i == x0) and (j == y0)):
return i, j
elif ((i == x1) or (i == x2) or (j == y1) or (j == y2)):#边界区域
#确定属于哪个区域
u = int(i <= x0)
v = int(j <= y0)
x3 = x[u]
y3 = y[v]
if u:
x4 = x[u - 1]
else:
x4 = x[u + 1]
if v:
y4 = y[v - 1]
else:
y4 = y[v + 1]
return Find_local_minimum_(data, x3, y3, x4, y4)
else:
#找到局部最小值
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
i0 = i
j0 = j
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
i0 = i1
j0 = j1
#如果为局部最小值则直接返回
if ((i0 == i) and (j0 == j)):
return i, j
else:
#确定属于哪个区域
u = int(i <= x0)
v = int(j <= y0)
x3 = x[u]
y3 = y[v]
if u:
x4 = x[u - 1]
else:
x4 = x[u + 1]
if v:
y4 = y[v - 1]
else:
y4 = y[v + 1]
return Find_local_minimum_(data, x3, y3, x4, y4)
def Find_local_minimum(data):
n = data.shape[0]
i, j = Find_local_minimum_(data, 0, 0, n - 1, n - 1)
print(Judge_local_minimum(data, i, j))
for i in range(1, 5):
n = 10 ** i
data = np.random.rand(n, n)
Find_local_minimum(data)
思考题5
Given an array of $n$ distinct (but unsorted) elements $x_1,x_2,\ldots,x_n$ with positive weights $w_1,w_2,\ldots,w_n$ such that $\sum_{i=1}^n w_i = W$, a weighted median is an element $x_k$ for which the total weight of all elements with value less than $x_k$(i.e., $\sum_{x_i \lt x_k} w_i$) is at most $W/2$, and also the total weight of elements with value larger than $x_k$ (i.e., $\sum_{x_i \gt x_k} w_i$) is at most $W/2$. Observe that there are at most two weighted medians. Show how to compute all weighted medians in $O(n)$ worst-case time.
算法伪代码:传送门(第三题)
import numpy as np
def Generate_data(m):
N = int(1e6)
data = np.arange(N)
array1 = np.random.choice(data, size=m, replace=False)
array2 = np.random.choice(data, size=m, replace=False)
return np.c_[array1, array2]
def Exchange(A, i, j):
#交换元素的值
p = np.array(A[i])
A[i] = np.array(A[j])
A[j] = p
#1维Partion
def Partion1(A, l, r, p):
#找到p的位置
index = l
for i in range(l, r + 1):
if (A[i] == p):
index = i
break
#交换位置
Exchange(A, index, l)
i = l + 1
for j in range(l + 1, r + 1):
if (A[j] < p):
Exchange(A, i, j)
i += 1
Exchange(A, l, i - 1)
return i - 1
#2维Partion
def Partion2(A, l, r, p):
#找到p的位置
index = l
for i in range(l, r + 1):
if (A[i][0] == p):
index = i
break
#交换位置
Exchange(A, index, l)
i = l + 1
for j in range(l + 1, r + 1):
#按第一个维度partion
if (A[j][0] < p):
Exchange(A, i, j)
i += 1
Exchange(A, l, i - 1)
return i - 1
#找到A[l,...,r]中小于p的最大数
def Find_max(A, l, r, p):
index = l
for i in range(l, r + 1):
if (A[i][0] > A[index][0]):
index = i
return A[index][0]
#找到A[l,...,r]中大于p的最小数
def Find_min(A, l, r, p):
index = l
for i in range(l, r + 1):
if (A[i][0] < A[index][0]):
index = i
return A[index][0]
#判断是否满足条件
def Judge(A, p, W):
W1 = 0
W2 = 0
for i in A:
if (i[0] < p):
W1 += i[1]
elif (i[0] > p):
W2 += i[1]
return (W1 <= W / 2) and (W2 <= W / 2)
def Deterministic_Selection(A, i):
n = len(A)
if (n == 1):
return A[0]
C = []
for s in range(0, n, 5):
t = min(n, s + 5)
A[s: t].sort()
#找到中间元素
index = min(s + 2, n - 1)
C.append(A[index])
p = Deterministic_Selection(C, len(C) // 2)
#返回位置
j = Partion1(A, 0, n - 1, p)
#次序统计量
k = j + 1
if (i == k):
return p
elif (k > i):
return Deterministic_Selection(A[: j], i)
else:
return Deterministic_Selection(A[j + 1:], i - k)
def Weighted_median(A, B, W, Hash, res):
'''
A为原始序列(2维),B为当前序列(1维), Hash记录是否访问过该位置的元素
'''
if (len(B) == 1):
return B[0]
#找到中位数
p = Deterministic_Selection(B, len(B) // 2)
n = len(A)
#返回位置
j = Partion2(A, 0, n - 1, p)
#W1
W1 = np.sum(A[: j, 1])
#W2
W2 = np.sum(A[j + 1:, 1])
if (Hash[j] == 1):
p1 = Find_max(A, 0, j - 1, p)
if (Judge(A, p1, W) and (Hash[j - 1] == 0)):
Hash[j - 1] = 1
res.append(p1)
p2 = Find_min(A, j + 1, n - 1, p)
if (Judge(A, p2, W) and (Hash[j + 1] == 0)):
Hash[j + 1] = 1
res.append(p2)
return
else:
Hash[j] = 1
if ((W1 <= W / 2) and (W2 <= W / 2)):
res.append(A[j][0])
elif (W1 > W / 2):
B = [i for i in B if i <= p]
Weighted_median(A, B, W, Hash, res)
else:
B = [i for i in B if i >= p]
Weighted_median(A, B, W, Hash, res)
#蛮力法计算Weighted_median
def Weighted_median_BF(A, W):
res = []
for i in A[:, 0]:
index1 = (A[:, 0] < i)
index2 = (A[:, 0] > i)
W1 = np.sum(A[index1][:, 1])
W2 = np.sum(A[index2][:, 1])
if (W1 <= W / 2) and (W2 <= W / 2):
res.append(i)
return res
#判断列表是否相同
def JudgeList(a, b):
n = len(a)
m = len(b)
if (n != m):
return False
else:
for i in range(n):
if (a[i] != b[i]):
return False
return True
tmp = []
N = 100
for k in range(N):
for i in range(1, 4):
#生成数据
d = 10 ** i
data = np.random.uniform(size=[d, 2])
#计算权重
W = np.sum(data[:, 1])
#Hash表
Hash = np.zeros(d)
#计算结果
p1 = []
Weighted_median(data, np.array(data[:, 0]), W, Hash, p1)
p2 = Weighted_median_BF(data, W)
#保存比较结果
tmp.append(JudgeList(p1, p2))
print(np.sum(tmp) == N * 3)