斯坦福算法专项课程Course1理论习题算法实现

这一部分将带来Course1理论习题的实现。

思考题1

现在有$n$个元素且大小不相同的无序数组,$n$为$2$的幂,给出一个最多用$n+\log_2n-2$次比较的算法来找出第二大的元素。

算法伪代码:传送门

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np

def Find_second_largest(array):
n = len(array)
#构造Hash,值为和键比较过且小于键的元素
mp = dict()
for i in range(n):
mp[i] = []
#索引集合
index = range(n)
#比较次数
Cnt = 0

while (len(index) != 1):
new_index, cnt = Compare(array, index, mp)
index = new_index[:]
Cnt += cnt

#和最大值比较过的元素
index1 = mp[index[0]]
array1 = [array[i] for i in index1]
m = len(index1)

#找到array1中的最大值
res = array1[0]
for i in range(1, m):
Cnt += 1
if (array1[i] > res):
res = array1[i]

return res, Cnt

#进行一轮比较
def Compare(array, index, mp):
n = len(index)
#记录下一轮的索引
new_index = []
for i in range(n // 2):
i1 = index[2 * i]
i2 = index[2 * i + 1]
x = array[i1]
y = array[i2]
if (x > y):
new_index.append(i1)
mp[i1].append(i2)
else:
new_index.append(i2)
mp[i2].append(i1)

#判断奇偶性
if n % 2 == 1:
new_index.append(index[-1])

return new_index, n // 2

#测试
factor = [2, 5, 10, 15]
for n in factor:
N = 2 ** n
array = np.random.rand(N)
#理论比较次数
m = N + np.log2(N) - 2
#计算结果
res, cnt = Find_second_largest(array)
#计算第二大的元素
ans = np.sort(array)[-2]
#验证
print(res == ans, cnt == m)

思考题2

给定一个由$n$个不同元素组成的单峰数组,这意味着它的元素按递增顺序排列直到其最大元素,之后其元素按递减顺序排列。给出一个算法在$O(\log n)$时间内找到最大元素。

算法伪代码:传送门

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import numpy as np

def Generate_data(max_n=1000):
n = np.random.randint(1, max_n)
m = np.random.randint(1, max_n)

d1 = np.random.randn(n)
d2 = np.random.randn(m)

d1 = np.sort(d1)
d2 = np.sort(d2)[::-1]
data = np.append(d1, d2)

return data

def Find_max(array):
n = len(array)
if (n <= 3):
return np.max(array)

n1 = n // 3
n2 = 2 * n1

#端点元素
d1 = array[0]
d2 = array[n1]
d3 = array[n2]

if ((d1 <= d2) and (d2 <= d3)):
return Find_max(array[n1:])
else:
return Find_max(array[:n2])

#测试
for i in range(10):
array = Generate_data()
res = Find_max(array)
print(res == np.max(array))

思考题3

现在有一个从最小到最大排好序的$n$个不同整数的数组$A$,它们可以是正数,负数或零。现在要判断是否存在索引$i$,使得$A [i] = i$。 设计最快的算法来解决这个问题。

算法伪代码:传送门

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np

def Generate_data(m):
N = int(1e6)
data = np.arange(N)
array = np.random.choice(data, size=m, replace=False)

return np.sort(array)

#判断函数
def Judge_(array, l, r):
if (l > r):
return False
n = (l + r) // 2
if (array[n] == n):
return True
elif (array[n] > n):
r = n - 1
return Judge_(array, l, r)
else:
l = n + 1
return Judge_(array, l, r)

def Judge(array):
n = array.shape[0]

return Judge_(array, 0, n - 1)

#蛮力搜索
def Judge_BF(array):
n = len(array)
for i in range(n):
if (array[i] == i):
return True
return False

#测试
M = range(1, 5)
for m in M:
n = 10 ** m
array = Generate_data(n)
r1 = Judge(array)
r2 = Judge_BF(array)

print(r1 == r2)

思考题4

给你一个$n\times n$个不同数字的网格。 如果数字小于其所有邻居,则该数字是局部最小值。 (一个数字的邻居是上方,下方,左侧或右侧的一个。大多数数字有四个邻居;侧面的数字有三个;四个角有两个。)使用分而治之的算法计算局部最小值,仅对数字对进行$O(n)$次比较。(注意:因为有$n ^ 2$个元素,所以不能查看全部元素。 提示:考虑哪种类型的递归会给你所需的上限。)

算法伪代码:传送门

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np

#判断是否越界
def Judge(i, n):
return (i >= 0) and (i < n)

def Judge_node(i, j, n):
return Judge(i, n) and Judge(j, n)

#判断是否为局部最小
def Judge_local_minimum(data, i, j):
n = data.shape[0]
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
d = data[i][j]
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < d)):
return False

return True

def Find_local_minimum_(data, x1, y1, x2, y2):
'''
搜索区域的左上角为(x1, y1),右下角为(x2, y2)
'''
if ((x1 == x2) and (y1 == y2)):
return x1, y1

x0 = (x1 + x2) // 2
y0 = (y1 + y2) // 2
#记录当前最小值
local_minimum = data[x1][y1]
#记录索引
i = x1
j = y1
#找到最小值
x = [x0, x1, x2]
y = [y0, y1, y2]
for s in x:
for t in range(y1, y2 + 1):
if (data[s][t] < local_minimum):
i = s
j = t
local_minimum = data[s][t]
for t in y:
for s in range(x1, x2 + 1):
if (data[s][t] < local_minimum):
i = s
j = t
local_minimum = data[s][t]
#判断是否为局部最优
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
i0 = i
j0 = j
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
i0 = i1
j0 = j1

#如果为局部最优则直接返回
if ((i == i0) and (j == j0)):
return i, j

#中心点
if ((i == x0) and (j == y0)):
return i, j
elif ((i == x1) or (i == x2) or (j == y1) or (j == y2)):#边界区域
#确定属于哪个区域
u = int(i <= x0)
v = int(j <= y0)
x3 = x[u]
y3 = y[v]
if u:
x4 = x[u - 1]
else:
x4 = x[u + 1]
if v:
y4 = y[v - 1]
else:
y4 = y[v + 1]

return Find_local_minimum_(data, x3, y3, x4, y4)
else:
#找到局部最小值
dx = [0, 0, 1, -1]
dy = [1, -1, 0, 0]
i0 = i
j0 = j
for s in range(4):
i1 = i + dx[s]
j1 = j + dy[s]
if (Judge_node(i1, j1, n) and (data[i1][j1] < data[i0][j0])):
i0 = i1
j0 = j1
#如果为局部最小值则直接返回
if ((i0 == i) and (j0 == j)):
return i, j
else:
#确定属于哪个区域
u = int(i <= x0)
v = int(j <= y0)
x3 = x[u]
y3 = y[v]
if u:
x4 = x[u - 1]
else:
x4 = x[u + 1]
if v:
y4 = y[v - 1]
else:
y4 = y[v + 1]

return Find_local_minimum_(data, x3, y3, x4, y4)

def Find_local_minimum(data):
n = data.shape[0]
i, j = Find_local_minimum_(data, 0, 0, n - 1, n - 1)
print(Judge_local_minimum(data, i, j))

for i in range(1, 5):
n = 10 ** i
data = np.random.rand(n, n)
Find_local_minimum(data)

思考题5

Given an array of $n$ distinct (but unsorted) elements $x_1,x_2,\ldots,x_n$ with positive weights $w_1,w_2,\ldots,w_n$ such that $\sum_{i=1}^n w_i = W$, a weighted median is an element $x_k$ for which the total weight of all elements with value less than $x_k$(i.e., $\sum_{x_i \lt x_k} w_i$) is at most $W/2$, and also the total weight of elements with value larger than $x_k$ (i.e., $\sum_{x_i \gt x_k} w_i$) is at most $W/2$. Observe that there are at most two weighted medians. Show how to compute all weighted medians in $O(n)$ worst-case time.

算法伪代码:传送门(第三题)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import numpy as np

def Generate_data(m):
N = int(1e6)
data = np.arange(N)
array1 = np.random.choice(data, size=m, replace=False)
array2 = np.random.choice(data, size=m, replace=False)

return np.c_[array1, array2]

def Exchange(A, i, j):
#交换元素的值
p = np.array(A[i])
A[i] = np.array(A[j])
A[j] = p

#1维Partion
def Partion1(A, l, r, p):
#找到p的位置
index = l
for i in range(l, r + 1):
if (A[i] == p):
index = i
break
#交换位置
Exchange(A, index, l)
i = l + 1
for j in range(l + 1, r + 1):
if (A[j] < p):
Exchange(A, i, j)
i += 1
Exchange(A, l, i - 1)

return i - 1

#2维Partion
def Partion2(A, l, r, p):
#找到p的位置
index = l
for i in range(l, r + 1):
if (A[i][0] == p):
index = i
break
#交换位置
Exchange(A, index, l)
i = l + 1
for j in range(l + 1, r + 1):
#按第一个维度partion
if (A[j][0] < p):
Exchange(A, i, j)
i += 1
Exchange(A, l, i - 1)

return i - 1

#找到A[l,...,r]中小于p的最大数
def Find_max(A, l, r, p):
index = l
for i in range(l, r + 1):
if (A[i][0] > A[index][0]):
index = i

return A[index][0]

#找到A[l,...,r]中大于p的最小数
def Find_min(A, l, r, p):
index = l
for i in range(l, r + 1):
if (A[i][0] < A[index][0]):
index = i

return A[index][0]

#判断是否满足条件
def Judge(A, p, W):
W1 = 0
W2 = 0
for i in A:
if (i[0] < p):
W1 += i[1]
elif (i[0] > p):
W2 += i[1]
return (W1 <= W / 2) and (W2 <= W / 2)


def Deterministic_Selection(A, i):
n = len(A)
if (n == 1):
return A[0]
C = []
for s in range(0, n, 5):
t = min(n, s + 5)
A[s: t].sort()
#找到中间元素
index = min(s + 2, n - 1)
C.append(A[index])
p = Deterministic_Selection(C, len(C) // 2)
#返回位置
j = Partion1(A, 0, n - 1, p)
#次序统计量
k = j + 1
if (i == k):
return p
elif (k > i):
return Deterministic_Selection(A[: j], i)
else:
return Deterministic_Selection(A[j + 1:], i - k)

def Weighted_median(A, B, W, Hash, res):
'''
A为原始序列(2维),B为当前序列(1维), Hash记录是否访问过该位置的元素
'''
if (len(B) == 1):
return B[0]
#找到中位数
p = Deterministic_Selection(B, len(B) // 2)
n = len(A)
#返回位置
j = Partion2(A, 0, n - 1, p)
#W1
W1 = np.sum(A[: j, 1])
#W2
W2 = np.sum(A[j + 1:, 1])
if (Hash[j] == 1):
p1 = Find_max(A, 0, j - 1, p)
if (Judge(A, p1, W) and (Hash[j - 1] == 0)):
Hash[j - 1] = 1
res.append(p1)
p2 = Find_min(A, j + 1, n - 1, p)
if (Judge(A, p2, W) and (Hash[j + 1] == 0)):
Hash[j + 1] = 1
res.append(p2)
return
else:
Hash[j] = 1
if ((W1 <= W / 2) and (W2 <= W / 2)):
res.append(A[j][0])
elif (W1 > W / 2):
B = [i for i in B if i <= p]
Weighted_median(A, B, W, Hash, res)
else:
B = [i for i in B if i >= p]
Weighted_median(A, B, W, Hash, res)

#蛮力法计算Weighted_median
def Weighted_median_BF(A, W):
res = []
for i in A[:, 0]:
index1 = (A[:, 0] < i)
index2 = (A[:, 0] > i)
W1 = np.sum(A[index1][:, 1])
W2 = np.sum(A[index2][:, 1])

if (W1 <= W / 2) and (W2 <= W / 2):
res.append(i)

return res

#判断列表是否相同
def JudgeList(a, b):
n = len(a)
m = len(b)
if (n != m):
return False
else:
for i in range(n):
if (a[i] != b[i]):
return False
return True

tmp = []
N = 100
for k in range(N):
for i in range(1, 4):
#生成数据
d = 10 ** i
data = np.random.uniform(size=[d, 2])
#计算权重
W = np.sum(data[:, 1])
#Hash表
Hash = np.zeros(d)
#计算结果
p1 = []
Weighted_median(data, np.array(data[:, 0]), W, Hash, p1)
p2 = Weighted_median_BF(data, W)
#保存比较结果
tmp.append(JudgeList(p1, p2))

print(np.sum(tmp) == N * 3)