CS50 Introduction to Artificial Intelligence with Python Lecture 5

The topic of Lecture 5 is Learning. This post summarizes the lecture and the fifth set of projects.

Course page: https://cs50.harvard.edu/ai/

Note: all images are taken from the course slides.

Since this lecture mainly covers machine learning material, I skip the recap of the lecture itself and focus on the projects.

Project

Shopping

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier


def load_data(filename):
    """
    Load shopping data from a CSV file `filename` and convert into a list of
    evidence lists and a list of labels. Return a tuple (evidence, labels).

    evidence should be a list of lists, where each list contains the
    following values, in order:
        - Administrative, an integer
        - Administrative_Duration, a floating point number
        - Informational, an integer
        - Informational_Duration, a floating point number
        - ProductRelated, an integer
        - ProductRelated_Duration, a floating point number
        - BounceRates, a floating point number
        - ExitRates, a floating point number
        - PageValues, a floating point number
        - SpecialDay, a floating point number
        - Month, an index from 0 (January) to 11 (December)
        - OperatingSystems, an integer
        - Browser, an integer
        - Region, an integer
        - TrafficType, an integer
        - VisitorType, an integer 0 (not returning) or 1 (returning)
        - Weekend, an integer 0 (if false) or 1 (if true)

    labels should be the corresponding list of labels, where each label
    is 1 if Revenue is true, and 0 otherwise.
    """
    data = pd.read_csv(filename)
    # Map categorical columns to the integer codes described in the docstring.
    # Month runs from 0 (January) to 11 (December); note that the dataset
    # spells June out in full rather than abbreviating it to "Jun".
    data.replace({'Month': {'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'June': 5,
                            'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11}}, inplace=True)
    data.replace({'VisitorType': {'Returning_Visitor': 1, 'New_Visitor': 0, 'Other': 0}}, inplace=True)
    data.replace({'Weekend': {True: 1, False: 0}}, inplace=True)
    data.replace({'Revenue': {True: 1, False: 0}}, inplace=True)
    data = data.values

    # The last column is Revenue (the label); every column before it is evidence
    evidence = data[:, :-1]
    labels = data[:, -1]

    return evidence, labels

def train_model(evidence, labels):
    """
    Given a list of evidence lists and a list of labels, return a
    fitted k-nearest neighbor model (k=1) trained on the data.
    """
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(evidence, labels)

    return model


def evaluate(labels, predictions):
    """
    Given a list of actual labels and a list of predicted labels,
    return a tuple (sensitivity, specificity).

    Assume each label is either a 1 (positive) or 0 (negative).

    `sensitivity` should be a floating-point value from 0 to 1
    representing the "true positive rate": the proportion of
    actual positive labels that were accurately identified.

    `specificity` should be a floating-point value from 0 to 1
    representing the "true negative rate": the proportion of
    actual negative labels that were accurately identified.
    """
    # Sensitivity: fraction of actual positives that were predicted positive
    n1 = np.sum(labels == 1)
    n2 = np.sum((labels == 1) & (predictions == 1))
    sensitivity = n2 / n1

    # Specificity: fraction of actual negatives that were predicted negative
    n3 = np.sum(labels == 0)
    n4 = np.sum((labels == 0) & (predictions == 0))
    specificity = n4 / n3

    return sensitivity, specificity
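
These three functions plug together in the usual train/test workflow. The sketch below shows one way to drive them; the shopping.csv filename and the 40% test split are assumptions chosen for illustration, not something fixed by the functions above.

from sklearn.model_selection import train_test_split

# Minimal driver sketch: load the data, hold out 40% for testing,
# fit the k=1 nearest-neighbor model, and report sensitivity/specificity.
evidence, labels = load_data("shopping.csv")
X_train, X_test, y_train, y_test = train_test_split(evidence, labels, test_size=0.4)

model = train_model(X_train, y_train)
predictions = model.predict(X_test)
sensitivity, specificity = evaluate(y_test, predictions)

print(f"True Positive Rate: {100 * sensitivity:.2f}%")
print(f"True Negative Rate: {100 * specificity:.2f}%")

Sensitivity and specificity are reported separately because most sessions in this dataset do not end in a purchase, so a classifier that always predicted "no purchase" would score high raw accuracy while being useless.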

Nim

A reinforcement learning problem, solved here with Q-learning.

The code is as follows:

# The following methods belong to the NimAI class in nim.py.

def get_q_value(self, state, action):
    """
    Return the Q-value for the state `state` and the action `action`.
    If no Q-value exists yet in `self.q`, return 0.
    """
    state = tuple(state)
    if state not in self.q:
        return 0
    elif action not in self.q[state]:
        return 0
    else:
        return self.q[state][action]

def update_q_value(self, state, action, old_q, reward, future_rewards):
    """
    Update the Q-value for the state `state` and the action `action`
    given the previous Q-value `old_q`, a current reward `reward`,
    and an estimate of future rewards `future_rewards`.

    Use the formula:

    Q(s, a) <- old value estimate
               + alpha * (new value estimate - old value estimate)

    where `old value estimate` is the previous Q-value,
    `alpha` is the learning rate, and `new value estimate`
    is the sum of the current reward and estimated future rewards.
    """
    state = tuple(state)
    if state not in self.q:
        self.q[state] = dict()
    # The new value estimate is reward + future_rewards; move the old
    # estimate toward it by a fraction alpha of the difference
    self.q[state][action] = old_q + self.alpha * (reward + future_rewards - old_q)

def best_future_reward(self, state):
    """
    Given a state `state`, consider all possible `(state, action)`
    pairs available in that state and return the maximum of all
    of their Q-values.

    Use 0 as the Q-value if a `(state, action)` pair has no
    Q-value in `self.q`. If there are no available actions in
    `state`, return 0.
    """
    actions = Nim.available_actions(state)
    if not actions:
        return 0
    # Unseen (state, action) pairs count as Q-value 0
    return max(self.get_q_value(state, action) for action in actions)

def choose_action(self, state, epsilon=True):
    """
    Given a state `state`, return an action `(i, j)` to take.

    If `epsilon` is `False`, then return the best action
    available in the state (the one with the highest Q-value,
    using 0 for pairs that have no Q-values).

    If `epsilon` is `True`, then with probability
    `self.epsilon` choose a random available action,
    otherwise choose the best action available.

    If multiple actions have the same Q-value, any of those
    options is an acceptable return value.
    """
    available_actions = list(Nim.available_actions(state))

    # With probability self.epsilon, explore by picking a random action
    if epsilon and random.random() < self.epsilon:
        return random.choice(available_actions)

    # Otherwise exploit: return the action with the highest Q-value,
    # treating unseen (state, action) pairs as having Q-value 0
    return max(available_actions, key=lambda action: self.get_q_value(state, action))
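
To see what these methods do to the Q-table, here is a minimal, hypothetical single-step example. It is not the project's training loop (in the project, updates like this happen automatically as the AI plays many games against itself); the starting piles, the action (3, 2), and the reward of 1 are assumptions chosen for illustration.

ai = NimAI()                           # the Q-learning agent whose methods are shown above

state = [1, 3, 5, 7]                   # the standard Nim starting piles
action = (3, 2)                        # take 2 objects from pile 3

# Before any learning, every (state, action) pair has Q-value 0
print(ai.get_q_value(state, action))   # 0

# Suppose this move was rewarded with 1 (a win) and no future rewards remain:
# the Q-value moves from 0 toward 1 by a fraction alpha of the difference
old_q = ai.get_q_value(state, action)
ai.update_q_value(state, action, old_q, reward=1, future_rewards=0)
print(ai.get_q_value(state, action))   # 0.5 if alpha is 0.5

# The updated entry now drives both the value estimate and the greedy choice
print(ai.best_future_reward(state))              # 0.5 if alpha is 0.5
print(ai.choose_action(state, epsilon=False))    # (3, 2)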
