CS50 Introduction to Artificial Intelligence with Python Lecture 5

The topic of Lecture 5 is Learning. This post summarizes the lecture and the fifth set of projects.

Course page: https://cs50.harvard.edu/ai/

Note: all images are taken from the course slides.

Since this lecture mainly covers machine learning material, I skip the recap of the lecture itself and focus on the projects.

Project

Shopping

import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier


def load_data(filename):
    """
    Load shopping data from a CSV file `filename` and convert into a list of
    evidence lists and a list of labels. Return a tuple (evidence, labels).

    evidence should be a list of lists, where each list contains the
    following values, in order:
        - Administrative, an integer
        - Administrative_Duration, a floating point number
        - Informational, an integer
        - Informational_Duration, a floating point number
        - ProductRelated, an integer
        - ProductRelated_Duration, a floating point number
        - BounceRates, a floating point number
        - ExitRates, a floating point number
        - PageValues, a floating point number
        - SpecialDay, a floating point number
        - Month, an index from 0 (January) to 11 (December)
        - OperatingSystems, an integer
        - Browser, an integer
        - Region, an integer
        - TrafficType, an integer
        - VisitorType, an integer 0 (not returning) or 1 (returning)
        - Weekend, an integer 0 (if false) or 1 (if true)

    labels should be the corresponding list of labels, where each label
    is 1 if Revenue is true, and 0 otherwise.
    """
    data = pd.read_csv(filename)
    # Map categorical columns to the integer codes described in the docstring.
    # Month runs from 0 (January) to 11 (December); note that the dataset
    # spells June out in full rather than abbreviating it to "Jun".
    data.replace({'Month': {'Jan': 0, 'Feb': 1, 'Mar': 2, 'Apr': 3, 'May': 4, 'June': 5,
                            'Jul': 6, 'Aug': 7, 'Sep': 8, 'Oct': 9, 'Nov': 10, 'Dec': 11}}, inplace=True)
    data.replace({'VisitorType': {'Returning_Visitor': 1, 'New_Visitor': 0, 'Other': 0}}, inplace=True)
    data.replace({'Weekend': {True: 1, False: 0}}, inplace=True)
    data.replace({'Revenue': {True: 1, False: 0}}, inplace=True)
    data = data.values

    # The last column is Revenue (the label); every column before it is evidence
    evidence = data[:, :-1]
    labels = data[:, -1]

    return evidence, labels

def train_model(evidence, labels):
    """
    Given a list of evidence lists and a list of labels, return a
    fitted k-nearest neighbor model (k=1) trained on the data.
    """
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(evidence, labels)

    return model


def evaluate(labels, predictions):
    """
    Given a list of actual labels and a list of predicted labels,
    return a tuple (sensitivity, specificity).

    Assume each label is either a 1 (positive) or 0 (negative).

    `sensitivity` should be a floating-point value from 0 to 1
    representing the "true positive rate": the proportion of
    actual positive labels that were accurately identified.

    `specificity` should be a floating-point value from 0 to 1
    representing the "true negative rate": the proportion of
    actual negative labels that were accurately identified.
    """
    # Sensitivity: fraction of actual positives that were predicted positive
    n1 = np.sum(labels == 1)
    n2 = np.sum((labels == 1) & (predictions == 1))
    sensitivity = n2 / n1

    # Specificity: fraction of actual negatives that were predicted negative
    n3 = np.sum(labels == 0)
    n4 = np.sum((labels == 0) & (predictions == 0))
    specificity = n4 / n3

    return sensitivity, specificity
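
These three functions plug together in the usual train/test workflow. The sketch below shows one way to drive them; the shopping.csv filename and the 40% test split are assumptions chosen for illustration, not something fixed by the functions above.

from sklearn.model_selection import train_test_split

# Minimal driver sketch: load the data, hold out 40% for testing,
# fit the k=1 nearest-neighbor model, and report sensitivity/specificity.
evidence, labels = load_data("shopping.csv")
X_train, X_test, y_train, y_test = train_test_split(evidence, labels, test_size=0.4)

model = train_model(X_train, y_train)
predictions = model.predict(X_test)
sensitivity, specificity = evaluate(y_test, predictions)

print(f"True Positive Rate: {100 * sensitivity:.2f}%")
print(f"True Negative Rate: {100 * specificity:.2f}%")

Sensitivity and specificity are reported separately because most sessions in this dataset do not end in a purchase, so a classifier that always predicted "no purchase" would score high raw accuracy while being useless.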

Nim

A reinforcement learning problem, solved here with Q-learning.

The code is as follows:

# The following methods belong to the NimAI class in nim.py.

def get_q_value(self, state, action):
    """
    Return the Q-value for the state `state` and the action `action`.
    If no Q-value exists yet in `self.q`, return 0.
    """
    state = tuple(state)
    if state not in self.q:
        return 0
    elif action not in self.q[state]:
        return 0
    else:
        return self.q[state][action]

def update_q_value(self, state, action, old_q, reward, future_rewards):
    """
    Update the Q-value for the state `state` and the action `action`
    given the previous Q-value `old_q`, a current reward `reward`,
    and an estimate of future rewards `future_rewards`.

    Use the formula:

    Q(s, a) <- old value estimate
               + alpha * (new value estimate - old value estimate)

    where `old value estimate` is the previous Q-value,
    `alpha` is the learning rate, and `new value estimate`
    is the sum of the current reward and estimated future rewards.
    """
    state = tuple(state)
    if state not in self.q:
        self.q[state] = dict()
    # The new value estimate is reward + future_rewards; move the old
    # estimate toward it by a fraction alpha of the difference
    self.q[state][action] = old_q + self.alpha * (reward + future_rewards - old_q)

def best_future_reward(self, state):
    """
    Given a state `state`, consider all possible `(state, action)`
    pairs available in that state and return the maximum of all
    of their Q-values.

    Use 0 as the Q-value if a `(state, action)` pair has no
    Q-value in `self.q`. If there are no available actions in
    `state`, return 0.
    """
    actions = Nim.available_actions(state)
    if not actions:
        return 0
    # Unseen (state, action) pairs count as Q-value 0
    return max(self.get_q_value(state, action) for action in actions)

def choose_action(self, state, epsilon=True):
    """
    Given a state `state`, return an action `(i, j)` to take.

    If `epsilon` is `False`, then return the best action
    available in the state (the one with the highest Q-value,
    using 0 for pairs that have no Q-values).

    If `epsilon` is `True`, then with probability
    `self.epsilon` choose a random available action,
    otherwise choose the best action available.

    If multiple actions have the same Q-value, any of those
    options is an acceptable return value.
    """
    available_actions = list(Nim.available_actions(state))

    # With probability self.epsilon, explore by picking a random action
    if epsilon and random.random() < self.epsilon:
        return random.choice(available_actions)

    # Otherwise exploit: return the action with the highest Q-value,
    # treating unseen (state, action) pairs as having Q-value 0
    return max(available_actions, key=lambda action: self.get_q_value(state, action))
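
To see what these methods do to the Q-table, here is a minimal, hypothetical single-step example. It is not the project's training loop (in the project, updates like this happen automatically as the AI plays many games against itself); the starting piles, the action (3, 2), and the reward of 1 are assumptions chosen for illustration.

ai = NimAI()                           # the Q-learning agent whose methods are shown above

state = [1, 3, 5, 7]                   # the standard Nim starting piles
action = (3, 2)                        # take 2 objects from pile 3

# Before any learning, every (state, action) pair has Q-value 0
print(ai.get_q_value(state, action))   # 0

# Suppose this move was rewarded with 1 (a win) and no future rewards remain:
# the Q-value moves from 0 toward 1 by a fraction alpha of the difference
old_q = ai.get_q_value(state, action)
ai.update_q_value(state, action, old_q, reward=1, future_rewards=0)
print(ai.get_q_value(state, action))   # 0.5 if alpha is 0.5

# The updated entry now drives both the value estimate and the greedy choice
print(ai.best_future_reward(state))              # 0.5 if alpha is 0.5
print(ai.choose_action(state, epsilon=False))    # (3, 2)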
