#### 1. Neural Networks: MNIST image classification

(a)(b)(c)前向传播部分很简单，反向传播部分的推导请参考CS231作业1，其中如下代码

#计算第二层梯度
p3 = np.exp(scores) / p2.reshape(-1, 1)
p3[np.arange(N), y] -= 1
dW2 = X1.T.dot(p3) / N + 2 * reg * W2
db2 = np.sum(p3, axis=0) / N
grads["b2"] = db2

N2, D2 = y.shape
t2 = y - labels
db2 = np.sum(t2, axis=0) / N2
dW2 = h.T.dot(t2) / N2
if Lambda != 0:
dW2 += 2 * Lambda * W2
dX2 = t2.dot(W2.T)

#CS231
p3[np.arange(N), y] -= 1

#CS229
t2 = y - labels

#第一层梯度
N2, D2 = data.shape
t1 = h * (1 - h)
dW1 = data.T.dot(t1 * dX2) / N2
if Lambda != 0:
dW1 += 2 * Lambda * W1
db1 = np.sum(t1, axis=0) / N2

#### 4. Independent components analysis

# -*- coding: utf-8 -*-
"""
Created on Fri Mar 29 13:49:57 2019

@author: qinzhen
"""

### Independent Components Analysis
###
### This program requires a working installation of:
###
### On Mac:
###     1. portaudio: On Mac: brew install portaudio
###     2. sounddevice: pip install sounddevice
###
### On windows:
###      pip install pyaudio sounddevice
###

import sounddevice as sd
import numpy as np

Fs = 11025

def normalize(dat):
return 0.99 * dat / np.max(np.abs(dat))

return mix

def play(vec):
sd.play(vec, Fs, blocking=True)

def sigmoid(x):
return 1 / (1 + np.exp(-x))

def unmixer(X):
M, N = X.shape
W = np.eye(N)

anneal = [0.1, 0.1, 0.1, 0.05, 0.05, 0.05, 0.02, 0.02, 0.01, 0.01,
0.005, 0.005, 0.002, 0.002, 0.001, 0.001]
print('Separating tracks ...')

for alpha in anneal:
#打乱数据
np.random.permutation(X)
for i in range(M):
x = X[i, :].reshape(-1, 1)
WX = W.dot(x)
grad = (1 - 2 * sigmoid(WX)).dot(x.T) + np.linalg.inv(W.T)
###################################
return W

def unmix(X, W):
S = np.zeros(X.shape)

S = X.dot(W.T)
##################################
return S

for i in range(X.shape[1]):
print('Playing mixed track %d' % i)
play(X[:, i])

W = unmixer(X)
S = normalize(unmix(X, W))

for i in range(S.shape[1]):
print('Playing separated track %d' % i)
play(S[:, i])

#### 5. Markov decision processes

(a)$\forall \pi$，定义

(b)如果存在$V$使得

#### 6. Reinforcement Learning: The inverted pendulum

#价值函数
value_function = np.random.uniform(0, 0.1, NUM_STATES)
#计数
tran_cnt = np.zeros((NUM_STATES, NUM_STATES, NUM_ACTIONS))
#概率
tran_prob = np.ones((NUM_STATES, NUM_STATES, NUM_ACTIONS)) / NUM_STATES
#记录奖励
reward_value = np.zeros(NUM_STATES)
#记录奖励总数
reward_cnt = np.zeros(NUM_STATES)
#奖励
state_reward = np.zeros(NUM_STATES)

#期望收益，比较其大小
s0 = np.sum(tran_prob[state, :, 0] * value_function)
s1 = np.sum(tran_prob[state, :, 1] * value_function)
if s0 > s1:
action = 0
elif s0 < s1:
action = 1
else:
action = np.random.randint(0, 2)

tran_cnt[state, new_state, action] += 1
reward_value[new_state] += R
reward_cnt[new_state] += 1

#统计在某个状态采取某个动作的次数
for i in range(NUM_ACTIONS):
for j in range(NUM_STATES):
#在状态j采取行动i的总数
num = np.sum(tran_cnt[j, :, i])
if num > 0:
tran_prob[j, :, i] = tran_cnt[j, :, i] / num
#更新奖励
state_reward[reward_cnt>0] = reward_value[reward_cnt>0] / reward_cnt[reward_cnt>0]

• 对每个状态$s$，初始化$V(s):=0$

• 重复直到收敛{

• 对每个状态，更新$V(s):=R(s) + \max_{a\in A}\gamma \sum_{s'\in S} P_{sa}(s') V(s')$

}

iterations = 0
while True:
#值迭代更新后的值
v0 = GAMMA * tran_prob[:, :, 0].dot(value_function) + state_reward
v1 = GAMMA * tran_prob[:, :, 1].dot(value_function) + state_reward
v = np.c_[v0, v1]
#取最大值
value_function_new = np.max(v, axis=1)
#计算更新幅度
delta = np.linalg.norm(value_function_new - value_function)
#更新价值函数
value_function = np.copy(value_function_new)
iterations += 1
#更新幅度变小，则停止循环
if delta < TOLERANCE:
break

#更新一次收敛的计数
if iterations == 1:
consecutive_no_learning_trials += 1
else:
consecutive_no_learning_trials = 0