#### Bayesian Networks

• 有向图
• 每个节点代表一个随机变量
• 从$X$到$Y$的箭头表示$X$是$Y$的父节点
• 每个节点$X$都有概率分布$\mathbf{P}(X | \text { Parents }(X))$

#### Inference

• 查询$\mathrm X$：要为其计算分布的变量
• 证据变量$\mathrm E$：事件$\mathrm e$的观察变量
• 隐藏变量$\mathrm Y$：无证据，没有被查询的变量。
• 目标：计算$\mathbf{P}(\mathrm{X} | \mathbf{e})$

HMM中的常见任务如下：

### Project

#### PageRank

If page has no outgoing links, then transition_model should return a probability distribution that chooses randomly among all pages with equal probability. (In other words, if a page has no links, we can pretend it has links to all pages in the corpus, including itself.)

transition_model

def transition_model(corpus, page, damping_factor):
"""
Return a probability distribution over which page to visit next,
given a current page.

With probability damping_factor, choose a link at random
linked to by page. With probability 1 - damping_factor, choose
a link at random chosen from all pages in the corpus.
"""
n = len(corpus)
#计算概率
res = dict()
m = len(corpus[page])
for i in corpus:
if len(corpus[page]) != 0:
if i in corpus[page]:
res[i] = (1 - damping_factor) / n + damping_factor / m
else:
res[i] = (1 - damping_factor) / n
else:
res[i] = 1 / n

return res

sample_pagerank

def sample_pagerank(corpus, damping_factor, n):
"""
Return PageRank values for each page by sampling n pages
according to transition model, starting with a page at random.

Return a dictionary where keys are page names, and values are
their estimated PageRank value (a value between 0 and 1). All
PageRank values should sum to 1.
"""
#初始化
res = dict()
for i in corpus:
res[i] = 0
page = np.random.choice(list(corpus.keys()))
for i in range(n):
res[page] += 1
prob = transition_model(corpus, page, damping_factor)
key = list(prob.keys())
value = list(prob.values())
page = np.random.choice(key, p=value)

for i in corpus:
res[i] /= n

return res

iterate_pagerank

def iterate_pagerank(corpus, damping_factor):
"""
Return PageRank values for each page by iteratively updating
PageRank values until convergence.

Return a dictionary where keys are page names, and values are
their estimated PageRank value (a value between 0 and 1). All
PageRank values should sum to 1.
"""
#计算网页个数
numlinks = dict()
#反向边
corpus_reverse = dict()
for i in corpus:
corpus_reverse[i] = set()
#根据题目的解释
if (len(corpus[i]) == 0):
corpus[i] = set(corpus.keys())

for i in corpus:
for j in corpus[i]:
corpus_reverse[j].add(i)
numlinks[i] = len(corpus[i])

n = len(corpus)
#初始化
page = dict()
for i in corpus:
page[i] = 1 / n
while True:
newpage = dict()
for i in corpus:
newpage[i] = (1 - damping_factor) / n
for j in corpus_reverse[i]:
newpage[i] += damping_factor * page[j] / numlinks[j]
s = 0
flag = True
for i in corpus:
diff = abs(newpage[i] - page[i])
if diff > 0.001:
flag = False
page[i] = newpage[i]
s += page[i]

if flag:
break

return page

#### Heredity

#原本为i个gene，生成j个gene的概率
#i=0,1,2, j=0,1
def get_prob(i, j):
if j != 0 and j != 1:
return 0
p = PROBS["mutation"]
if i == 0:
p1 = 1 - p
if j == 0:
return p1
else:
return 1 - p1
elif i == 1:
p1 = 0.5 * (1 - p) + 0.5 * p
if j == 0:
return p1
else:
return 1 - p1
else:
p1 = p
if j == 0:
return p1
else:
return 1- p1

def get_num(people, one_gene, two_genes, person):
zero_gene = set(people.keys()) - one_gene - two_genes
if person in zero_gene:
return 0
elif person in one_gene:
return 1
else:
return 2

def joint_probability(people, one_gene, two_genes, have_trait):
"""
Compute and return a joint probability.

The probability returned should be the probability that
* everyone in set one_gene has one copy of the gene, and
* everyone in set two_genes has two copies of the gene, and
* everyone not in one_gene or two_gene does not have the gene, and
* everyone in set have_trait has the trait, and
* everyone not in set have_trait does not have the trait.
"""
prob = 1

for person in people:
#计算数量
n1 = get_num(people, one_gene, two_genes, person)
#trait
p = PROBS["trait"][n1][True]
if person not in have_trait:
p = 1 - p
prob *= p
#没有父节点
if people[person]['mother'] == None:
prob *= PROBS["gene"][n1]
else:
father = people[person]['father']
mother = people[person]['mother']
n2 = get_num(people, one_gene, two_genes, father)
n3 = get_num(people, one_gene, two_genes, mother)
p1 = 0
#穷举所有情况
for i in range(n1 + 1):
j = n1 - i
p1 += get_prob(n2, i) * get_prob(n3, j)
prob *= p1

return prob

def update(probabilities, one_gene, two_genes, have_trait, p):
"""
Add to probabilities a new joint probability p.
Each person should have their "gene" and "trait" distributions updated.
Which value for each distribution is updated depends on whether
the person is in have_gene and have_trait, respectively.
"""
for person in probabilities:
n = get_num(probabilities, one_gene, two_genes, person)
probabilities[person]["gene"][n] += p
probabilities[person]["trait"][person in have_trait] += p

def normalize(probabilities):
"""
Update probabilities such that each probability distribution
is normalized (i.e., sums to 1, with relative proportions the same).
"""
for person in probabilities:
s = 0
for i in probabilities[person]["gene"]:
s += probabilities[person]["gene"][i]
for i in probabilities[person]["gene"]:
probabilities[person]["gene"][i] /= s

s = 0
for i in probabilities[person]["trait"]:
s += probabilities[person]["trait"][i]
for i in probabilities[person]["trait"]:
probabilities[person]["trait"][i] /= s