Michael Collins NLP Homework 4

课程主页：http://www.cs.columbia.edu/~cs4705/

课程网盘地址：

链接：https://pan.baidu.com/s/1KijgO7yjL_MVCC9zKZ7Jdg
提取码：t1i3

这一次回顾Michael Collins NLP作业4。

Question 1

(a)

$\begin{aligned} \frac{\partial L(\mathrm v)}{\partial v_{j}} &= \sum_{i}\left(\mathrm f_j(x_i,y_i) - \sum_{y'\in \mathcal V}f_{j}(x_i, y') P(y'|x_i, \mathrm v) \right)-2Cv_j \end{aligned}$

令上式为$0$可得

$\begin{aligned} v_j^{\star} &=\frac{1}{2C} \sum_{i}\left(\mathrm f_j(x_i,y_i) - \sum_{y'\in \mathcal V}f_{j}(x_i, y') P(y'|x_i, \mathrm v) \right) \end{aligned}$

注意$f_1=f_2$，所以

$v_1^\star = v_2^{\star}$

(b)

此时正则项换为$L_1$形式（即$-C\sum_j |v_j|$），对目标函数关于$v_j$求偏导并令其为$0$可得

$\sum_{i}\left(\mathrm f_j(x_i,y_i) - \sum_{y'\in \mathcal V}f_{j}(x_i, y') P(y'|x_i, \mathrm v) \right) - C 1\{v_j \ge 0\}+C1\{v_j <0\} =0$

取$j=1,2$可得

$\sum_{i}\left(\mathrm f_j(x_i,y_i) - f_{j}(x_i, \text{model}) P(\text{model}|x_i, \mathrm v) \right) - C 1\{v_j \ge 0\}+C1\{v_j <0\} =0$

构造集合

$\begin{aligned} S_1&=\{i |x_i的最后一个单词为\text{the} \}\\ \bar S_1 &= \{1,\ldots, n\}-S_1 \\ S_2 &= \{i |y_i=\text{model} \} \end{aligned}$

那么上式等价于

$\begin{aligned} &|S_1\cap S_2| - \sum_{i\in S_1} \frac{e^{v_1 +v_2 + \sum_{k=3}^d v_k f_k(x_i,\text{model})}}{\sum_{y^{\prime} \in \mathcal{V}} e^{\mathbf{v} \cdot \mathbf{f}\left(x_i, y^{\prime}\right)}} - C 1\{v_j \ge 0\}+C1\{v_j <0\}=0 \end{aligned}$

由于上式对$j=1,2$都成立，且前两项与$j$无关，所以$C1\{v_1 \ge 0\}-C1\{v_1 <0\}=C1\{v_2 \ge 0\}-C1\{v_2 <0\}$，这说明$v_1,v_2$同号。

Question 2

假设

$f_{j}(x, y)=\left\{\begin{array}{ll} 1 & \text { if } y=w_{2}^j \text { and } x=w_{1}^j \\ 0 & \text { otherwise } \end{array}\right.$

那么

$\begin{aligned} \frac{\partial L(\mathrm v)}{\partial v_{j}} &= \sum_{i}\left(\mathrm f_j(x_i,y_i) - \sum_{y'\in \mathcal V}f_{j}(x_i, y') P(y'|x_i, \mathrm v) \right)\\ &= \operatorname{Count}\left(w_1^j, w_2^j\right) - \sum_{i}\sum_{y'\in \mathcal V} f_{j}(x_i, y') P(y'|x_i, \mathrm v)\\ &= \operatorname{Count}\left(w_1^j, w_2^j\right) - \sum_{i} f_{j}(x_i, w_2^j) P(w_2^j|x_i, \mathrm v)\\ &=\operatorname{Count}\left(w_1^j, w_2^j\right)-\sum_{x_i= w_1^j} P(w_2^j|w_1^j, \mathrm v)\\ &=\operatorname{Count}\left(w_1^j, w_2^j\right)-\operatorname{Count}\left(w_1^j\right) P(w_2^j|w_1^j, \mathrm v) \end{aligned}$

令上式为$0$可得

$P\left(y=w_{2}^j | x=w_{1}^j, \mathrm{v}^{*}\right)=\frac{\operatorname{Count}\left(w_{1}^j, w_{2}^j\right)}{\operatorname{Count}\left(w_{1}^j\right)}$

Question 3

(a)

定义

$\begin{aligned} f_1(x,y)&=\left\{\begin{array}{ll} 1 & x= y \\ 0 & \text { otherwise } \end{array}\right.\\ f_2(x,y)&=\left\{\begin{array}{ll} 1 & x= y_{\text{reverse}} \\ 0 & \text { otherwise } \end{array}\right.\\ \end{aligned}$

所以需要两个参数$v_1, v_2$。

(b)

$\begin{aligned} P(\text {the} | \text {the}) &=\frac{e^{v_1}}{\sum_{y} e^{\mathrm v.f(\text{the}, y)}}\\ P(\text {eht} | \text {the}) &=\frac{e^{v_2}}{\sum_{y} e^{\mathrm v.f(\text{the}, y)}}\\ P(\operatorname{dog} | \text {the}) &=\frac{1}{\sum_{y} e^{\mathrm v.f(\text{the}, y)}}\\ \end{aligned}$

(c)

令

$|\mathcal V|=n$

那么

$|\mathcal V'|=2n$

注意到

$\sum_{y} e^{\mathrm v.f(\text{the}, y)} = e^{v_1} + e^{v_2} + (2n-2)$

所以条件为

$\begin{aligned} \frac{e^{v_1}}{ e^{v_1} + e^{v_2} + (2n-2)}&= 0.4\\ \frac{e^{v_2}}{ e^{v_1} + e^{v_2} + (2n-2)}&= 0.3\\ \frac{1}{ e^{v_1} + e^{v_2} + (2n-2)}&= 0.3\times \frac 1{2n-2} \end{aligned}$

Question 4

由于一些函数要共用，所以编写了helper.py文件

import tagger_config
from subprocess import PIPE
import sys, subprocess

tags = tagger_config.tags

def sentence_reader(filename):
    """Read a file of whitespace-separated token rows grouped into sentences.

    Every non-blank line is split on whitespace into a list of fields and
    appended to the current sentence. A blank line closes the current
    sentence.

    Args:
        filename: path of the file to read.

    Returns:
        A list of sentences; each sentence is a list of rows and each row
        is the list of fields found on one line.
    """
    sentences = []
    sentence = []
    with open(filename) as f:
        for line in f:
            fields = line.strip().split()
            if fields:
                sentence.append(fields)
            else:
                # Blank line: the sentence collected so far is complete.
                sentences.append(sentence)
                sentence = []
    # A file that does not end with a blank line still has a pending
    # sentence here; without this it would be silently dropped.
    if sentence:
        sentences.append(sentence)
    return sentences

def transform(sentence):
    """Serialise a sentence into a single text block.

    Args:
        sentence: a sequence of rows, each row being a sequence of string
            fields (the shape produced by sentence_reader).

    Returns:
        The rows joined by newlines, with the fields of each row joined by
        tabs. There is no trailing newline, and an empty sentence gives "".
    """
    # str.join builds the result in one pass instead of repeated "+=".
    return "\n".join("\t".join(word) for word in sentence)

def process(args):
    """Create a 'server' child process to send commands to.

    Args:
        args: the command line, as a list, passed to subprocess.Popen.

    Returns:
        The Popen object, with stdin and stdout connected to pipes.
    """
    # call() exchanges str with the child. universal_newlines=True makes the
    # pipes text-mode on Python 3 (where they are otherwise binary and reject
    # str) and is harmless on Python 2.
    return subprocess.Popen(args, stdin=PIPE, stdout=PIPE,
                            universal_newlines=True)

def call(process, stdin):
    """Send one command to a server and collect its reply lines.

    Args:
        process: an object with file-like ``stdin`` and ``stdout``
            attributes, e.g. the Popen returned by process().
        stdin: the request text; a blank line is appended to terminate it.

    Returns:
        The stripped reply lines, up to (not including) the first blank
        line or end of file.
    """
    process.stdin.write(stdin + "\n\n")
    # The pipe may be buffered; without a flush the child never sees the
    # request and the readline() below would block forever.
    process.stdin.flush()
    res = []
    while True:
        line = process.stdout.readline().strip()
        if not line:
            break
        res.append(line)
    return res
                            
def get_feature(sentence, his):
    """Build the BIGRAM and TAG feature strings for one history.

    Args:
        sentence: sequence of rows; element 0 of each row is the word.
        his: [i, tag[i-1], tag[i]] as strings, where i is the 1-based
            position of the word in the sentence.

    Returns:
        A (BIGRAM, TAG) tuple of feature strings.
    """
    prev_tag, tag = his[1], his[2]
    # Positions are 1-based, list indices are 0-based.
    word = sentence[int(his[0]) - 1][0]
    return "BIGRAM:" + prev_tag + ":" + tag, "TAG:" + word + ":" + tag

def get_feature_v1(sentence, his):
    """Build the BIGRAM, TAG and SUFF feature strings for one history.

    Args:
        sentence: sequence of rows; element 0 of each row is the word.
        his: [i, tag[i-1], tag[i]] as strings, where i is the 1-based
            position of the word in the sentence.

    Returns:
        A list: the BIGRAM feature, the TAG feature, then one SUFF feature
        for each suffix of length 1, 2 and 3 that fits in the word.
    """
    prev_tag, tag = his[1], his[2]
    # Positions are 1-based, list indices are 0-based.
    word = sentence[int(his[0]) - 1][0]

    res = ["BIGRAM:" + prev_tag + ":" + tag, "TAG:" + word + ":" + tag]
    # Suffix features describe the word at position i only; emitting the
    # suffixes of every word in the sentence would attach unrelated words'
    # suffixes to this history.
    for k in range(1, min(3, len(word)) + 1):
        res.append("SUFF:" + word[-k:] + ":" + str(k) + ":" + tag)

    return res

from helper import *

#读取
def get_value(filename):
    """Load feature weights from a text file.

    Each non-blank line must hold exactly two whitespace-separated fields:
    the feature string and its weight.

    Args:
        filename: path of the weight file.

    Returns:
        A dict mapping each feature string to its weight as a float.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields or
            the weight is not a valid float.
    """
    value = {}
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at the end).
                continue
            fea, v = fields
            value[fea] = float(v)
    return value

def generate(output, sentences, F):
    """Tag every sentence and write the result to a file.

    For each sentence the candidate histories are requested from
    enum_server, each history is scored by summing the weights of its
    features, the scored histories are sent to decoder_server, and one
    "word<TAB>tag" line is written per word, followed by a blank line.

    Uses the module-level globals enum_server, decoder_server and value,
    which must be set before this function is called.

    Args:
        output: path of the file to write.
        sentences: list of sentences as produced by sentence_reader.
        F: feature function called as F(sentence, history_fields); it must
            return an iterable of feature strings.
    """
    # Text mode: the lines written are str, which a binary-mode file
    # rejects on Python 3.
    with open(output, "w") as f:
        for sentence in sentences:
            history = call(enum_server, transform(sentence))
            scored = []
            for his in history:
                # Features without a weight contribute nothing.
                score = sum(value.get(fea, 0)
                            for fea in F(sentence, his.split()))
                scored.append(his + "\t" + str(score))
            # Decode the best tag sequence from the scored histories.
            res = call(decoder_server, "\n".join(scored))
            # The last field of the i-th reply line is used as the tag of
            # the i-th word.
            for i, word in enumerate(sentence):
                f.write(word[0] + "\t" + res[i].split()[-1] + "\n")
            f.write("\n")

# Q4 driver: tag the development set with the weights in tag.model and write
# the result to Q4.out. generate() reads enum_server, decoder_server and
# value as module-level globals, so they are bound here before it is called.
enum_server = process(["python", "tagger_history_generator.py", "ENUM"])
decoder_server = process(["python", "tagger_decoder.py", "HISTORY"])
try:
    filename = "tag_dev.dat"
    sentences = sentence_reader(filename)

    # Q4
    f1 = "tag.model"
    o1 = "Q4.out"
    value = get_value(f1)
    generate(o1, sentences, get_feature)
finally:
    # Stop the helper processes even if tagging fails part-way, so they are
    # not left running in the background.
    for server in (enum_server, decoder_server):
        server.terminate()
        server.wait()

2226 2459 0.905246034974

Question 5

#参考https://github.com/huxiuhan/nlp-hw

from helper import *

# Q5: train feature weights with the perceptron algorithm on tag_train.dat
# and save them to Q5.model.
value = dict()

enum_server = process(["python", "tagger_history_generator.py", "ENUM"])
gold_server = process(["python", "tagger_history_generator.py", "GOLD"])
decoder_server = process(["python", "tagger_decoder.py", "HISTORY"])

try:
    filename = "tag_train.dat"
    sentences = sentence_reader(filename)

    # Number of passes over the training data.
    K = 5
    # Candidate histories and gold histories per sentence, fetched once up
    # front so the training passes do not query the servers for them again.
    History = []
    History_label = []
    for sentence in sentences:
        sent = transform(sentence)
        History.append(call(enum_server, sent))
        History_label.append(call(gold_server, sent))
    N = len(sentences)

    # Training
    for k in range(K):
        for i, sentence in enumerate(sentences):
            history = History[i]
            # Gold histories for this sentence.
            history_label = History_label[i]

            # Score every candidate history under the current weights.
            score_ = []
            for his in history:
                feature = get_feature_v1(sentence, his.split())
                score = sum(value.get(fea, 0) for fea in feature)
                score_.append(his + "\t" + str(score))
            # Decode the best sequence under the current weights.
            res = call(decoder_server, '\n'.join(score_))

            # Perceptron update. Compare whole histories (position,
            # previous tag, tag) rather than the last character of the
            # decoder line: a differing previous tag changes the BIGRAM
            # feature even when the tag itself matches. Only the first
            # three fields matter because get_feature_v1 reads nothing else.
            for j, gold in enumerate(history_label):
                pred_his = res[j].split()[:3]
                gold_his = gold.split()[:3]
                if pred_his != gold_his:
                    # Penalise the features of the wrong prediction ...
                    for f in get_feature_v1(sentence, pred_his):
                        value[f] = value.get(f, 0) - 1
                    # ... and reward the features of the gold history.
                    for f in get_feature_v1(sentence, gold_his):
                        value[f] = value.get(f, 0) + 1
finally:
    # Stop the helper processes even if training fails part-way.
    for server in (enum_server, gold_server, decoder_server):
        server.terminate()
        server.wait()

# Save the model, one "feature weight" pair per line. Text mode, because the
# lines are str and a binary-mode file rejects str on Python 3.
outputname = "Q5.model"
with open(outputname, "w") as f:
    for fea in value:
        f.write(fea + " " + str(value[fea]) + "\n")

2184 2459 0.888165921106