Keras paper-reproduction model study (ABSA)
Studying a great open-source implementation on GitHub: [ https://github.com/AlexYangLi/ABSA_Keras ]
Models
TD-LSTM
- Data processing: the sentence has to be cut into a left input and a right input, using the aspect term's from/to offsets (see the sketch below).
- The hidden states of the two LSTMs are concatenated to form the output.
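A minimal sketch of the left/right cut (the helper name split_left_right and the from_idx/to_idx convention are assumptions for illustration, not necessarily the repo's exact preprocessing): the left context runs from the sentence start to the end of the aspect term, the right context from the start of the aspect term to the sentence end; the right LSTM can then read its context backwards, e.g. via go_backwards=True as in tc_lstm below.
def split_left_right(token_ids, from_idx, to_idx):
    # left context: sentence start up to and including the aspect term
    left = token_ids[:to_idx]
    # right context: aspect term up to the sentence end
    right = token_ids[from_idx:]
    return left, right

# e.g. a 7-token sentence whose aspect term occupies positions 5..6 (exclusive end)
left, right = split_left_right([11, 12, 13, 14, 15, 16, 17], 5, 6)
# left  -> [11, 12, 13, 14, 15, 16]
# right -> [16, 17]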
def td_lstm(self):
    input_l = Input(shape=(self.left_max_len,))   # instantiate Keras tensors for the left and right contexts
    input_r = Input(shape=(self.right_max_len,))
    word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                               output_dim=self.config.word_embed_dim,
                               weights=[self.text_embeddings],
                               trainable=self.config.word_embed_trainable,
                               mask_zero=True)
    input_l_embed = SpatialDropout1D(0.2)(word_embedding(input_l))
    input_r_embed = SpatialDropout1D(0.2)(word_embedding(input_r))
    hidden_l = LSTM(300)(input_l_embed)
    hidden_r = LSTM(300)(input_r_embed)           # output shape: (None, 300)
    hidden_concat = concatenate([hidden_l, hidden_r], axis=-1)
    return Model([input_l, input_r], hidden_concat)  # define the layers first, then wrap them with Model(inputs, outputs)
def build_model(self):
    network_inputs = list()
    if self.config.use_text_input_l:
        network_inputs.append(Input(shape=(self.left_max_len,), name='input_text_l'))
    if self.config.use_text_input_r:
        network_inputs.append(Input(shape=(self.right_max_len,), name='input_text_r'))
    if len(network_inputs) == 1:
        network_inputs = network_inputs[0]
    base_network = self.build_base_network()
    sentence_vec = base_network(network_inputs)   # multiple inputs are passed as a list, because the base network is Model([input_l, input_r], hidden_concat)
    dense_layer = Dense(self.config.dense_units, activation='relu')(sentence_vec)
    output_layer = Dense(self.config.n_classes, activation='softmax')(dense_layer)
    self.model = Model(network_inputs, output_layer)
    self.model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer=self.config.optimizer)
def prepare_input(self, input_data):
    if self.config.model_name == 'td_lstm':
        text_l, text_r = input_data
        input_pad = [pad_sequences(text_l, self.left_max_len), pad_sequences(text_r, self.right_max_len)]
    return input_pad

def prepare_label(self, label_data):
    return to_categorical(label_data, self.config.n_classes)
def train(self, train_input_data, train_label, valid_input_data, valid_label):
    x_train = self.prepare_input(train_input_data)
    y_train = self.prepare_label(train_label)
    x_valid = self.prepare_input(valid_input_data)
    y_valid = self.prepare_label(valid_label)
    print('start training...')
    self.model.fit(x=x_train, y=y_train, batch_size=self.config.batch_size, epochs=self.config.n_epochs,
                   validation_data=(x_valid, y_valid), callbacks=self.callbacks)
    print('training end...')
    print('score over valid data:')
    valid_pred = self.model.predict(x_valid)
    get_score_senti(y_valid, valid_pred)
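A hedged usage sketch of the training flow above (SentimentModel as the wrapper class name and config as its configuration object are assumptions for illustration): the raw inputs are lists of left/right token-id sequences plus integer labels, and train takes care of padding and one-hot encoding.
train_left  = [[1, 2, 3, 4, 5, 6], [2, 9, 4]]     # left contexts as token-id sequences
train_right = [[6, 7], [4, 8, 3]]                 # right contexts
train_label = [2, 0]                              # e.g. 0 = negative, 1 = neutral, 2 = positive

model = SentimentModel(config)                    # hypothetical wrapper holding build_model/prepare_input/train
model.build_model()
model.train((train_left, train_right), train_label,
            (train_left, train_right), train_label)   # validation data reused here only for brevity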
TC-LSTM
Basically the same as TD-LSTM, except that the aspect word embedding is appended to the inputs.
- Build the aspect embedding matrix
- Look up the aspect vector and repeat it to match the lengths of the left and right inputs
- Concatenate it with the left and right inputs along axis=-1
Building the aspect embedding matrix
aspect_vocab: mapping from aspect word to aspect id
word_cut_func: lambda x: nltk.word_tokenize(x)  # tokenization
word_vocab: word vocabulary
word_glove: word embedding matrix
aspect_word_glove = build_aspect_embedding(aspect_vocab, word_cut_func, word_vocab, word_glove)
import numpy as np

def build_aspect_embedding(aspect_vocab, split_func, word_vocab, word_embed):
    aspect_embed = np.random.uniform(-0.1, 0.1, [len(aspect_vocab.keys()), word_embed.shape[1]])
    count = 0
    for aspect, aspect_id in aspect_vocab.items():
        word_ids = [word_vocab.get(word, 0) for word in split_func(aspect)]
        if any(word_ids):
            # the aspect vector is the average of the vectors of the words it contains
            avg_vector = np.mean(word_embed[word_ids], axis=0)
            aspect_embed[aspect_id] = avg_vector
        else:
            count += 1
    print('aspect embedding out of vocabulary:', count)
    return aspect_embed
Building the model
def tc_lstm(self):   # basically the same structure as td_lstm, but the aspect vector is added to the inputs
    input_l = Input(shape=(self.left_max_len,))
    input_r = Input(shape=(self.right_max_len,))
    input_aspect = Input(shape=(1,))              # one aspect id per sample
    word_embedding = Embedding(input_dim=self.text_embeddings.shape[0], output_dim=self.config.word_embed_dim,
                               weights=[self.text_embeddings], trainable=self.config.word_embed_trainable,
                               mask_zero=True)
    input_l_embed = SpatialDropout1D(0.2)(word_embedding(input_l))   # (None, left_max_len, embed_dim)
    input_r_embed = SpatialDropout1D(0.2)(word_embedding(input_r))   # (None, right_max_len, embed_dim)
    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect, output_dim=self.config.aspect_embed_dim)   # aspect embedding trained from scratch
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)    # input shape (1,), output shape (None, 1, aspect_embed_dim)
    aspect_embed = Flatten()(aspect_embed)        # Flatten drops the length-1 time axis: (None, 1, aspect_embed_dim) -> (None, aspect_embed_dim)
    aspect_repeat_l = RepeatVector(self.left_max_len)(aspect_embed)  # repeat the aspect embedding left_max_len times
    input_l_concat = concatenate([input_l_embed, aspect_repeat_l], axis=-1)
    aspect_repeat_r = RepeatVector(self.right_max_len)(aspect_embed)
    input_r_concat = concatenate([input_r_embed, aspect_repeat_r], axis=-1)   # (None, right_max_len, embed_dim + aspect_embed_dim)
    # regarding aspect string as the last unit
    hidden_l = LSTM(self.config.lstm_units)(input_l_concat)
    hidden_r = LSTM(self.config.lstm_units, go_backwards=True)(input_r_concat)
    hidden_concat = concatenate([hidden_l, hidden_r], axis=-1)
    return Model([input_l, input_r, input_aspect], hidden_concat)
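To make the role of Flatten here concrete, a small shape-tracing sketch (the vocabulary size 50, aspect embedding dim 100 and left_max_len 9 are made-up numbers): Flatten only removes the length-1 time axis produced by the aspect Embedding, and RepeatVector then copies that single vector once per time step so it can be concatenated with the word embeddings.
from keras.layers import Input, Embedding, Flatten, RepeatVector
from keras.models import Model

asp_in = Input(shape=(1,))                             # one aspect id per sample
asp = Embedding(input_dim=50, output_dim=100)(asp_in)  # (None, 1, 100)
asp = Flatten()(asp)                                   # (None, 100): drops the length-1 time axis
asp = RepeatVector(9)(asp)                             # (None, 9, 100): one copy per left-context time step
print(Model(asp_in, asp).output_shape)                 # (None, 9, 100)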
AT-LSTM
Keras notes
Input layer
keras.engine.input_layer.Input()
Used to instantiate a Keras tensor.
shape: a shape tuple (integers), not including the batch size. For instance, shape=(32,) indicates that the expected input will be batches of 32-dimensional vectors.
batch_shape: a shape tuple (integers), including the batch size. For instance, batch_shape=(10, 32) indicates that the expected input will be batches of 10 vectors of 32 dimensions.
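A minimal sketch (not from the original post; the layer sizes are arbitrary) contrasting shape and batch_shape:
from keras.layers import Input, Dense
from keras.models import Model

x = Input(shape=(32,))                   # batches of 32-dim vectors, batch size left open
y = Dense(4, activation='softmax')(x)
model = Model(x, y)
print(model.input_shape)                 # (None, 32)

x_fixed = Input(batch_shape=(10, 32))    # batch size fixed at 10 -> (10, 32)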
Embedding layer parameters
mask_zero: Boolean, whether the input value 0 should be treated as a special 'padding' value to be masked out. This is useful when using recurrent layers on variable-length inputs. If set to True, all subsequent layers in the model must support masking, otherwise an exception is raised. As a consequence, index 0 cannot be used in the vocabulary, and input_dim should be set to |vocabulary| + 1.
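A small sketch of mask_zero in practice (toy numbers: vocabulary size 10, embedding dim 8 and maxlen 5 are assumptions): index 0 is reserved for padding, so the LSTM skips the padded time steps.
from keras.layers import Input, Embedding, LSTM
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences

seqs = [[3, 5, 2], [7, 1]]                    # variable-length id sequences, ids start at 1
x = pad_sequences(seqs, maxlen=5)             # left-pads with 0 -> shape (2, 5)

inp = Input(shape=(5,))
emb = Embedding(input_dim=10 + 1, output_dim=8, mask_zero=True)(inp)  # index 0 is masked out
out = LSTM(16)(emb)                           # LSTM supports masking, so padded steps are ignored
model = Model(inp, out)
print(model.predict(x).shape)                 # (2, 16)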
Flatten
keras.layers.Flatten(data_format=None)
Flattens the input. Does not affect the batch size.
Arguments
- data_format: a string, one of channels_last (default) or channels_first. It denotes the ordering of the dimensions in the inputs. The purpose of this argument is to preserve weight ordering when switching a model from one data format to another. channels_last corresponds to inputs of shape (batch, ..., channels), while channels_first corresponds to inputs of shape (batch, channels, ...). It defaults to the image_data_format value found in the Keras config file at ~/.keras/keras.json. If you have never set it, it will be channels_last.
Example
model = Sequential()
model.add(Conv2D(64, (3, 3),
                 input_shape=(3, 32, 32), padding='same'))
# now: model.output_shape == (None, 64, 32, 32)   (assuming data_format='channels_first')
model.add(Flatten())
# now: model.output_shape == (None, 65536)