sequence_layers.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import numpy as np
import copy
import math


class PositionalEncoder(nn.Layer):
    def __init__(self, d_model, max_seq_len=80):
        # d_model is the embedding dimension.
        super(PositionalEncoder, self).__init__()
        self.d_model = d_model
        # Sinusoidal position table of shape (max_seq_len, d_model).
        position = np.array([[pos / np.power(10000, 2. * i / self.d_model)
                              for i in range(self.d_model)]
                             for pos in range(max_seq_len)])
        # Apply sine to the even columns and cosine to the odd columns.
        position[:, 0::2] = np.sin(position[:, 0::2])  # dim 2i
        position[:, 1::2] = np.cos(position[:, 1::2])  # dim 2i+1
        # Cast to float32 so the table matches the dtype of the input embeddings.
        self.position = paddle.to_tensor(position, dtype='float32')

    def forward(self, x):
        # Scale the embeddings before adding the positional signal.
        x = x * math.sqrt(self.d_model)
        seq_len = x.shape[1]
        x = x + self.position[:seq_len, :]
        return x
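
# Worked example of the table above (assuming d_model = 4, an assumption for
# illustration only): for pos = 1 the raw angles are
# [1/10000**0.0, 1/10000**0.5, 1/10000**1.0, 1/10000**1.5] = [1, 1e-2, 1e-4, 1e-6],
# and after the sin/cos step the row becomes [sin(1), cos(1e-2), sin(1e-4), cos(1e-6)].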


class AttentionSequencePoolingLayer(nn.Layer):
    def __init__(self, dnn_units=[8, 64, 16], dnn_activation='sigmoid', weight_normalization=False):
        super().__init__()
        self.dnn_units = dnn_units
        self.dnn_activation = dnn_activation
        self.weight_normalization = weight_normalization
        layer_list = []
        bn_list = []
        for i in range(len(dnn_units) - 1):
            # The first layer consumes the concatenation of
            # [query, key, query - key, query * key], hence the 4x input width.
            dnn_layer = nn.Linear(
                in_features=self.dnn_units[i] if i != 0 else self.dnn_units[i] * 4,
                out_features=self.dnn_units[i + 1],
                weight_attr=self._weight_init())
            layer_list.append(copy.deepcopy(dnn_layer))
            # BatchNorm over the hard-coded sequence-length axis of 50.
            bn_layer = nn.BatchNorm(50)
            bn_list.append(copy.deepcopy(bn_layer))
        self.bn_layer = nn.LayerList(bn_list)
        self.layers = nn.LayerList(layer_list)
        self.dnn = nn.Linear(self.dnn_units[-1], 1, weight_attr=self._weight_init())
        self.activation = nn.Sigmoid()

    def _weight_init(self):
        return paddle.framework.ParamAttr(initializer=paddle.nn.initializer.XavierNormal())

    def forward(self, inputs):
        querys, keys, sess_length = inputs
        assert type(sess_length) == paddle.Tensor, \
            f"AttentionSequencePoolingLayer expected inputs[2] to be a paddle.Tensor, but got {type(sess_length)}"
        keys_length = keys.shape[1]
        # Mask out key positions beyond the valid length (sess_length * 10).
        key_masks = nn.functional.sequence_mask(sess_length * 10, keys_length)
        querys = paddle.tile(querys.unsqueeze(1), [1, keys_length, 1])
        att_input = paddle.concat([querys, keys, querys - keys, querys * keys], axis=-1)
        for i, layer in enumerate(self.layers):
            att_input = layer(att_input)
            att_input = self.bn_layer[i](att_input)  # BatchNormalization
            att_input = self.activation(att_input)   # activation
        att_score = self.dnn(att_input)                       # (N, 50, 1)
        att_score = paddle.transpose(att_score, [0, 2, 1])    # (N, 1, 50)
        if self.weight_normalization:
            paddings = paddle.ones_like(att_score) * (-2 ** 32 + 1)
        else:
            paddings = paddle.zeros_like(att_score)
        # unsqueeze key_masks so its shape matches att_score before masking
        att_score = paddle.where(key_masks.unsqueeze(1) == 1, att_score, paddings)
        att_score = nn.Softmax()(att_score)
        out = paddle.matmul(att_score, keys)
        return out
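
# Shape walk-through of the forward pass above (assuming the default
# dnn_units=[8, 64, 16], embedding dim 8 and keys_length = 50):
#     querys (N, 8) -> tiled to (N, 50, 8); concat gives att_input (N, 50, 32)
#     Linear(32 -> 64) + BN + sigmoid -> (N, 50, 64)
#     Linear(64 -> 16) + BN + sigmoid -> (N, 50, 16)
#     self.dnn: Linear(16 -> 1) -> (N, 50, 1), transposed to (N, 1, 50)
#     masked softmax over the 50 key positions, then matmul with keys (N, 50, 8)
#     -> pooled output (N, 1, 8)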


class MLP(nn.Layer):
    def __init__(self, mlp_hidden_units, use_bn=True):
        super().__init__()
        self.mlp_hidden_units = mlp_hidden_units
        self.activation = paddle.nn.Sigmoid()
        layer_list = []
        for i in range(len(mlp_hidden_units) - 1):
            dnn_layer = nn.Linear(
                in_features=self.mlp_hidden_units[i],
                out_features=self.mlp_hidden_units[i + 1],
                weight_attr=self._weight_init())
            layer_list.append(copy.deepcopy(dnn_layer))
        self.layers = nn.LayerList(layer_list)
        self.dense = nn.Linear(self.mlp_hidden_units[-1], 1, bias_attr=False)
        self.predict_layer = nn.Sigmoid()

    def _weight_init(self):
        return paddle.framework.ParamAttr(initializer=paddle.nn.initializer.XavierNormal())

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            x = self.activation(x)
        x = self.dense(x)
        x = self.predict_layer(x)
        return x
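

# A hedged smoke test, not part of the original module: it runs each layer once
# on random inputs and prints the output shapes. All shapes and hyper-parameters
# below are assumptions chosen to satisfy the defaults above (embedding dim 8,
# sequence length 50, sess_length * 10 <= 50).
if __name__ == "__main__":
    pe = PositionalEncoder(d_model=16, max_seq_len=80)
    tokens = paddle.randn([4, 20, 16])            # (batch, seq_len, d_model)
    print("PositionalEncoder output:", pe(tokens).shape)

    att_pool = AttentionSequencePoolingLayer()
    querys = paddle.randn([4, 8])                 # (N, emb)
    keys = paddle.randn([4, 50, 8])               # (N, 50, emb)
    sess_length = paddle.to_tensor([1, 2, 3, 5])  # integer lengths, *10 <= 50
    print("AttentionSequencePoolingLayer output:", att_pool([querys, keys, sess_length]).shape)

    mlp = MLP(mlp_hidden_units=[16, 8])
    feats = paddle.randn([4, 16])                 # (N, features)
    print("MLP output:", mlp(feats).shape)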