
Commit 74afa02

Merge pull request #97 from raynardj/dev
Dev
2 parents 09d5c7a + 99978c8 commit 74afa02

2 files changed

Lines changed: 23 additions & 48 deletions


forgebox/ftorch/layers/__init__.py

Lines changed: 0 additions & 48 deletions
@@ -11,54 +11,6 @@ def forward(self, input):
         return input.view(input.size(0), -1)
 
 
-
-
-class AttLSTM(nn.Module):
-    def __init__(self, mask_activation="softmax", **kwargs):
-        """
-        Attentional LSTM
-        input_size: input dimension
-        hidden_size: hidden dimension, also the output dimention of LSTM
-        other kwargs of LSTM, most of the following is pilferage from nn.LSTM doc:
-        input_size: mentioned above, only have to specify once
-        hidden_size: mentioned above, only have to specify once
-        num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
-            would mean stacking two LSTMs together to form a `stacked LSTM`,
-            with the second LSTM taking in outputs of the first LSTM and
-            computing the final results. Default: 1
-        bias: If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`.
-            Default: ``True``
-        batch_first: If ``True``, then the input and output tensors are provided
-            as (batch, seq, feature). Default: ``False``
-        dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
-            LSTM layer except the last layer, with dropout probability equal to
-            :attr:`dropout`. Default: 0
-        bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``
-        """
-        super(AttLSTM, self).__init__()
-        self.input_size = kwargs["input_size"]
-        self.hidden_size = kwargs["hidden_size"]
-        self.mask_maker = nn.Linear(self.hidden_size, 1)
-        self.lstm = nn.LSTM(**kwargs)
-        if mask_activation == "softmax":
-            self.mask_act = nn.Softmax(dim=1)
-        elif mask_activation == "sigmoid":
-            self.mask_act = nn.Sigmoid()
-        elif mask_activation == "relu":
-            self.mask_act = nn.ReLU()
-        elif mask_activation == "passon":
-            self.mask_act = passon()
-        else:
-            print("Activation type:%s not found, should be one of the following:\nsoftmax\nsigmoid\nrelu" % (
-                mask_activation))
-
-    def forward(self, x):
-        mask = self.mask_act(self.mask_maker(x).squeeze(-1)).unsqueeze(1)  # mask shape (bs, 1, seq_leng)
-        output, (h_n, c_n) = self.lstm(x)
-        output = mask.bmm(output).squeeze(1)  # output shape (bs, hidden_size)
-        return output, (h_n, c_n), mask.squeeze(1)
-
-
 class passon(nn.Module):
     def __init__(self):
         """

forgebox/ftorch/layers/nlp.py

Lines changed: 23 additions & 0 deletions
@@ -126,3 +126,26 @@ def forward(self, x, mask):
         x = self.input_block(x, lambda _x: self.attention(_x, _x, _x, mask=mask))
         x = self.output_block(x, self.FFN)
         return self.dropout(x)
+
+
+class AttentionLSTM(nn.Module):
+    def __init__(self, input_size, heads=4, num_layers=2, dropout_ratio=0.1):
+        """
+        input_size: int, input size, also the hidden size for the RNN
+        heads: int = 4, number of attention heads (the extra dimension space for attention)
+        num_layers: int = 2
+        dropout_ratio: float = 0.1
+        """
+        super().__init__()
+        self.attention = MultiHeadedAttention(heads, d_model=input_size, dropout_ratio=dropout_ratio)
+        self.rnn = nn.LSTM(input_size=input_size,
+                           hidden_size=input_size,
+                           num_layers=num_layers,
+                           batch_first=True)
+
+    def forward(self, x, mask=None):
+        x1 = self.attention(x, x, self.rnn(x)[0], mask)
+        xr = x.flip(dims=[1])
+        x2 = self.attention(xr, xr, self.rnn(xr)[0], mask)
+        x = torch.cat([x1[:, -1, :], x2[:, -1, :]], dim=1)
+        return x
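
A minimal usage sketch of the new AttentionLSTM. The import path follows the file location shown above, the concrete sizes are illustrative, and the expected output width of 2 * input_size assumes MultiHeadedAttention (defined earlier in nlp.py) returns tensors shaped like its value input.

    import torch
    from forgebox.ftorch.layers.nlp import AttentionLSTM

    # heads should divide input_size evenly for the multi-headed attention split.
    layer = AttentionLSTM(input_size=64, heads=4, num_layers=2, dropout_ratio=0.1)

    x = torch.randn(8, 20, 64)   # (batch, seq_len, input_size); the internal LSTM is batch_first
    out = layer(x)               # forward-pass and flipped-pass last steps, concatenated
    print(out.shape)             # expected torch.Size([8, 128]), i.e. (batch, 2 * input_size)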
