Commit f591534

support atanglu
1 parent 0db91f2 commit f591534

6 files changed: 35 additions & 6 deletions
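
This commit adds an ATanGLU gated activation to modules/commons/common_layers.py and exposes a glu_type option on the LYNXNet2 backbone and on the pitch/variance predictor configs. The option defaults to 'swiglu', matching the previous hard-coded behaviour, so configs that omit it are unaffected; setting glu_type: 'atanglu' switches the gated layers to the new activation.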

configs/acoustic.yaml

Lines changed: 1 addition & 0 deletions
@@ -78,6 +78,7 @@ backbone_args:
   kernel_size: 31
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 main_loss_type: l2
 main_loss_log_norm: false
 schedule_type: 'linear'

configs/templates/config_acoustic.yaml

Lines changed: 1 addition & 0 deletions
@@ -84,6 +84,7 @@ backbone_args:
   kernel_size: 31
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 #backbone_type: 'wavenet'
 #backbone_args:
 #  num_channels: 512

configs/templates/config_variance.yaml

Lines changed: 2 additions & 0 deletions
@@ -106,6 +106,7 @@ pitch_prediction_args:
   num_channels: 512
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 
 variances_prediction_args:
   total_repeat_bins: 48
@@ -120,6+121,7 @@ variances_prediction_args:
   num_channels: 384
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 
 lambda_dur_loss: 1.0
 lambda_pitch_loss: 1.0

configs/variance.yaml

Lines changed: 2 additions & 0 deletions
@@ -72,6 +72,7 @@ pitch_prediction_args:
   num_channels: 512
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 
 energy_db_min: -96.0
 energy_db_max: -12.0
@@ -96,6 +97,7 @@ variances_prediction_args:
   num_channels: 384
   dropout_rate: 0.0
   use_conditioner_cache: true
+  glu_type: 'atanglu'
 
 lambda_dur_loss: 1.0
 lambda_pitch_loss: 1.0

modules/backbones/lynxnet2.py

Lines changed: 13 additions & 6 deletions
@@ -2,14 +2,20 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from modules.commons.common_layers import SinusoidalPosEmb, SwiGLU, Transpose
+from modules.commons.common_layers import SinusoidalPosEmb, SwiGLU, ATanGLU, Transpose
 from utils.hparams import hparams
 
 
 class LYNXNet2Block(nn.Module):
-    def __init__(self, dim, expansion_factor, kernel_size=31, dropout=0.):
+    def __init__(self, dim, expansion_factor, kernel_size=31, dropout=0., glu_type='swiglu'):
         super().__init__()
         inner_dim = int(dim * expansion_factor)
+        if glu_type == 'swiglu':
+            _glu = SwiGLU()
+        elif glu_type == 'atanglu':
+            _glu = ATanGLU()
+        else:
+            raise ValueError(f'{glu_type} is not a valid activation')
         if float(dropout) > 0.:
             _dropout = nn.Dropout(dropout)
         else:
@@ -20,9 +26,9 @@ def __init__(self, dim, expansion_factor, kernel_size=31, dropout=0.):
             nn.Conv1d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim),
             Transpose((1, 2)),
             nn.Linear(dim, inner_dim * 2),
-            SwiGLU(),
+            _glu,
             nn.Linear(inner_dim, inner_dim * 2),
-            SwiGLU(),
+            _glu,
             nn.Linear(inner_dim, dim),
             _dropout
         )
@@ -33,7 +39,7 @@ def forward(self, x):
 
 class LYNXNet2(nn.Module):
     def __init__(self, in_dims, n_feats, *, num_layers=6, num_channels=512, expansion_factor=1, kernel_size=31,
-                 dropout=0.0, use_conditioner_cache=False):
+                 dropout=0.0, use_conditioner_cache=False, glu_type='swiglu'):
         """
         LYNXNet2(Linear Gated Depthwise Separable Convolution Network Version 2)
         """
@@ -59,7 +65,8 @@ def __init__(self, in_dims, n_feats, *, num_layers=6, num_channels=512, expansio
                 dim=num_channels,
                 expansion_factor=expansion_factor,
                 kernel_size=kernel_size,
-                dropout=dropout
+                dropout=dropout,
+                glu_type=glu_type
             )
             for i in range(num_layers)
         ]
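
Both options split the pre-activation in half along the channel dimension (nn.Linear(dim, inner_dim * 2) feeds the GLU, which outputs inner_dim) and multiply one half by a nonlinear function of the other; only the gating nonlinearity differs. A minimal sketch comparing the two gate functions, assuming the repository's SwiGLU uses the usual silu (swish) gate:

import torch
import torch.nn.functional as F

gate = torch.linspace(-6.0, 6.0, steps=7)

# SwiGLU-style gate: silu(g) = g * sigmoid(g), unbounded for large positive inputs.
print(F.silu(gate))

# ATanGLU gate: atan(g), odd-symmetric and bounded in (-pi/2, pi/2).
print(torch.atan(gate))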

modules/commons/common_layers.py

Lines changed: 16 additions & 0 deletions
@@ -128,6 +128,19 @@ def forward(self, x):
         return out * gate
 
 
+class ATanGLU(nn.Module):
+    # ArcTan gated linear unit: gates half of the input channels with atan of the other half.
+    def __init__(self, dim=-1):
+        super().__init__()
+        self.dim = dim
+
+    def forward(self, x):
+        # out, gate = x.chunk(2, dim=self.dim)
+        # Using torch.split instead of chunk for ONNX export compatibility.
+        out, gate = torch.split(x, x.size(self.dim) // 2, dim=self.dim)
+        return out * torch.atan(gate)
+
+
 class KaimingNormalConv1d(torch.nn.Conv1d):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -160,6 +173,9 @@ def __init__(self, hidden_size, filter_size, kernel_size=1, dropout=0., act='gel
         elif self.act == 'swiglu':
             self.act_fn = SwiGLU()
             filter_size_1 = filter_size * 2
+        elif self.act == 'atanglu':
+            self.act_fn = ATanGLU()
+            filter_size_1 = filter_size * 2
         else:
             raise ValueError(f'{act} is not a valid activation')
         self.ffn_1 = nn.Conv1d(hidden_size, filter_size_1, kernel_size, padding=kernel_size // 2)
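
As a sanity check, the new module can be exercised on its own; a self-contained sketch of the class added above (the tensor shape is illustrative, not taken from the commit):

import torch
import torch.nn as nn


class ATanGLU(nn.Module):
    # ArcTan gated linear unit: split the input in half along `dim`
    # and gate one half with atan of the other.
    def __init__(self, dim=-1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        # torch.split instead of chunk for ONNX export compatibility
        out, gate = torch.split(x, x.size(self.dim) // 2, dim=self.dim)
        return out * torch.atan(gate)


x = torch.randn(2, 100, 512)   # (batch, frames, 2 * inner_dim); shape chosen for illustration
y = ATanGLU()(x)
print(y.shape)                 # torch.Size([2, 100, 256]) -- the gated dimension is halved
out, gate = x.chunk(2, dim=-1)
assert torch.allclose(y, out * torch.atan(gate))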
