Skip to content

Commit 7d2ed63

Browse files
author
Olcay Taner YILDIZ
committed
Added different activation functions and updated deep network codes.
1 parent b670adc commit 7d2ed63

10 files changed

Lines changed: 112 additions & 28 deletions

Classification/Model/AutoEncoderModel.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from Classification.Instance.Instance import Instance
55
from Classification.InstanceList.InstanceList import InstanceList
66
from Classification.Model.NeuralNetworkModel import NeuralNetworkModel
7+
from Classification.Parameter.ActivationFunction import ActivationFunction
78
from Classification.Parameter.MultiLayerPerceptronParameter import MultiLayerPerceptronParameter
89
import copy
910

@@ -46,7 +47,7 @@ def __init__(self, trainSet: InstanceList, validationSet: InstanceList, paramete
4647
for j in range(trainSet.size()):
4748
self.createInputVector(trainSet.get(j))
4849
self.r = trainSet.get(j).toVector()
49-
hidden = self.calculateHidden(self.x, self.__W)
50+
hidden = self.calculateHidden(self.x, self.__W, ActivationFunction.SIGMOID)
5051
hiddenBiased = hidden.biased()
5152
self.y = self.__V.multiplyWithVectorFromRight(hiddenBiased)
5253
rMinusY = self.r.difference(self.y)
@@ -121,11 +122,11 @@ def __predictInput(self, instance: Instance) -> Vector:
121122
Predicted value.
122123
"""
123124
self.createInputVector(instance)
124-
self.calculateForwardSingleHiddenLayer(self.__W, self.__V)
125+
self.calculateForwardSingleHiddenLayer(self.__W, self.__V, ActivationFunction.SIGMOID)
125126
return self.y
126127

127128
def calculateOutput(self):
128129
"""
129130
The calculateOutput method calculates a forward single hidden layer.
130131
"""
131-
self.calculateForwardSingleHiddenLayer(self.__W, self.__V)
132+
self.calculateForwardSingleHiddenLayer(self.__W, self.__V, ActivationFunction.SIGMOID)

Classification/Model/DeepNetworkModel.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from Classification.InstanceList.InstanceList import InstanceList
22
from Classification.Model.NeuralNetworkModel import NeuralNetworkModel
3+
from Classification.Parameter.ActivationFunction import ActivationFunction
34
from Classification.Parameter.DeepNetworkParameter import DeepNetworkParameter
45
from Math.Matrix import Matrix
6+
from Math.Vector import Vector
57
import copy
68

79
from Classification.Performance.ClassificationPerformance import ClassificationPerformance
@@ -11,6 +13,7 @@ class DeepNetworkModel(NeuralNetworkModel):
1113

1214
__weights: list
1315
__hiddenLayerSize: int
16+
__activationFunction: ActivationFunction
1417

1518
def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: DeepNetworkParameter):
1619
"""
@@ -36,6 +39,7 @@ def __init__(self, trainSet: InstanceList, validationSet: InstanceList, paramete
3639
deltaWeights = []
3740
hidden = []
3841
hiddenBiased = []
42+
self.__activationFunction = parameters.getActivationFunction()
3943
self.__allocateWeights(parameters)
4044
bestWeights = self.__setBestWeights()
4145
bestClassificationPerformance = ClassificationPerformance(0.0)
@@ -50,18 +54,30 @@ def __init__(self, trainSet: InstanceList, validationSet: InstanceList, paramete
5054
deltaWeights.clear()
5155
for k in range(self.__hiddenLayerSize):
5256
if k == 0:
53-
hidden.append(self.calculateHidden(self.x, self.__weights[k]))
57+
hidden.append(self.calculateHidden(self.x, self.__weights[k], self.__activationFunction))
5458
else:
55-
hidden.append(self.calculateHidden(hiddenBiased[k - 1], self.__weights[k]))
59+
hidden.append(self.calculateHidden(hiddenBiased[k - 1], self.__weights[k], self.__activationFunction))
5660
hiddenBiased.append(hidden[k].biased())
5761
rMinusY = self.calculateRMinusY(trainSet.get(j), hiddenBiased[self.__hiddenLayerSize - 1],
5862
self.__weights[len(self.__weights) - 1])
5963
deltaWeights.insert(0, Matrix(rMinusY, hiddenBiased[self.__hiddenLayerSize - 1]))
6064
for k in range(len(self.__weights) - 2, -1, -1):
61-
oneMinusHidden = self.calculateOneMinusHidden(hidden[k])
62-
tmph = deltaWeights[0].elementProduct(self.__weights[k + 1]).sumOfRows()
65+
if k == len(self.__weights) - 2:
66+
tmph = self.__weights[k + 1].multiplyWithVectorFromLeft(rMinusY)
67+
else:
68+
tmph = self.__weights[k + 1].multiplyWithVectorFromLeft(tmpHidden)
6369
tmph.remove(0)
64-
tmpHidden = oneMinusHidden.elementProduct(tmph)
70+
if self.__activationFunction == ActivationFunction.SIGMOID:
71+
oneMinusHidden = self.calculateOneMinusHidden(hidden[k])
72+
activationDerivative = oneMinusHidden.elementProduct(hidden[k])
73+
elif self.__activationFunction == ActivationFunction.TANH:
74+
one = Vector(hidden[k].size(), 1.0)
75+
hidden[k].tanh()
76+
activationDerivative = one.difference(hidden[k].elementProduct(hidden[k]))
77+
elif self.__activationFunction == ActivationFunction.RELU:
78+
hidden[k].reluDerivative()
79+
activationDerivative = hidden
80+
tmpHidden = tmph.elementProduct(activationDerivative)
6581
if k == 0:
6682
deltaWeights.insert(0, Matrix(tmpHidden, self.x))
6783
else:
@@ -121,8 +137,8 @@ def calculateOutput(self):
121137
hiddenBiased = None
122138
for i in range(len(self.__weights) - 1):
123139
if i == 0:
124-
hidden = self.calculateHidden(self.x, self.__weights[i])
140+
hidden = self.calculateHidden(self.x, self.__weights[i], self.__activationFunction)
125141
else:
126-
hidden = self.calculateHidden(hiddenBiased, self.__weights[i])
142+
hidden = self.calculateHidden(hiddenBiased, self.__weights[i], self.__activationFunction)
127143
hiddenBiased = hidden.biased()
128144
self.y = self.__weights[len(self.__weights) - 1].multiplyWithVectorFromRight(hiddenBiased)

Classification/Model/MultiLayerPerceptronModel.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
from Math.Matrix import Matrix
2+
from Math.Vector import Vector
23

34
from Classification.InstanceList.InstanceList import InstanceList
45
from Classification.Model.LinearPerceptronModel import LinearPerceptronModel
6+
from Classification.Parameter.ActivationFunction import ActivationFunction
57
from Classification.Parameter.MultiLayerPerceptronParameter import MultiLayerPerceptronParameter
68
import copy
79

@@ -11,6 +13,7 @@
1113
class MultiLayerPerceptronModel(LinearPerceptronModel):
1214

1315
__V: Matrix
16+
__activationFunction: ActivationFunction
1417

1518
def __allocateWeights(self, H: int, seed: int):
1619
"""
@@ -42,6 +45,7 @@ def __init__(self, trainSet: InstanceList, validationSet: InstanceList, paramete
4245
hiddenNodes.
4346
"""
4447
super().initWithTrainSet(trainSet)
48+
self.__activationFunction = parameters.getActivationFunction()
4549
self.__allocateWeights(parameters.getHiddenNodes(), parameters.getSeed())
4650
bestW = copy.deepcopy(self.W)
4751
bestV = copy.deepcopy(self.__V)
@@ -52,14 +56,23 @@ def __init__(self, trainSet: InstanceList, validationSet: InstanceList, paramete
5256
trainSet.shuffle(parameters.getSeed())
5357
for j in range(trainSet.size()):
5458
self.createInputVector(trainSet.get(j))
55-
hidden = self.calculateHidden(self.x, self.W)
59+
hidden = self.calculateHidden(self.x, self.W, self.__activationFunction)
5660
hiddenBiased = hidden.biased()
5761
rMinusY = self.calculateRMinusY(trainSet.get(j), hiddenBiased, self.__V)
5862
deltaV = Matrix(rMinusY, hiddenBiased)
59-
oneMinusHidden = self.calculateOneMinusHidden(hidden)
6063
tmph = self.__V.multiplyWithVectorFromLeft(rMinusY)
6164
tmph.remove(0)
62-
tmpHidden = oneMinusHidden.elementProduct(hidden.elementProduct(tmph))
65+
if self.__activationFunction == ActivationFunction.SIGMOID:
66+
oneMinusHidden = self.calculateOneMinusHidden(hidden)
67+
activationDerivative = oneMinusHidden.elementProduct(hidden)
68+
elif self.__activationFunction == ActivationFunction.TANH:
69+
one = Vector(hidden.size(), 1.0)
70+
hidden.tanh()
71+
activationDerivative = one.difference(hidden.elementProduct(hidden))
72+
elif self.__activationFunction == ActivationFunction.RELU:
73+
hidden.reluDerivative()
74+
activationDerivative = hidden
75+
tmpHidden = tmph.elementProduct(activationDerivative)
6376
deltaW = Matrix(tmpHidden, self.x)
6477
deltaV.multiplyWithConstant(learningRate)
6578
self.__V.add(deltaV)
@@ -78,4 +91,4 @@ def calculateOutput(self):
7891
"""
7992
The calculateOutput method calculates the forward single hidden layer by using Matrices W and V.
8093
"""
81-
self.calculateForwardSingleHiddenLayer(self.W, self.__V)
94+
self.calculateForwardSingleHiddenLayer(self.W, self.__V, self.__activationFunction)

Classification/Model/NeuralNetworkModel.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
import math
1212

13+
from Classification.Parameter.ActivationFunction import ActivationFunction
14+
1315

1416
class NeuralNetworkModel(ValidatedModel):
1517
classLabels: list
@@ -99,7 +101,7 @@ def createInputVector(self, instance: Instance):
99101
self.x = instance.toVector()
100102
self.x.insert(0, 1.0)
101103

102-
def calculateHidden(self, input: Vector, weights: Matrix) -> Vector:
104+
def calculateHidden(self, input: Vector, weights: Matrix, activationFunction: ActivationFunction) -> Vector:
103105
"""
104106
The calculateHidden method takes a {@link Vector} input and {@link Matrix} weights, It multiplies the weights
105107
Matrix with given input Vector than applies the sigmoid function and returns the result.
@@ -110,14 +112,21 @@ def calculateHidden(self, input: Vector, weights: Matrix) -> Vector:
110112
Vector to multiply weights.
111113
weights : Matrix
112114
Matrix is multiplied with input Vector.
115+
activationFunction : ActivationFunction
116+
Activation function
113117
114118
RETURNS
115119
-------
116120
Vector
117121
Result of sigmoid function.
118122
"""
119123
z = weights.multiplyWithVectorFromRight(input)
120-
z.sigmoid()
124+
if activationFunction == ActivationFunction.SIGMOID:
125+
z.sigmoid()
126+
elif activationFunction == ActivationFunction.TANH:
127+
z.tanh()
128+
elif activationFunction == ActivationFunction.RELU:
129+
z.relu()
121130
return z
122131

123132
def calculateOneMinusHidden(self, hidden: Vector) -> Vector:
@@ -139,7 +148,7 @@ def calculateOneMinusHidden(self, hidden: Vector) -> Vector:
139148
one.initAllSame(hidden.size(), 1.0)
140149
return one.difference(hidden)
141150

142-
def calculateForwardSingleHiddenLayer(self, W: Matrix, V: Matrix):
151+
def calculateForwardSingleHiddenLayer(self, W: Matrix, V: Matrix, activationFunction: ActivationFunction):
143152
"""
144153
The calculateForwardSingleHiddenLayer method takes two matrices W and V. First it multiplies W with x, then
145154
multiplies V with the result of the previous multiplication.
@@ -150,8 +159,10 @@ def calculateForwardSingleHiddenLayer(self, W: Matrix, V: Matrix):
150159
Matrix to multiply with x.
151160
V : Matrix
152161
Matrix to multiply.
162+
activationFunction : ActivationFunction
163+
Activation function
153164
"""
154-
hidden = self.calculateHidden(self.x, W)
165+
hidden = self.calculateHidden(self.x, W, activationFunction)
155166
hiddenBiased = hidden.biased()
156167
self.y = V.multiplyWithVectorFromRight(hiddenBiased)
157168

Classification/Parameter/ActivationFunction.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from enum import Enum, auto
2+
3+
4+
class ActivationFunction(Enum):
5+
6+
SIGMOID = auto()
7+
TANH = auto()
8+
RELU = auto()

Classification/Parameter/DeepNetworkParameter.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1+
from Classification.Parameter.ActivationFunction import ActivationFunction
12
from Classification.Parameter.LinearPerceptronParameter import LinearPerceptronParameter
23

34

45
class DeepNetworkParameter(LinearPerceptronParameter):
56

67
__hiddenLayers: list
8+
__activationFunction: ActivationFunction
79

8-
def __init__(self, seed: int, learningRate: float, etaDecrease: float, crossValidationRatio: float, epoch: int, hiddenLayers: list):
10+
def __init__(self, seed: int, learningRate: float, etaDecrease: float, crossValidationRatio: float, epoch: int,
11+
hiddenLayers: list, activationFunction: ActivationFunction):
912
"""
1013
Parameters of the deep network classifier.
1114
@@ -23,9 +26,12 @@ def __init__(self, seed: int, learningRate: float, etaDecrease: float, crossVali
2326
Integer value for epoch number of the algorithm.
2427
hiddenLayers : list
2528
An integer list for hidden layers of the algorithm.
29+
activationFunction : ActivationFunction
30+
Activation function.
2631
"""
2732
super().__init__(seed, learningRate, etaDecrease, crossValidationRatio, epoch)
2833
self.__hiddenLayers = hiddenLayers
34+
self.__activationFunction = activationFunction
2935

3036
def layerSize(self) -> int:
3137
"""
@@ -54,3 +60,14 @@ def getHiddenNodes(self, layerIndex: int) -> int:
5460
The element at the layerIndex of hiddenLayers list.
5561
"""
5662
return self.__hiddenLayers[layerIndex]
63+
64+
def getActivationFunction(self) -> ActivationFunction:
65+
"""
66+
Accessor for the activationFunction.
67+
68+
RETURNS
69+
-------
70+
int
71+
The activation function.
72+
"""
73+
return self.__activationFunction

Classification/Parameter/MultiLayerPerceptronParameter.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
from Classification.Parameter.ActivationFunction import ActivationFunction
12
from Classification.Parameter.LinearPerceptronParameter import LinearPerceptronParameter
23

34

45
class MultiLayerPerceptronParameter(LinearPerceptronParameter):
56

67
__hiddenNodes: int
8+
__activationFunction: ActivationFunction
79

810
def __init__(self, seed: int, learningRate: float, etaDecrease: float, crossValidationRatio: float, epoch: int,
9-
hiddenNodes: int):
11+
hiddenNodes: int, activationFunction: ActivationFunction):
1012
"""
1113
Parameters of the multi layer perceptron algorithm.
1214
@@ -24,9 +26,12 @@ def __init__(self, seed: int, learningRate: float, etaDecrease: float, crossVali
2426
Integer value for epoch number of the algorithm.
2527
hiddenNodes : int
2628
Integer value for the number of hidden nodes.
29+
activationFunction : ActivationFunction
30+
Activation function.
2731
"""
2832
super().__init__(seed, learningRate, etaDecrease, crossValidationRatio, epoch)
2933
self.__hiddenNodes = hiddenNodes
34+
self.__activationFunction = activationFunction
3035

3136
def getHiddenNodes(self) -> int:
3237
"""
@@ -38,3 +43,14 @@ def getHiddenNodes(self) -> int:
3843
The hiddenNodes.
3944
"""
4045
return self.__hiddenNodes
46+
47+
def getActivationFunction(self) -> ActivationFunction:
48+
"""
49+
Accessor for the activationFunction.
50+
51+
RETURNS
52+
-------
53+
int
54+
The activation function.
55+
"""
56+
return self.__activationFunction

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='NlpToolkit-Classification',
5-
version='1.0.9',
5+
version='1.0.10',
66
packages=['Classification', 'Classification.Model', 'Classification.Model.DecisionTree', 'Classification.Filter',
77
'Classification.DataSet', 'Classification.Instance', 'Classification.Attribute',
88
'Classification.Parameter', 'Classification.Classifier', 'Classification.Experiment',

test/Classifier/DeepNetworkTest.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import unittest
22

33
from Classification.Classifier.DeepNetwork import DeepNetwork
4+
from Classification.Parameter.ActivationFunction import ActivationFunction
45
from Classification.Parameter.DeepNetworkParameter import DeepNetworkParameter
56
from test.Classifier.ClassifierTest import ClassifierTest
67

@@ -9,13 +10,13 @@ class DeepNetworkTest(ClassifierTest):
910

1011
def test_Train(self):
1112
deepNetwork = DeepNetwork()
12-
deepNetworkParameter = DeepNetworkParameter(1, 0.1, 0.99, 0.2, 100, [5, 5])
13+
deepNetworkParameter = DeepNetworkParameter(1, 0.1, 0.99, 0.2, 100, [5, 5], ActivationFunction.SIGMOID)
1314
deepNetwork.train(self.iris.getInstanceList(), deepNetworkParameter)
1415
self.assertAlmostEqual(4.00, 100 * deepNetwork.test(self.iris.getInstanceList()).getErrorRate(), 2)
15-
deepNetworkParameter = DeepNetworkParameter(1, 0.01, 0.99, 0.2, 100, [15, 15])
16+
deepNetworkParameter = DeepNetworkParameter(1, 0.01, 0.99, 0.2, 100, [15, 15], ActivationFunction.SIGMOID)
1617
deepNetwork.train(self.bupa.getInstanceList(), deepNetworkParameter)
1718
self.assertAlmostEqual(28.12, 100 * deepNetwork.test(self.bupa.getInstanceList()).getErrorRate(), 2)
18-
deepNetworkParameter = DeepNetworkParameter(1, 0.01, 0.99, 0.2, 100, [20])
19+
deepNetworkParameter = DeepNetworkParameter(1, 0.01, 0.99, 0.2, 100, [20], ActivationFunction.SIGMOID)
1920
deepNetwork.train(self.dermatology.getInstanceList(), deepNetworkParameter)
2021
self.assertAlmostEqual(3.55, 100 * deepNetwork.test(self.dermatology.getInstanceList()).getErrorRate(), 2)
2122

test/Classifier/MultiLayerPerceptronTest.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import unittest
22

33
from Classification.Classifier.MultiLayerPerceptron import MultiLayerPerceptron
4+
from Classification.Parameter.ActivationFunction import ActivationFunction
45
from Classification.Parameter.MultiLayerPerceptronParameter import MultiLayerPerceptronParameter
56
from test.Classifier.ClassifierTest import ClassifierTest
67

@@ -9,13 +10,13 @@ class MultiLayerPerceptronTest(ClassifierTest):
910

1011
def test_Train(self):
1112
multiLayerPerceptron = MultiLayerPerceptron()
12-
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.1, 0.99, 0.2, 100, 3)
13+
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.1, 0.99, 0.2, 100, 3, ActivationFunction.SIGMOID)
1314
multiLayerPerceptron.train(self.iris.getInstanceList(), multiLayerPerceptronParameter)
1415
self.assertAlmostEqual(2.67, 100 * multiLayerPerceptron.test(self.iris.getInstanceList()).getErrorRate(), 2)
15-
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.01, 0.99, 0.2, 100, 30)
16+
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.01, 0.99, 0.2, 100, 30, ActivationFunction.SIGMOID)
1617
multiLayerPerceptron.train(self.bupa.getInstanceList(), multiLayerPerceptronParameter)
17-
self.assertAlmostEqual(29.86, 100 * multiLayerPerceptron.test(self.bupa.getInstanceList()).getErrorRate(), 2)
18-
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.01, 0.99, 0.2, 100, 20)
18+
self.assertAlmostEqual(30.72, 100 * multiLayerPerceptron.test(self.bupa.getInstanceList()).getErrorRate(), 2)
19+
multiLayerPerceptronParameter = MultiLayerPerceptronParameter(1, 0.01, 0.99, 0.2, 100, 20, ActivationFunction.SIGMOID)
1920
multiLayerPerceptron.train(self.dermatology.getInstanceList(), multiLayerPerceptronParameter)
2021
self.assertAlmostEqual(3.55, 100 * multiLayerPerceptron.test(self.dermatology.getInstanceList()).getErrorRate(), 2)
2122

0 commit comments

Comments (0)