I'm coding Neural Network with Pytorch but i have a prolem. Pytorch does not update weight after optimizer.step().
This is my code:
Neuralnetwork.py
import torchclass NeuralNetwork(torch.nn.Module): def __init__(self, size_of_hidden_layer:tuple=(5, 5) ) -> None: super().__init__() self.number_of_hidden_layers = size_of_hidden_layer[0] self.number_of_node_each_hidden_layers = size_of_hidden_layer[1] self.hidden_layers = None self.setActFunc('relu') def fit(self, X, y): if self.hidden_layers != None: return self.hidden_layers = torch.nn.ModuleList() self.hidden_layers.append(torch.nn.Linear(in_features=X.shape[1], out_features=self.number_of_node_each_hidden_layers)) for _ in range(self.number_of_hidden_layers-1): self.hidden_layers.append(torch.nn.Linear(in_features=self.number_of_node_each_hidden_layers, out_features=self.number_of_node_each_hidden_layers)) self.output = torch.nn.Linear(in_features=self.number_of_node_each_hidden_layers, out_features=y.shape[1]) def setActFunc(self, act_func:str): if act_func == 'relu': self.activation = torch.nn.ReLU() if act_func == 'leaky': self.activation = torch.nn.LeakyReLU() if act_func == 'sigmoid': self.activation = torch.nn.Sigmoid() def forward(self, X): A = X for layer in self.hidden_layers: Z = layer(A) A = self.activation(Z) Z = self.output(A) y = torch.nn.Softmax(dim=1)(Z) return (y >= 0.5) * 1.0
Trainer.py
import torchfrom torch.nn import Module, MSELossfrom torch.optim import SGD, Adamclass Trainer(): def __init__(self) -> None: pass def train(self, model:Module, X:torch.Tensor, y:torch.Tensor, epochs=10, lr=0.01, act_func='relu'): train_model = model train_model.setActFunc(act_func) train_model.fit(X, y) optimizer = Adam(params=train_model.parameters(), lr=lr) loss_func = MSELoss() for epoch in range(epochs): train_model.train() optimizer.zero_grad() pred = train_model(X) loss = loss_func(pred.squeeze(), y.squeeze()) loss.requires_grad = True loss.backward() optimizer.step()
main.py
import torchimport NeuralNetwork as NNimport Trainerimport numpy as npif __name__ == '__main__': torch.random.manual_seed(42) # Create data sample = 10 feature = 100 X = torch.rand(size=(sample, feature), requires_grad=True) * 10 y = (torch.rand(size=(sample, 1), requires_grad=True) >= 0.5).type(torch.float) # Check if model work model = NN.NeuralNetwork(size_of_hidden_layer=(1, 15)) model.fit(X, y) # Check weight before train print(model.state_dict()) # Train model trainer = Trainer.Trainer() trainer.train(model, X=X, y=y, epochs=1000) # Check weight after train print(model.state_dict())
When i run the file main.py, then the result is:
Before train:OrderedDict([('hidden_layers.0.weight', tensor([[ 0.0260, -0.0809, -0.0605, ..., 0.0265, -0.0153, 0.0411],[-0.0417, -0.0943, -0.0384, ..., -0.0058, 0.0766, 0.0625],[ 0.0919, -0.0732, 0.0643, ..., -0.0694, 0.0010, 0.0003],...,[ 0.0034, 0.0239, -0.0124, ..., -0.0813, -0.0271, 0.0883],[ 0.0391, -0.0119, 0.0206, ..., -0.0621, -0.0815, 0.0218],[-0.0551, 0.0284, 0.0847, ..., -0.0059, 0.0009, -0.0260]])), ('hidden_layers.0.bias', tensor([-0.0683, 0.0907, -0.0331, 0.0477, 0.0696, -0.0657, -0.0125, 0.0578,0.0969, -0.0126, 0.0725, -0.0785, -0.0650, 0.0822, -0.0095])), ('output.weight', tensor([[ 0.2551, 0.0188, -0.1071, 0.2452, 0.2258, 0.0240, -0.1565, -0.0112,-0.1242, 0.1881, 0.0120, 0.0544, 0.1566, 0.0855, -0.1612]])), ('output.bias', tensor([0.2145]))])
After train:OrderedDict([('hidden_layers.0.weight', tensor([[ 0.0260, -0.0809, -0.0605, ..., 0.0265, -0.0153, 0.0411],[-0.0417, -0.0943, -0.0384, ..., -0.0058, 0.0766, 0.0625],[ 0.0919, -0.0732, 0.0643, ..., -0.0694, 0.0010, 0.0003],...,[ 0.0034, 0.0239, -0.0124, ..., -0.0813, -0.0271, 0.0883],[ 0.0391, -0.0119, 0.0206, ..., -0.0621, -0.0815, 0.0218],[-0.0551, 0.0284, 0.0847, ..., -0.0059, 0.0009, -0.0260]])), ('hidden_layers.0.bias', tensor([-0.0683, 0.0907, -0.0331, 0.0477, 0.0696, -0.0657, -0.0125, 0.0578,0.0969, -0.0126, 0.0725, -0.0785, -0.0650, 0.0822, -0.0095])), ('output.weight', tensor([[ 0.2551, 0.0188, -0.1071, 0.2452, 0.2258, 0.0240, -0.1565, -0.0112,-0.1242, 0.1881, 0.0120, 0.0544, 0.1566, 0.0855, -0.1612]])), ('output.bias', tensor([0.2145]))])