Following the PyTorch documentation on extending PyTorch, this snippet illustrates how to add a linear neural network module by hand. It contains the linear operation and module as discussed in the documentation, together with a small example application on linearly separable data.
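The code below sticks to the Variable-era interface from the linked notes, where torch.autograd.Function is subclassed and instantiated directly. For readers on a more recent PyTorch release, roughly the same operation would be written against the static-method Function interface; the following is only a minimal sketch along those lines (the name LinearFunction is illustrative, not part of the original code):

import torch

class LinearFunction(torch.autograd.Function):
    """ Same linear operation, written for the static-method Function interface. """

    @staticmethod
    def forward(ctx, input, weight, bias=None):
        # weight is stored as (output_features, input_features), as in the extending notes
        ctx.save_for_backward(input, weight, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_output.t().mm(input)
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum(0)
        return grad_input, grad_weight, grad_bias

# The operation is then invoked via LinearFunction.apply(input, weight, bias)
# instead of instantiating the class.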
""" Simple example using the operations defined in :mod:`linear`. """ import numpy import torch import linear import random def simple_example(): """ Simple example illustrating forward and backward pass through the linear operation. """ input = torch.autograd.Variable(torch.rand(1, 3), requires_grad = True) weights = torch.autograd.Variable(torch.rand(1, 3)) bias = torch.autograd.Variable(torch.zeros(1)) output = linear.linear(input, weights, bias) output.backward() print(input.grad) class Net(torch.nn.Module): """ Network definition consisting of one fully connected (linear) layer followed by a softmax with two outputs. """ def __init__(self): super(Net, self).__init__() self.fc1 = linear.LinearNN(6, 2, True) def forward(self, x): x = self.fc1(x) return torch.nn.functional.softmax(x) def nn_example(): """ Simple, linearly separable classification example. """ N = 1000 inputs = numpy.zeros((N, 6)) outputs = numpy.zeros((N, 1)) for n in range(N): outputs[n, 0] = random.randint(0, 1) if outputs[n, 0] > 0: inputs[n, 0:3] = 1 else: inputs[n, 3:6] = 1 model = Net() optimizer = torch.optim.SGD(model.parameters(), lr = 0.05) for t in range(100): indices = numpy.arange(N) numpy.random.shuffle(indices) indices = indices[0:10] data = torch.autograd.Variable(torch.Tensor(inputs[indices])) target = torch.autograd.Variable(torch.Tensor(outputs[indices])) pred = model(data) # better pass long to loss: https://discuss.pytorch.org/t/problems-with-target-arrays-of-int-int32-types-in-loss-functions/140 # also target has to be 1D - https://github.com/torch/cutorch/issues/227 loss = torch.nn.functional.nll_loss(pred, target.resize(10).long()) loss.backward() optimizer.step() if t%10 == 0: pred = model(torch.autograd.Variable(torch.Tensor(inputs))) # use pred.data to get from a torch.autograd.Variable to the underlying Tensor accuracy = (pred.data.numpy().argmax(1).reshape((N)) == outputs.reshape((N))).astype(int).sum() / float(N) print(accuracy) if __name__ == '__main__': #simple_example() nn_example()
""" Implementation of a linear operation following http://pytorch.org/docs/notes/extending.html. """ import torch class LinearOp(torch.autograd.Function): """ Linear Operation for PyTorch. """ # bias is an optional argument def forward(self, input, weight, bias = None): self.save_for_backward(input, weight, bias) output = input.mm(weight) if bias is not None: output += bias.repeat(1, input.size()) return output # This function has only a single output, so it gets only one gradient def backward(self, grad_output): # This is a pattern that is very convenient - at the top of backward # unpack saved_tensors and initialize all gradients w.r.t. inputs to # None. Thanks to the fact that additional trailing Nones are # ignored, the return statement is simple even when the function has # optional inputs. input, weight, bias = self.saved_tensors grad_input = grad_weight = grad_bias = None # These needs_input_grad checks are optional and there only to # improve efficiency. If you want to make your code simpler, you can # skip them. Returning gradients for inputs that don't require it is # not an error. if self.needs_input_grad: grad_input = grad_output.mm(weight) if self.needs_input_grad: grad_weight = grad_output.t().mm(input) if bias is not None and self.needs_input_grad: grad_bias = grad_output.sum(0).squeeze(0) return grad_input, grad_weight, grad_bias def linear(input, weight, bias = None): """ Interface to the newly created operation. """ return LinearOp()(input, weight, bias) class LinearNN(torch.nn.Module): def __init__(self, input_features, output_features, bias = True): torch.nn.Module.__init__(self) self.input_features = input_features self.output_features = output_features # nn.Parameter is a special kind of Variable, that will get # automatically registered as Module's parameter once it's assigned # as an attribute. Parameters and buffers need to be registered, or # they won't appear in .parameters() (doesn't apply to buffers), and # won't be converted when e.g. .cuda() is called. You can use # .register_buffer() to register buffers. # nn.Parameters can never be volatile and, different than Variables, # they require gradients by default. self.weight = torch.nn.Parameter(torch.Tensor(input_features, output_features)) if bias is not None: self.bias = torch.nn.Parameter(torch.Tensor(output_features)) else: # You should always register all possible parameters, but the # optional ones can be None if you want. self.register_parameter('bias', None) # Not a very smart way to initialize weights self.weight.data.uniform_(-0.1, 0.1) if bias is not None: self.bias.data.uniform_(-0.1, 0.1) def forward(self, input): # See the autograd section for explanation of what happens here. return LinearOp()(input, self.weight, self.bias)
What is your opinion on the code snippet? Is it working for you? Let me know your thoughts in the comments below.