17th February 2017

SNIPPET

Following the PyTorch documentation on extending PyTorch, this snippet illustrates how to add a custom linear neural network module by hand. It includes the linear operation and module as discussed in the documentation (linear.py) as well as a small application on linearly separable data (main.py).
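
For reference, the operation implemented in linear.py is a plain affine map, and the gradients that the backward pass has to return follow directly from the chain rule. Using my own notation here (matching the code's weight layout of input_features x output_features), with $x$ the (batch_size, input_features) input, $W$ the weight and $b$ the bias:

$$y = x W + b, \qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y} W^\top, \qquad
\frac{\partial L}{\partial W} = x^\top \frac{\partial L}{\partial y}, \qquad
\frac{\partial L}{\partial b} = \sum_n \left(\frac{\partial L}{\partial y}\right)_{n,:}$$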

main.py
"""
Simple example using the operations defined in :mod:`linear`.
"""

import numpy
import torch
import linear
import random

def simple_example():
    """
    Simple example illustrating forward and backward pass through the linear operation.
    """

    input = torch.autograd.Variable(torch.rand(1, 3), requires_grad = True)
    # the weight matrix is (input_features, output_features), here (3, 1), matching LinearNN below
    weights = torch.autograd.Variable(torch.rand(3, 1))
    bias = torch.autograd.Variable(torch.zeros(1))

    output = linear.linear(input, weights, bias)
    # output holds a single element, so backward() needs no explicit gradient argument
    output.backward()
    print(input.grad)

class Net(torch.nn.Module):
    """
    Network definition consisting of one fully connected (linear) layer followed by a log-softmax over two outputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = linear.LinearNN(6, 2, True)

    def forward(self, x):
        x = self.fc1(x)
        # nll_loss expects log-probabilities, so log_softmax is used instead of softmax
        return torch.nn.functional.log_softmax(x)

def nn_example():
    """
    Simple, linearly separable classification example.
    """

    N = 1000
    inputs = numpy.zeros((N, 6))
    outputs = numpy.zeros((N, 1))
    for n in range(N):
        outputs[n, 0] = random.randint(0, 1)
        if outputs[n, 0] > 0:
            inputs[n, 0:3] = 1
        else:
            inputs[n, 3:6] = 1

    model = Net()
    optimizer = torch.optim.SGD(model.parameters(), lr = 0.05)

    for t in range(100):
        indices = numpy.arange(N)
        numpy.random.shuffle(indices)
        indices = indices[0:10]

        data = torch.autograd.Variable(torch.Tensor(inputs[indices]))
        target = torch.autograd.Variable(torch.Tensor(outputs[indices]))
        pred = model(data)
        # better pass long to loss: https://discuss.pytorch.org/t/problems-with-target-arrays-of-int-int32-types-in-loss-functions/140
        # also target has to be 1D - https://github.com/torch/cutorch/issues/227
        loss = torch.nn.functional.nll_loss(pred, target.view(-1).long())

        # gradients accumulate across calls to backward(), so clear them first
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if t%10 == 0:
            pred = model(torch.autograd.Variable(torch.Tensor(inputs)))
            # use pred.data to get from a torch.autograd.Variable to the underlying Tensor
            accuracy = (pred.data.numpy().argmax(1).reshape((N)) == outputs.reshape((N))).astype(int).sum() / float(N)
            print(accuracy)

if __name__ == '__main__':
    #simple_example()
    nn_example()

linear.py
"""
Implementation of a linear operation following http://pytorch.org/docs/notes/extending.html.
"""

import torch

class LinearOp(torch.autograd.Function):
    """
    Linear Operation for PyTorch.
    """

    # bias is an optional argument
    def forward(self, input, weight, bias = None):
        self.save_for_backward(input, weight, bias)
        # weight is stored as (input_features, output_features), see LinearNN below
        output = input.mm(weight)
        if bias is not None:
            # add the bias to every row of the (batch_size, output_features) output
            output += bias.unsqueeze(0).expand_as(output)
        return output

    # This function has only a single output, so it gets only one gradient
    def backward(self, grad_output):
        # This is a pattern that is very convenient - at the top of backward
        # unpack saved_tensors and initialize all gradients w.r.t. inputs to
        # None. Thanks to the fact that additional trailing Nones are
        # ignored, the return statement is simple even when the function has
        # optional inputs.
        input, weight, bias = self.saved_tensors
        grad_input = grad_weight = grad_bias = None

        # These needs_input_grad checks are optional and there only to
        # improve efficiency. If you want to make your code simpler, you can
        # skip them. Returning gradients for inputs that don't require it is
        # not an error.
        if self.needs_input_grad[0]:
            # (batch_size, output_features) x (output_features, input_features)
            grad_input = grad_output.mm(weight.t())
        if self.needs_input_grad[1]:
            # (input_features, batch_size) x (batch_size, output_features)
            grad_weight = input.t().mm(grad_output)
        if bias is not None and self.needs_input_grad[2]:
            grad_bias = grad_output.sum(0).squeeze(0)

        return grad_input, grad_weight, grad_bias

def linear(input, weight, bias = None):
    """
    Interface to the newly created operation.
    """

    return LinearOp()(input, weight, bias)

class LinearNN(torch.nn.Module):
    """
    Module wrapping the linear operation; holds weight and bias as parameters.
    """

    def __init__(self, input_features, output_features, bias = True):
        super(LinearNN, self).__init__()

        self.input_features = input_features
        self.output_features = output_features

        # nn.Parameter is a special kind of Variable, that will get
        # automatically registered as Module's parameter once it's assigned
        # as an attribute. Parameters and buffers need to be registered, or
        # they won't appear in .parameters() (doesn't apply to buffers), and
        # won't be converted when e.g. .cuda() is called. You can use
        # .register_buffer() to register buffers.
        # nn.Parameters can never be volatile and, different than Variables,
        # they require gradients by default.
        self.weight = torch.nn.Parameter(torch.Tensor(input_features, output_features))
        # bias is a boolean flag indicating whether a bias term should be learned
        if bias:
            self.bias = torch.nn.Parameter(torch.Tensor(output_features))
        else:
            # You should always register all possible parameters, but the
            # optional ones can be None if you want.
            self.register_parameter('bias', None)

        # Not a very smart way to initialize weights
        self.weight.data.uniform_(-0.1, 0.1)
        if bias:
            self.bias.data.uniform_(-0.1, 0.1)

    def forward(self, input):
        # See the autograd section for explanation of what happens here.
        return LinearOp()(input, self.weight, self.bias)
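
To check whether the backward pass is consistent with the forward pass, a numerical gradient check can be run against LinearOp. The following is a minimal sketch using torch.autograd.gradcheck, assuming a PyTorch version contemporary with the snippet above (where Variables and old-style Functions are still in use); the shapes and tolerances are my own choices, not part of the original snippet:

import torch
import linear

# double precision keeps the numerical gradient estimates accurate
input = torch.autograd.Variable(torch.randn(10, 6).double(), requires_grad = True)
weight = torch.autograd.Variable(torch.randn(6, 2).double(), requires_grad = True)
bias = torch.autograd.Variable(torch.randn(2).double(), requires_grad = True)

# compares the analytical gradients from LinearOp.backward with numerical estimates
test = torch.autograd.gradcheck(linear.linear, (input, weight, bias), eps = 1e-6, atol = 1e-4)
print(test)

If this prints True, the gradients returned by LinearOp.backward agree with the numerical estimates up to the chosen tolerance.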

What is your opinion on the code snippet? Is it working? Let me know your thoughts in the comments below!