# Part 1: calculate gradients

There are two ways of getting gradients:

Using `backward()`:

```python
x=torch.tensor([3.0], requires_grad=True)
y = torch.pow(x, 2) # y=x**2
y.backward(retain_graph=True)
print(x.grad)  # tensor([6.])
```

Using `torch.autograd.grad()`:

```python
x=torch.tensor([3.0], requires_grad=True)
y = torch.pow(x, 2)
grad_1 = torch.autograd.grad(y, x, create_graph=True)
print(grad_1[0].item())  # 6.0
```

# Part 2: Note

(1) Gradients accumulate across `backward()` calls; they are not cleared unless you clear them explicitly

```python
w = torch.tensor([1.], requires_grad=True)
x = torch.tensor([2.], requires_grad=True)

for i in range(4):
    a = torch.add(w, x)
    b = torch.add(w, 1)
    y = torch.mul(a, b)
    y.backward()
    print(w.grad)
    w.grad.zero_()
```

```python
import torch
w = torch.tensor([1.], requires_grad=True)
x = torch.tensor([2.], requires_grad=True)
# y=(x+w)*(w+1)
a = torch.add(w, x)     # retain_grad()
b = torch.add(w, 1)
y = torch.mul(a, b)
y.backward()
print("is_leaf:\n", w.is_leaf, x.is_leaf,
      a.is_leaf, b.is_leaf, y.is_leaf)
print("gradient:\n", w.grad, x.grad, a.grad, b.grad, y.grad)
print("w.grad_fn = ", w.grad_fn)
print("x.grad_fn = ", x.grad_fn)
print("a.grad_fn = ", a.grad_fn)
print("b.grad_fn = ", b.grad_fn)
print("y.grad_fn = ", y.grad_fn)
```

The result is

```
is_leaf:
 True True False False False
gradient:
 tensor([5.]) tensor([2.]) None None None
w.grad_fn =  None
x.grad_fn =  None
a.grad_fn =  <AddBackward0 object at 0x7fe63c2edc40>
b.grad_fn =  <AddBackward0 object at 0x7fe63c2edaf0>
y.grad_fn =  <MulBackward0 object at 0x7fe63c2edc40>
```

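In the computation graph, a leaf variable has a meaningful gradient (its `.grad` is populated after `backward()`), but it has no gradient function: its `grad_fn` is `None`. A non-leaf variable does have a `grad_fn`, which we can inspect, but its `.grad` is `None` unless `retain_grad()` is called on it before the backward pass.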

(3) Two ways of disabling gradient calculation

Method 1

```python
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad) # False
```

Method 2

```python
z = torch.matmul(x, w)+b
print(z.requires_grad) #True
with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad) # False
```

# 3.1 Regression

```python
import torch
import matplotlib.pyplot as plt
torch.manual_seed(10)
lr = 0.05
x = torch.rand(20, 1) * 10
y = 2*x + (5 + torch.randn(20, 1))
w = torch.randn((1), requires_grad=True)
b = torch.zeros((1), requires_grad=True)
for iteration in range(100):
    wx = torch.mul(w, x)
    y_pred = torch.add(wx, b)
    loss = (0.5 * (y - y_pred) ** 2).mean()
    loss.backward()
    b.data.sub_(lr * b.grad)
    w.data.sub_(lr * w.grad)
    w.grad.zero_()
    b.grad.zero_()
    plt.scatter(x.data.numpy(), y.data.numpy())
    plt.plot(x.data.numpy(), y_pred.data.numpy(), 'r-',
             lw=5)
    if loss.data.numpy() < 1:
        break
```

see regression code.

# 3.2 Logistic Regression

```python
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
torch.manual_seed(10)
sample_nums = 100
mean_value = 1.7
bias = 5
n_data = torch.ones(sample_nums, 2)
x0 = torch.normal(mean_value * n_data, 1) + bias
y0 = torch.zeros(sample_nums)
x1 = torch.normal(-mean_value * n_data, 1) + bias
y1 = torch.ones(sample_nums)
train_x = torch.cat((x0, x1), 0)
train_y = torch.cat((y0, y1), 0)

class LR(nn.Module):
    def __init__(self):
        super(LR, self).__init__()
        self.features = nn.Linear(2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.features(x)
        x = self.sigmoid(x)
        return x

lr_net = LR()
loss_fn = nn.BCELoss()
lr = 0.01
optimizer = torch.optim.SGD(lr_net.parameters(), lr=lr, momentum=0.9)
print(lr_net.features.bias)
for iteration in range(100):
    lr_net.train()
    y_pred = lr_net(train_x)
    loss = loss_fn(y_pred.squeeze(), train_y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    if iteration % 20 == 0:
        mask = y_pred.ge(0.5).float().squeeze()
        correct = (mask == train_y).sum()
        acc = correct.item() / train_y.size(0)
        plt.scatter(x0.data.numpy()[:, 0], x0.data.numpy()[:, 1], c='r', label='class 0')
        plt.scatter(x1.data.numpy()[:, 0], x1.data.numpy()[:, 1], c='b', label='class 1')
        w0, w1 = lr_net.features.weight[0]
        w0, w1 = float(w0.item()), float(w1.item())
        plot_b = float(lr_net.features.bias[0].item())
        plot_x = np.arange(-6, 6, 0.1)
        plot_y = (-w0 * plot_x - plot_b) / w1
        plt.xlim(-10, 10)
        plt.ylim(-10, 10)
        plt.plot(plot_x, plot_y)
        plt.legend()
        plt.show()
        plt.pause(0.5)
        if acc > 0.99:
            break
```

# Part 4: Jacobian Product

In many cases, we have a scalar loss function, and we need to compute the gradient with respect to some parameters. However, there are cases when the output function is an arbitrary tensor. In this case, PyTorch allows you to compute the so-called Jacobian product, and not the actual gradient.

Instead of computing the Jacobian matrix itself, PyTorch allows you to compute the Jacobian product $v^T \cdot J$ for a given input vector $v = (v_1 \dots v_m)$. This is achieved by calling `backward` with $v$ as an argument. The size of $v$ should be the same as the size of the original tensor, with respect to which we want to compute the product:

```python
inp = torch.eye(5, requires_grad=True)
print(inp.shape) # 5 by 5
out = (inp+1).pow(2)
print(out.shape) # 5 by 5
out.backward(torch.ones_like(inp), retain_graph=True)
print("First call\n", inp.grad) # 5 by 5
```

# Part 5: back propagation

(1) manual gradient calculation

code

(2) automatic gradient calculation via back propagation

code

--

--

--

Love podcasts or audiobooks? Learn on the go with our new app.