# This example visualizes how the learning rate and momentum affect the minimization of a model's loss function
import numpy as np
import matplotlib.pyplot as plt
import torch
def optimize_and_plot(lr=0.01, momentum=0.0):
    x = torch.tensor(2.0, requires_grad=True)
    buffer = torch.zeros_like(x.data)  # momentum buffer (velocity)
    values = []
    for i in range(10):
        y = function(x)
        values.append((x.detach().clone(), y.detach().clone()))
        y.backward()
        d_p = x.grad.data
        if momentum != 0:
            # Classic momentum update: buffer = momentum * buffer + grad
            buffer.mul_(momentum).add_(d_p)
            d_p = buffer
        x.data.add_(d_p, alpha=-lr)  # gradient step: x <- x - lr * d_p
        x.grad.zero_()
    xs = np.arange(-3, 2, 0.001)
    ys = function(xs)
    plt.figure(figsize=(10, 5))
    plt.plot([v[0].numpy() for v in values], [v[1].numpy() for v in values], 'r-X',
             linewidth=2, markersize=7)
    for i in range(10):
        plt.text(values[i][0] + 0.1, values[i][1], f'step {i}', fontdict={'color': 'r'})
    plt.plot(xs, ys, linewidth=2)
    plt.grid()
    plt.tick_params(axis='both', which='major', labelsize=12)
    plt.legend(['Optimizer steps', 'Objective function'])
    plt.show()
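# The manual update above follows the same rule torch.optim.SGD uses for its
# momentum buffer (buffer = momentum * buffer + grad, then x = x - lr * buffer).
# As a minimal cross-check sketch, the same trajectory can be reproduced with the
# built-in optimizer (optimize_with_builtin_sgd is a hypothetical helper, not part
# of the original example):
def optimize_with_builtin_sgd(lr=0.01, momentum=0.0):
    x = torch.tensor(2.0, requires_grad=True)
    optimizer = torch.optim.SGD([x], lr=lr, momentum=momentum)
    values = []
    for i in range(10):
        optimizer.zero_grad()
        y = function(x)
        values.append((x.item(), y.item()))
        y.backward()
        optimizer.step()
    return values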
def function(x):
    return x**4 + x**3 - 5*x**2
    # return x**6 + x**5 - 5*x**4
# Try a first learning rate value
lr0 = 0.01  # also try 0.1 and 0.09
optimize_and_plot(lr=lr0)
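# Optional sweep: plot the other learning rates suggested in the comment above
# in one go, to compare the trajectories side by side.
for lr in (0.1, 0.09):
    optimize_and_plot(lr=lr)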
# Try a first momentum value
mom0 = 0.3  # also try 0.94
optimize_and_plot(momentum=mom0)
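# Optional: plot the other momentum value suggested in the comment above, and
# print the (x, f(x)) trajectory from the built-in-optimizer sketch as a cross-check.
optimize_and_plot(momentum=0.94)
print(optimize_with_builtin_sgd(momentum=0.94))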