import torch
import numpy as np

some_list = [[1, 2, 3], [4, 5, 6]]
np_array = np.array(some_list)
np_tensor = torch.from_numpy(np_array) # Creating a tensor from a NumPy array (shares memory with the array)
tensor = torch.tensor(some_list) # Creating a tensor from a list; the data type is inferred from the input values
tensor2 = torch.Tensor(some_list) # Creating a tensor from a list with float32 as the default data type
tensor.dtype # See data type of tensors
tensor.device # See what device the tensor is using
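# A quick illustrative check of the dtype difference above (outputs as PyTorch reports them on a typical setup):
# `torch.tensor` keeps the integer dtype of the list, while `torch.Tensor` promotes to float32.
print(tensor.dtype)    # torch.int64 (inferred from the Python ints)
print(tensor2.dtype)   # torch.float32 (default dtype of torch.Tensor)
print(np_tensor.dtype) # matches the NumPy array's dtype (typically torch.int64, platform-dependent)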
a + b # Tensor addition (subtraction and multiplication work the same way, elementwise)
torch.manual_seed(seed) # Setting seed
torch.rand_like(some_tensor, dtype=torch.float) # Create a tensor of random numbers with the same shape as the given tensor
torch.ones_like(some_tensor, dtype=torch.float) # Create a tensor of 1s with the same shape as the given tensor
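# Minimal sketch giving concrete values to the `a` and `b` used above (values chosen for illustration only):
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([10.0, 20.0, 30.0])
print(a + b, a - b, a * b)   # tensor([11., 22., 33.]) tensor([-9., -18., -27.]) tensor([10., 40., 90.])
print(torch.ones_like(a))    # tensor([1., 1., 1.]) -- same shape and dtype as `a`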
shape = (2,3,)
rand_tensor = torch.rand(shape) # Create a tensor of random numbers with specified shape
ones_tensor = torch.ones(shape) # Create a tensor of 1s with specified shape
zeros_tensor = torch.zeros(shape) # Create a tensor of 0s with specified shape
tensor.shape # Shape of tensor
tensor.dtype # Datatype of tensor
tensor.device # Device tensor is stored on
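# Example of inspecting these attributes on the tensors just created (assumes the `shape = (2,3,)` block ran):
print(rand_tensor.shape)   # torch.Size([2, 3])
print(zeros_tensor.dtype)  # torch.float32 (the default floating-point dtype)
print(ones_tensor.device)  # cpu, unless the tensor was explicitly moved, e.g. ones_tensor.to('cuda')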
some_tensor = torch.arange(12).reshape(3, 4).float() # Creating, reshaping and giving data type to a tensor
print('First row:', some_tensor[0])
print('First column:', some_tensor[:, 0])
print('Last column:', some_tensor[:, -1])
t1 = torch.cat([tensor1, tensor2, tensor3], dim=1) # Horizontal concatenation, increasing features/columns
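# Illustrative sketch of concatenation with the 3x4 `some_tensor` defined above:
# dim=0 stacks rows (more samples), dim=1 appends columns (more features).
stacked_rows = torch.cat([some_tensor, some_tensor], dim=0)  # shape (6, 4)
side_by_side = torch.cat([some_tensor, some_tensor], dim=1)  # shape (3, 8)
print(stacked_rows.shape, side_by_side.shape)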
matrix_mul = tensor @ tensor.T # Matrix multiplication
matrix_mul = tensor.matmul(tensor.T) # Matrix multiplication of a tensor with its transpose
matrix_mul = torch.mm(mat.t(), mat) # Matrix multiplication of a transpose with the original tensor (torch.mm is for 2-D tensors only)
normal_mul = tensor1 * tensor2 # Elementwise multiplication
normal_mul = tensor1.mul(tensor2) # Elementwise multiplication (same as *)
agg = tensor.sum() # summing
agg_item = agg.item() # extracting value from a tensor
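# Quick illustration with `some_tensor` (3x4): matmul with its transpose gives a 3x3 result,
# and `.sum().item()` converts the single-element aggregate into a plain Python number.
print((some_tensor @ some_tensor.T).shape)  # torch.Size([3, 3])
print(some_tensor.sum().item())             # 66.0, since 0 + 1 + ... + 11 = 66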
x = torch.arange(4.0)
x.requires_grad_(True) # Same as `x = torch.arange(4.0, requires_grad=True)`, i.e. we can take derivatives with respect to x
y = 2 * torch.dot(x, x) # y = 2 * (x . x): square each element, sum, then double: 2*(0 + 1 + 4 + 9) = 2*14 = 28
y.backward() # Backpropagate: dy/dx = 4x, so the gradient is 4 * (0, 1, 2, 3) = (0, 4, 8, 12)
x.grad # Show the computed gradient
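# Sanity check of the gradient above: for y = 2 * (x . x), dy/dx = 4x, which we can verify directly.
print(x.grad)                      # tensor([ 0.,  4.,  8., 12.])
print(torch.equal(x.grad, 4 * x))  # True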
x = np.linspace(-np.pi, np.pi, 100)
x = torch.tensor(x, requires_grad=True)
y = torch.sin(x)
y.backward(torch.ones_like(x)) # Pass an initial gradient (here all ones) because y is not a scalar
# NOTE: y is not a scalar here, so a plain `y.backward()` raises an error; any non-scalar output needs an initial gradient
x.grad # Show the computed gradient
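# Sanity check for the vector case: d/dx sin(x) = cos(x), so the gradient should match torch.cos(x).
print(torch.allclose(x.grad, torch.cos(x)))  # True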
def sigmoid(x): # Squashes a value into the range (0, 1)
    return 1 / (1 + torch.exp(-x))

def softmax(X): # Turns each row of values into probabilities; each row sums to 1
    result = torch.zeros_like(X) # This is where we will store the probability values
    for i in range(X.shape[0]): # Iterate over each row
        row = X[i]
        max_val = torch.max(row) # Get the maximum value of the row
        exp_row = torch.exp(row - max_val) # Shift the row so its maximum is 0 (for numerical stability)
        row_sum = torch.sum(exp_row) # Sum of all exponentiated values in the row
        softmax_row = exp_row / row_sum # Probability for each value
        result[i] = softmax_row # Store the result row
    return result
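# Illustrative check of the two activation functions above on a small 2x3 input; the hand-written versions
# should agree with PyTorch's built-ins torch.softmax and torch.sigmoid (built-ins used only for comparison).
logits = torch.tensor([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])
probs = softmax(logits)
print(probs.sum(dim=1))                                        # each row sums to 1
print(torch.allclose(probs, torch.softmax(logits, dim=1)))     # True
print(torch.allclose(sigmoid(logits), torch.sigmoid(logits)))  # True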
def linear(X, W, b): # Linear regression (y = mX + c); m is the weight W and c is the bias b
    return X @ W + b

def squared_loss(y_hat, y): # Loss function for regression (mean of halved squared errors)
    return ((y_hat - y.reshape(y_hat.shape)) ** 2 / 2).mean()

def cross_entropy(y_hat, y): # Loss function for multi-class problems; expects one-hot labels y and predicted probabilities y_hat
    n = y.shape[0] # Number of examples/samples
    loss = -torch.sum(y * torch.log(y_hat)) / n
    return loss
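# Small worked example for the two losses above (values chosen for illustration only):
y_true = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])  # one-hot labels for 2 samples, 3 classes
y_prob = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])  # predicted probabilities (rows sum to 1)
print(cross_entropy(y_prob, y_true))  # -(log(0.7) + log(0.8)) / 2 ≈ 0.2899
print(squared_loss(torch.tensor([2.5, 0.0]), torch.tensor([3.0, -0.5])))  # each term is 0.5**2 / 2, so the mean is 0.125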
def sgd(params, lr, batch_size): # Optimizer that minimizes the loss, even on non-convex loss surfaces
    """ Minibatch stochastic gradient descent """
    # lr = lr / batch_size
    with torch.no_grad(): # Disable gradient tracking; the parameter update itself should not be recorded
        for param in params:
            param -= lr * param.grad # Manually update parameters (weights and bias)
            param.grad.zero_() # Reset gradients; new gradients will be calculated during the next backward pass
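# The training loop below assumes `data_iter`, `features`, `labels`, `w`, `b`, `lr`, `num_epochs`, and
# `batch_size`, which are not defined in these notes. A minimal, self-contained setup (synthetic linear
# data, a random minibatch iterator, and initial parameters) might look like this sketch:
import random

true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
features = torch.normal(0, 1, (1000, 2))                               # 1000 samples, 2 features
labels = features @ true_w + true_b + torch.normal(0, 0.01, (1000,))   # linear targets plus a little noise

def data_iter(batch_size, features, labels):  # yields random minibatches of (X, y)
    indices = list(range(len(features)))
    random.shuffle(indices)
    for start in range(0, len(features), batch_size):
        batch = torch.tensor(indices[start:start + batch_size])
        yield features[batch], labels[batch]

w = torch.normal(0, 0.01, (2,), requires_grad=True)  # learnable weight
b = torch.zeros(1, requires_grad=True)               # learnable bias
lr, num_epochs, batch_size = 0.03, 3, 10             # illustrative hyperparameters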
### Training
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        y_pred = linear(X, w, b)
        loss = squared_loss(y_pred, y) # Minibatch loss on `X` and `y`
        loss.backward() # Backpropagation: compute gradients for this minibatch
        sgd([w, b], lr, batch_size) # Update parameters using their gradients for this minibatch
    with torch.no_grad(): # Finally, report the loss after each epoch (make sure gradient tracking is disabled in this step)
        train_loss = squared_loss(linear(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {train_loss:f}')
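# If the synthetic-data sketch above was used, the learned parameters should end up close to the
# generating values; a quick (illustrative) comparison:
print('error in w:', true_w - w.detach())
print('error in b:', true_b - b.detach().item())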