Similar to NumPy arrays + Can run on CPU or GPU + Support automatic differentiation
Can hold elements of a single data type (like float32, int64, etc.)
torch.float64
torch.float32 (default for most computations)
torch.int64 (used for indexing or integer operations)
torch.int32
torch.bool
import torch
t = torch.tensor([1, 2, 3], dtype=torch.float32)
print(t.dtype) # torch.float32
Type: torch.Size (tuple-like)
# A Scalar
s = torch.tensor(5)
print(s.shape) # torch.Size([])
# A Vector
v = torch.tensor([1, 2, 3])
print(v.shape) # torch.Size([3])
# A Matrix
m = torch.tensor([[1, 2],
[3, 4]])
print(m.shape) # torch.Size([2, 2])
# 3D or higher: Can represent images, sequences, or batches of data.
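For instance, an RGB image can be stored as a 3D tensor and a batch of images as a 4D tensor; a minimal sketch (the sizes are made up for illustration):
img = torch.randn(3, 28, 28) # one image: (channels, height, width)
print(img.ndim) # 3
batch = torch.randn(8, 3, 28, 28) # a batch of 8 images: (batch, channels, height, width)
print(batch.ndim) # 4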
cpu
cuda:0 (GPU)
# Option 1: Hard-coding the device directly is NOT recommended ❌ (it fails if CUDA is unavailable)
t = torch.tensor([1, 2, 3], device='cuda:0')
# First, check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Option 2: Create the tensor on the CPU first, then move it to the GPU if available
t = torch.tensor([1, 2, 3]).to(device)
# Option 3: Set the device when creating the tensor ✅ (more efficient)
t = torch.tensor([1, 2, 3], device=device)
print(t.device)
Default: False
Setting True → Autograd tracking:
Build a computational graph that records every operation.
Store intermediate results for backward computation.
x = torch.tensor([2.0, 3.0], requires_grad=True)
print(x.requires_grad) # True
x = torch.tensor([2.0, 3.0], requires_grad=True)
print(x.requires_grad)
y = x**2 + 1 # Any operation
y.sum().backward() # Compute gradients: dy/dx = 2*x
print(x.grad) # tensor([4., 6.])
# Here requires_grad=False by default → both x and y are leaf tensors
x = torch.tensor([1.0, 2.0, 3.0])
y = x + 2
print(x.is_leaf) # True
print(y.is_leaf) # True
# Here requires_grad=True, so y is not a leaf in the computation graph
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x + 2
print(x.is_leaf) # True
print(y.is_leaf) # False
Scalar = 0
Vector = 1
Matrix = 2
x = torch.randn(2, 2, 4)
print(x.ndim) # 3
x = torch.randn(2, 2, 4)
print(x.numel()) # 16
import numpy as np
# From a list
t1 = torch.tensor([[1, 2], [3, 4]])
# From a NumPy array
np_array = np.array([5, 6, 7])
t2 = torch.from_numpy(np_array)
# Zeros, ones, or random values
zeros = torch.zeros(2, 3) # 2x3 tensor filled with 0s
ones = torch.ones(2, 3) # 2x3 tensor filled with 1s
rand_tensor = torch.rand(2, 3) # 2x3 tensor with random values [0, 1)
# Identity matrix
eye = torch.eye(3) # 3x3 identity matrix
# Tensors with a range: "Fixed step size"
# Simplified Syntax: torch.arange(start=0, end, step=1)
arange = torch.arange(0, 10, 2) # tensor([0, 2, 4, 6, 8])
# Tensor with a fixed number of evenly spaced values: "Fixed number of steps"
# Simplified Syntax: torch.linspace(start, end, steps=100)
ls = torch.linspace(0, 10, steps=5) # tensor([ 0.0,  2.5,  5.0,  7.5, 10.0])
c = a + b → Element-wise addition
c = a - b → Element-wise Subtraction
c = a * b → Element-wise multiplication
c = a / b → Element-wise division
c = torch.pow(a, 2) → Raises each element to a power
a = torch.tensor([[1, 2, 3],
[4, 5, 6]])
b = torch.tensor([[7, 8, 9],
[10,11,12]])
c = a + b
print(c) # tensor([[ 8, 10, 12],
# [14, 16, 18]])
c = a * b
print(c) # tensor([[ 7, 16, 27],
# [40, 55, 72]])
c = a / b
print(c) # tensor([[0.1429, 0.2500, 0.3333],
# [0.4000, 0.4545, 0.5000]])
c = a + 2
print(c) # tensor([[3, 4, 5],
# [6, 7, 8]])
c = torch.pow(a,2)
print(c) # tensor([[ 1, 4, 9],
# [16, 25, 36]])
X = torch.tensor([[1, 2], [3, 4]])
y = torch.tensor([10, 20])
print(X + y) # tensor([[11, 22],
# [13, 24]])
✅ Matrix multiplication requires the inner dimensions to match: if a has shape (n x m) and b has shape (m x p), then c = a @ b has shape (n x p); ❌ in other cases, it won't work
dot(a, b) = Σ aᵢ bᵢ → scalar value
Syntax: tensor.transpose(dim0, dim1)
dim0: The first dimension to swap; dim1: The second dimension to swap
A new tensor view (not a copy) with the two specified dimensions swapped.
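A quick sketch of transpose(dim0, dim1) on a 3D tensor (the sizes are arbitrary):
x = torch.randn(2, 3, 4)
y = x.transpose(0, 2) # swap dimension 0 with dimension 2
print(y.shape) # torch.Size([4, 3, 2])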
c = a @ b # ❌ Throws RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x3 and 2x3)
print(b.T) # tensor([[ 7, 10],
# [ 8, 11],
# [ 9, 12]])
c = a @ b.T
print(c) # tensor([[ 50, 68],
# [122, 167]])
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
result = torch.dot(a, b)
print(result) # 32
The determinant is a scalar value that gives information about a square matrix:
Whether the matrix is invertible (non-singular)
How the matrix scales space during transformation (e.g., area or volume)
Whether the matrix preserves or flips orientation
a x a⁻¹ = I; where I = identity matrix (a matrix with 1s on the diagonal and 0s elsewhere)
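A minimal sketch of computing the determinant and the inverse with torch.linalg (the matrix values are made up):
A = torch.tensor([[4., 7.],
                  [2., 6.]])
print(torch.linalg.det(A)) # tensor(10.) → non-zero, so A is invertible
A_inv = torch.linalg.inv(A)
print(A @ A_inv) # ≈ identity matrix (up to floating-point error)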
Definition: If av=λv, then: λ is an eigenvalue (a scalar), v is the corresponding eigenvector (a nonzero vector)
The function returns two tensors:
eigenvalues (evals) — complex tensor of shape (n,) → Contains the eigenvalues λ1,λ2,...,λn
eigenvectors (evecs) — complex tensor of shape (n, n) → Columns are the corresponding eigenvectors v1,v2,...,vn
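A minimal sketch, assuming the function in question is torch.linalg.eig (a diagonal matrix is used so the results are easy to check by hand):
A = torch.tensor([[2., 0.],
                  [0., 3.]])
evals, evecs = torch.linalg.eig(A)
print(evals) # tensor([2.+0.j, 3.+0.j])
print(evecs) # tensor([[1.+0.j, 0.+0.j],
             #         [0.+0.j, 1.+0.j]])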
x = torch.tensor([[1, 2, 3],
[4, 5, 6]]) # shape (2, 3)
y = x.reshape(3, 2) # change to shape (3, 2)
print(y) # tensor([[1, 2],
# [3, 4],
# [5, 6]])
Note: view() requires the tensor to be stored contiguously in memory, while reshape() can handle non-contiguous tensors safely (it creates a copy if needed).
print(x.is_contiguous()) # True
z = x.T # This only changes the view; it doesn't rearrange the data in memory
print(z.is_contiguous()) # False
z_con = z.contiguous() # makes a copy arranged in contiguous memory
print(z_con.is_contiguous()) # True
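A short sketch of the view()/reshape() difference using the non-contiguous tensor z from above:
print(z.reshape(6)) # works, copying if needed → tensor([1, 4, 2, 5, 3, 6])
print(z.view(6)) # ❌ RuntimeError: view size is not compatible with input tensor's size and stride ... use .reshape(...) instead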
x = torch.tensor([[1, 2, 3],
[4, 5, 6]])
y = x.flatten()
print(y) # tensor([1, 2, 3, 4, 5, 6])
x = torch.zeros(1, 3, 1, 5) # torch.Size([1, 3, 1, 5])
y = x.squeeze() # Removes all dimensions of size 1
print(y.shape) # torch.Size([3, 5])
Note: x.squeeze(dim) → removes only that dimension if size = 1
x = torch.zeros(1, 3, 1, 5) # torch.Size([1, 3, 1, 5])
y = x.squeeze(2) # Removes the dimension at index 2 if its size is 1
print(y.shape) # torch.Size([1, 3, 5])
x = torch.tensor([1, 2, 3]) # torch.Size([3])
y = x.unsqueeze(0) # Add dimension at position 0
print(y) # tensor([[1, 2, 3]])
print(y.shape) # torch.Size([1, 3])
z = x.unsqueeze(1) # Add dimension at position 1
print(z) # tensor([[1],
# [2],
# [3]])
print(z.shape) # torch.Size([3, 1])
a = torch.tensor([[1, 2],
[3, 4]])
b = torch.tensor([[5, 6]])
c = torch.cat((a, b), dim=0)
print(c) #tensor([[1, 2],
# [3, 4],
# [5, 6]])
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
c = torch.stack([a, b])
print(c) # tensor([[1, 2, 3],
# [4, 5, 6]])
d = torch.stack([a, b], dim=1)
print(d) # tensor([[1, 4],
# [2, 5],
# [3, 6]])
a = torch.tensor([[10, 20, 30],
[40, 50, 60],
[70, 80, 90]])
# Access an element using indices (row, column)
print(a[0, 2]) # Output: 30 (element in 1st row, 3rd column)
print(a[2, 1]) # Output: 80 (element in 3rd row, 2nd column)
Use negative indices to access from the end:
print(a[-1, -1]) # Output: 90 (last element)
# Extract first two rows and first two columns
print(a[:2, :2]) # tensor([[10, 20],
# [40, 50]])
# Extract last two columns of all rows
print(a[:, 1:]) # tensor([[20, 30],
# [50, 60],
# [80, 90]])
Modify slices:
a[:2, 1:] = torch.tensor([[0, 0],
[0, 0]])
print(a) # tensor([[10, 0, 0],
# [40, 0, 0],
# [70, 80, 90]])
x = torch.tensor([1, 2, 3, 4])
y = torch.tensor([1, 0, 3, 5])
equal = x == y # Equal
print(equal) # tensor([ True, False, True, False])
greater = x > y # Greater
print(greater) # tensor([ False, True, False, False])
Logical operations (torch.logical_and, torch.logical_or, etc.) are used on Boolean tensors.
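A minimal sketch combining two Boolean masks (the values are made up):
x = torch.tensor([1, 2, 3, 4])
print(torch.logical_and(x > 1, x < 4)) # tensor([False,  True,  True, False])
print(torch.logical_or(x < 2, x > 3)) # tensor([ True, False, False,  True])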
b = torch.tensor([5, 10, 15, 20, 25])
# Create a mask for elements greater than 15
mask = b > 15
print(mask) # tensor([False, False, False, True, True])
# Use mask to select elements
print(b[mask]) # tensor([20, 25])
# Modify elements using mask
b[mask] = 0
print(b) # tensor([ 5, 10, 15, 0, 0])
x = torch.tensor([[1, 2], [3, 4]])
print(x.sum()) # 10 (1+2+3+4)
print(x.sum(dim=0)) # tensor([4, 6]) → sum of columns
print(x.sum(dim=1)) # tensor([3, 7]) → sum of rows
print(x.mean()) # 2.5 (10 / 4)
print(x.mean(dim=0)) # tensor([2., 3.])
print(x.min()) # 1
# This gives both values and indices of the max in each column
print(x.max(dim=0)) # values=tensor([3, 4]), indices=tensor([1, 1])
print(x.argmax()) # 3 (index of element 4 in flattened tensor [1,2,3,4])
print(x.argmin()) # 0 (index of element 1)
print(x.argmax(dim=0)) # tensor([1, 1]) → max indices in each column
default: Euclidean / L2 norm
A = torch.tensor([[1., 2.],
                  [3., 4.]])
print(torch.norm(A)) # sqrt(1² + 2² + 3² + 4²) = 5.4772
print(torch.norm(A, p=1)) # L1 norm = sum of absolute values = 10
x = torch.tensor([[1., 2., 3.],
[4., 5., 6.]])
# Without dim (all elements)
print(torch.mean(x)) # 3.5
print(torch.std(x)) # 1.8708
print(torch.var(x)) # 3.5
print(torch.median(x)) # 3.0 → for an even number of elements, the lower of the two middle values is returned
# dim=0 (column-wise)
print(torch.mean(x, dim=0)) # tensor([2.5, 3.5, 4.5])
print(torch.median(x, dim=0)) # torch.return_types.median(
# values=tensor([1., 2., 3.]),
# indices=tensor([0, 0, 0]))
# dim=1 (row-wise)
print(torch.mean(x, dim=1)) # tensor([2., 5.])
In-place operations are marked by a trailing underscore _.
add_(), sub_(), mul_(), div_(), pow_(), t_(), fill_()
Saves memory and can improve performance, especially for large tensors.
x = torch.tensor([1.0, 2.0, 3.0])
x.add_(10)
print(x) # tensor([11., 12., 13.])
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
x.add_(2) # ❌ RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.
If modifying a tensor that requires gradients is really needed (e.g., when updating weights manually in a training loop), do it safely inside a torch.no_grad() block (or operate on x.detach()).
with torch.no_grad():
    x.add_(2)
This is how optimizers like torch.optim.SGD update parameters — they modify model weights in place but under torch.no_grad().
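A minimal sketch of the same pattern for a manual gradient-descent step (the weights, loss, and learning rate are made up for illustration):
w = torch.tensor([1.0, 2.0], requires_grad=True)
loss = (w ** 2).sum()
loss.backward() # w.grad = 2 * w = tensor([2., 4.])
with torch.no_grad():
    w -= 0.1 * w.grad # in-place update, not tracked by autograd
w.grad.zero_() # reset gradients before the next step
print(w) # tensor([0.8000, 1.6000], requires_grad=True)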