Example1) PCA 2D to 1D¶

In [1]:

import numpy as np
import matplotlib.pyplot as plt

In [ ]:

Create 2D dataset¶

In [2]:

np.random.seed(42)
n_samples = 10

# Create data along x1 ≈ x2 with some noise
x = np.linspace(0, 10, n_samples)
x = np.round(x).astype(int)

y = x + np.random.normal(0, 1, n_samples)
y = np.round(y).astype(int)

X = np.column_stack((x, y))  # shape (100, 2)

# Plot original data
plt.figure(figsize=(6, 6))
plt.scatter(X[:, 0], X[:, 1], alpha=0.7)
plt.title("Original 2D Data (x1 vs x2)")
plt.xlabel("x1")
plt.ylabel("x2")
plt.axis('equal')
plt.grid(True)
plt.show()

No description has been provided for this image

In [ ]:

Step 1: Standardize the data¶

In [3]:

X_mean = np.mean(X, axis=0)
print("X_mean:\n", X_mean) # (mean_of_col1, mean_of_col2)

X_std = np.std(X, axis=0)
print("\nX_std:\n", X_std) # # (mstd_of_col1, std_of_col2)

X_scaled = (X - X_mean) / X_std

print("\nStandardized Data:\n", X_scaled)

X_mean:
 [5.  5.7]

X_std:
 [3.31662479 3.55105618]

Standardized Data:
 [[-1.50755672 -1.60515624]
 [-1.20604538 -1.32354989]
 [-0.90453403 -0.76033717]
 [-0.60302269 -0.19712445]
 [-0.30151134 -0.47873081]
 [ 0.30151134  0.08448191]
 [ 0.60302269  0.92930098]
 [ 0.90453403  0.92930098]
 [ 1.20604538  0.92930098]
 [ 1.50755672  1.4925137 ]]

In [14]:

# Plot original data
plt.figure(figsize=(6, 6))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], alpha=0.7)
plt.title("Scaled 2D Data (x1 vs x2)")
plt.xlabel("x1")
plt.ylabel("x2")
plt.axis('equal')
plt.grid(True)
plt.show()

In [ ]:

Step 2: Compute Covariance Matrix¶

In [4]:

cov_matrix = np.cov(X_scaled.T)
print("\nCovariance Matrix:\n", cov_matrix)

Covariance Matrix:
 [[1.11111111 1.08492932]
 [1.08492932 1.11111111]]

In [ ]:

Step 3: Compute Eigenvalues and Eigenvectors¶

In [5]:

eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print("\nEigenvalues:\n", eigenvalues)
print("\nEigenvectors:\n", eigenvectors)

Eigenvalues:
 [2.19604043 0.02618179]

Eigenvectors:
 [[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]

In [ ]:

Step 4: Sort Eigenvalues and Select Top k=1¶

In [6]:

sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues_sorted = eigenvalues[sorted_indices]
eigenvectors_sorted = eigenvectors[:, sorted_indices]

print("\nSorted Eigenvalues:\n", eigenvalues_sorted)
print("\nSorted Eigenvectors:\n", eigenvectors_sorted)

Sorted Eigenvalues:
 [2.19604043 0.02618179]

Sorted Eigenvectors:
 [[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]

In [ ]:

Step 5: Project Data to 1D¶

In [7]:

k=1
W = eigenvectors_sorted[:, :k]  # Top-1 component
print(f"\nProjection Matrix W (top {k} eigenvectors):\n", W)
X_pca = X_scaled @ W  # shape (100, 1)

Projection Matrix W (top 1 eigenvectors):
 [[0.70710678]
 [0.70710678]]

In [8]:

print("Original points:\n", X )
print("\nScaled points:\n", X_scaled )
print("\nPoints on PC1:\n", X_pca)

Original points:
 [[ 0  0]
 [ 1  1]
 [ 2  3]
 [ 3  5]
 [ 4  4]
 [ 6  6]
 [ 7  9]
 [ 8  9]
 [ 9  9]
 [10 11]]

Scaled points:
 [[-1.50755672 -1.60515624]
 [-1.20604538 -1.32354989]
 [-0.90453403 -0.76033717]
 [-0.60302269 -0.19712445]
 [-0.30151134 -0.47873081]
 [ 0.30151134  0.08448191]
 [ 0.60302269  0.92930098]
 [ 0.90453403  0.92930098]
 [ 1.20604538  0.92930098]
 [ 1.50755672  1.4925137 ]]

Points on PC1:
 [[-2.20102045]
 [-1.78869396]
 [-1.17724172]
 [-0.56578947]
 [-0.55171452]
 [ 0.27293845]
 [ 1.08351646]
 [ 1.29671718]
 [ 1.50991789]
 [ 2.12137014]]

In [15]:

# Lets the points on PC1

# Create an array of zeros for the y-axis
y_zeros = np.zeros_like(X_pca)

# 2. Plotting
plt.figure(figsize=(10, 2))  # Short height to emphasize 1D space

# Plot points on the line
plt.scatter(X_pca, y_zeros, color='red', marker='x', s=100, linewidths=2, label='Points on PC1')

# Draw the single axis line
plt.axhline(0, color='black', linewidth=1)

# Aesthetics
plt.title('1D Projection of Data onto PC1 Axis', fontsize=12, pad=15)
plt.xlabel('PC1 Coordinate Value', fontsize=10)
plt.grid(axis='x', linestyle=':', alpha=0.6)

# Hide the y-axis entirely since it has no meaning here
plt.gca().get_yaxis().set_visible(False)

# Clean up borders
for spine in ['top', 'left', 'right']:
    plt.gca().spines[spine].set_visible(False)

plt.xlim(min(X_pca) - 0.5, max(X_pca) + 0.5)
plt.legend(loc='upper right')
plt.tight_layout()
plt.show()

In [ ]:

Step 6: Reconstruct to Original 2D Space¶

In [16]:

X_reconstructed = X_pca @ W.T  # shape (100, 2)
X_reconstructed = X_reconstructed * X_std + X_mean  # unscale

top_n=10
print("\nReconstructed Data (Approximate Original):\n", X_reconstructed[:top_n])
print("Actual data:\n",X[:top_n])

Reconstructed Data (Approximate Original):
 [[-0.16185049  0.17329069]
 [ 0.80514082  1.20863255]
 [ 2.23912345  2.74397441]
 [ 3.67310608  4.27931628]
 [ 3.70611476  4.31465814]
 [ 5.64009739  6.38534186]
 [ 7.54107134  8.42068372]
 [ 8.04107134  8.95602559]
 [ 8.54107134  9.49136745]
 [ 9.97505397 11.02670931]]
Actual data:
 [[ 0  0]
 [ 1  1]
 [ 2  3]
 [ 3  5]
 [ 4  4]
 [ 6  6]
 [ 7  9]
 [ 8  9]
 [ 9  9]
 [10 11]]

In [ ]:

Step 7: Plot Reconstructed vs Original¶

In [17]:

# Assuming X (original data) and X_reconstructed are already defined

plt.figure(figsize=(6, 6))
plt.scatter(X[:, 0], X[:, 1], color='green', alpha=0.6, label='Original Data')
plt.scatter(X_reconstructed[:, 0], X_reconstructed[:, 1], color='purple', alpha=0.6, label='Reconstructed Data (1D)')
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Original vs Reconstructed Data (PCA 2D → 1D)")
plt.axis('equal')
plt.grid(True)
plt.legend()
plt.show()

# Optional: Print a few samples to compare
print("\nOriginal Data (first 5 rows):\n", X[:5])
print("\nReconstructed Data (first 5 rows):\n", X_reconstructed[:5])

Original Data (first 5 rows):
 [[0 0]
 [1 1]
 [2 3]
 [3 5]
 [4 4]]

Reconstructed Data (first 5 rows):
 [[-0.16185049  0.17329069]
 [ 0.80514082  1.20863255]
 [ 2.23912345  2.74397441]
 [ 3.67310608  4.27931628]
 [ 3.70611476  4.31465814]]

In [ ]:

Prettify this¶

In [18]:

fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(X[:, 0], X[:, 1], alpha=0.6, label='Original Data')
ax.scatter(X_reconstructed[:, 0], X_reconstructed[:, 1], alpha=0.6, label='Reconstructed Data (1D)')
for i in range(n_samples):
    ax.plot([X[i, 0], X_reconstructed[i, 0]],
            [X[i, 1], X_reconstructed[i, 1]],
            'r--', alpha=0.3)

# Draw first principal component vector
origin = X_mean
pc1_vector = eigenvectors_sorted[:, 0] * 3  # scaled for visualization
ax.quiver(*origin, *pc1_vector, angles='xy', scale_units='xy', scale=1, color='green', label='1st PC')

ax.set_title("1D PCA Projection and Reconstruction")
ax.set_xlabel("x1")
ax.set_ylabel("x2")
ax.axis('equal')
ax.grid(True)
ax.legend()
plt.show()

# --------------------------
# Explained Variance
# --------------------------
explained_variance_ratio = eigenvalues_sorted[0] / np.sum(eigenvalues_sorted)
print(f"Explained Variance by 1st PC: {explained_variance_ratio:.2%}")

Explained Variance by 1st PC: 98.82%

In [ ]:

Example 2) PCA: 3D to 2D¶

In [1]:

import numpy as np
import matplotlib.pyplot as plt

Create a sample 3D dataset (10 samples, 3 features)¶

In [35]:

# Set seed for reproducibility
np.random.seed(42)

# Generate random integers from 0 to 15 inclusive
X = np.random.randint(0, 16, size=(10, 3))

print("Actual data:\n", X)

Actual data:
 [[ 6  3 12]
 [14 10  7]
 [12  4  6]
 [ 9  2  6]
 [10 10  7]
 [ 4  3  7]
 [ 7  2  5]
 [ 4  1  7]
 [11 13  5]
 [ 1 15 11]]

In [ ]:

Step 1: Standardize the data¶

In [23]:

X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
X_scaled = (X - X_mean) / X_std

print("Standardized Data:\n", X_scaled)

Standardized Data:
 [[-0.46229895 -0.67346939  2.09980514]
 [ 1.59236304  0.75510204 -0.13403012]
 [ 1.07869754 -0.46938776 -0.58079717]
 [ 0.3081993  -0.87755102 -0.58079717]
 [ 0.56503205  0.75510204 -0.13403012]
 [-0.97596444 -0.67346939 -0.13403012]
 [-0.2054662  -0.87755102 -1.02756422]
 [-0.97596444 -1.08163265 -0.13403012]
 [ 0.8218648   1.36734694 -1.02756422]
 [-1.74646269  1.7755102   1.65303809]]

In [24]:

# Compare original and scaled

print(X[0], "--->", X_scaled[0])  # 1st point
print(X[1], "--->", X_scaled[1])  # 2nd point

[ 6  3 12] ---> [-0.46229895 -0.67346939  2.09980514]
[14 10  7] ---> [ 1.59236304  0.75510204 -0.13403012]

In [ ]:

Step 2: Compute covariance matrix (features as columns)¶

In [25]:

cov_matrix = np.cov(X_scaled.T)  # shape: (3, 3)

print("Covariance Matrix:\n", cov_matrix)

Covariance Matrix:
 [[ 1.11111111  0.11997176 -0.59157118]
 [ 0.11997176  1.11111111  0.20362852]
 [-0.59157118  0.20362852  1.11111111]]

In [ ]:

Step 3: Compute eigenvalues and eigenvectors¶

In [26]:

eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

print("Eigenvalues:\n", eigenvalues)
print("\nEigenvectors:\n", eigenvectors)

Eigenvalues:
 [0.4410725  1.70900006 1.18326077]

Eigenvectors:
 [[ 0.65804062 -0.68851856  0.30483558]
 [-0.32434184  0.10617556  0.9399623 ]
 [ 0.67954758  0.71740431  0.15344755]]

In [27]:

# print eigen value and its corresponding eigen vector

print(eigenvalues[0], "-->", eigenvectors[:, 0])
print(eigenvalues[1], "-->", eigenvectors[:, 1])
print(eigenvalues[2], "-->", eigenvectors[:, 2])

0.441072499257265 --> [ 0.65804062 -0.32434184  0.67954758]
1.7090000646535835 --> [-0.68851856  0.10617556  0.71740431]
1.183260769422485 --> [0.30483558 0.9399623  0.15344755]

In [ ]:

Step 4: Sort eigenvalues and eigenvectors in descending order¶

In [28]:

sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues_sorted = eigenvalues[sorted_indices]
eigenvectors_sorted = eigenvectors[:, sorted_indices]

print("Sorted Eigenvalues:\n", eigenvalues_sorted)
print("\nSorted Eigenvectors:\n", eigenvectors_sorted)

Sorted Eigenvalues:
 [1.70900006 1.18326077 0.4410725 ]

Sorted Eigenvectors:
 [[-0.68851856  0.30483558  0.65804062]
 [ 0.10617556  0.9399623  -0.32434184]
 [ 0.71740431  0.15344755  0.67954758]]

In [ ]:

Step 5: Select top k eigenvectors (k=2 here)¶

In [29]:

k = 2
W = eigenvectors_sorted[:, :k]  # projection matrix (3x2)

print(f"Projection Matrix W (top {k} eigenvectors):\n", W)

Projection Matrix W (top 2 eigenvectors):
 [[-0.68851856  0.30483558]
 [ 0.10617556  0.9399623 ]
 [ 0.71740431  0.15344755]]

In [ ]:

Step 6: Transform data to new subspace¶

In [30]:

X_pca = X_scaled @ W  # shape: (10, 2)

print("\nTransformed Data (Reduced to 2D):\n", X_pca)

Transformed Data (Reduced to 2D):
 [[ 1.75320467 -0.45175104]
 [-1.11235191  1.17460977]
 [-1.20920718 -0.2015033 ]
 [-0.7220418  -0.82003666]
 [-0.40501545  0.86144273]
 [ 0.50430987 -0.95111111]
 [-0.68888617 -1.04517549]
 [ 0.4609729  -1.33476919]
 [-1.15786934  1.37811099]
 [ 2.57688441  1.39018333]]

In [31]:

# Optional: Visualize the 2D projection
plt.scatter(X_pca[:, 0], X_pca[:, 1], color='blue', label='PCA Projection')
plt.title("PCA - 2D Projection of 3D Data")
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.grid(True)
plt.legend()
plt.show()

In [ ]:

Step 7 (Optional): Reconstruct original data (approximate)¶

In [32]:

X_reconstructed = X_pca @ W.T  # shape: (10, 3)
X_reconstructed = (X_reconstructed * X_std) + X_mean  # reverse standardization

print("Actual data:\n",X)
print("\nReconstructed Data (Approximate Original):\n", X_reconstructed)

Actual data:
 [[ 6  3 12]
 [14 10  7]
 [12  4  6]
 [ 9  2  6]
 [10 10  7]
 [ 4  3  7]
 [ 7  2  5]
 [ 4  1  7]
 [11 13  5]
 [ 1 15 11]]

Reconstructed Data (Approximate Original):
 [[ 2.56381513  5.13144082  9.96008088]
 [12.17614672 11.1313231   5.91725465]
 [10.80248398  4.7428106   5.28908487]
 [ 8.76234935  2.14741299  5.85891676]
 [ 9.90821655 10.05693262  6.94551201]
 [ 5.31916679  2.1817303   7.78313411]
 [ 8.40624762  1.12771469  5.83483035]
 [ 4.97997979  0.39212557  7.58177298]
 [12.53970629 12.04493123  5.91405918]
 [ 2.54188778 14.04357807 11.91535423]]

In [ ]:

In [28]:

# Analysis
explained_ratio = np.sum(eigenvalues_sorted[:k]) / np.sum(eigenvalues_sorted)
print(f"Explained Variance Ratio: {explained_ratio:.4f}")

Explained Variance Ratio: 0.8391

In [ ]:

PCA – Implementation

Example1) PCA 2D to 1D¶

Create 2D dataset¶

Step 1: Standardize the data¶

Step 2: Compute Covariance Matrix¶

Step 3: Compute Eigenvalues and Eigenvectors¶

Step 4: Sort Eigenvalues and Select Top k=1¶

Step 5: Project Data to 1D¶

Step 6: Reconstruct to Original 2D Space¶

Step 7: Plot Reconstructed vs Original¶

Prettify this¶

Example 2) PCA: 3D to 2D¶

Create a sample 3D dataset (10 samples, 3 features)¶

Step 1: Standardize the data¶

Step 2: Compute covariance matrix (features as columns)¶

Step 3: Compute eigenvalues and eigenvectors¶

Step 4: Sort eigenvalues and eigenvectors in descending order¶

Step 5: Select top k eigenvectors (k=2 here)¶

Step 6: Transform data to new subspace¶

Step 7 (Optional): Reconstruct original data (approximate)¶

Leave a Comment Cancel Reply

Categories

Archives

GET HELP

COURSES

CONTACT US

PCA – Implementation

Example1) PCA 2D to 1D¶

Create 2D dataset¶

Step 1: Standardize the data¶

Step 2: Compute Covariance Matrix¶

Step 3: Compute Eigenvalues and Eigenvectors¶

Step 4: Sort Eigenvalues and Select Top k=1¶

Step 5: Project Data to 1D¶

Step 6: Reconstruct to Original 2D Space¶

Step 7: Plot Reconstructed vs Original¶

Prettify this¶

Example 2) PCA: 3D to 2D¶

Create a sample 3D dataset (10 samples, 3 features)¶

Step 1: Standardize the data¶

Step 2: Compute covariance matrix (features as columns)¶

Step 3: Compute eigenvalues and eigenvectors¶

Step 4: Sort eigenvalues and eigenvectors in descending order¶

Step 5: Select top k eigenvectors (k=2 here)¶

Step 6: Transform data to new subspace¶

Step 7 (Optional): Reconstruct original data (approximate)¶

Leave a Comment Cancel Reply

Categories

Archives

Tags

GET HELP

COURSES

CONTACT US

Search