import numpy as np
import matplotlib.pyplot as plt
# Function to generate synthetic data
def generate_data(n_samples, n_features):
    np.random.seed(0)
    theta_true = np.random.randn(n_features, 1)
    X = 2 * np.random.rand(n_samples, n_features)
    y = X.dot(theta_true) + np.random.randn(n_samples, 1)
    # Remove 70% of the rows to form the data-poor subset
    idx = np.random.choice(n_samples, size=int(0.7 * n_samples), replace=False)
    X_subset = np.delete(X, idx, axis=0)
    y_subset = np.delete(y, idx, axis=0)
    return theta_true, X, y, X_subset, y_subset
# Define gradient descent function for linear regression
def gradient_descent(X, y, learning_rate=0.01, n_iterations=10000):
    n, d = X.shape
    theta = np.random.randn(d, 1)  # random initialization
    thetas = []
    losses = []
    for iteration in range(n_iterations):
        gradients = 2 / n * X.T.dot(X.dot(theta) - y)
        theta = theta - learning_rate * gradients
        thetas.append(theta)
        # Calculate loss (Mean Squared Error)
        loss = np.mean((X.dot(theta) - y) ** 2)
        losses.append(loss)
    return thetas, losses
# Generate synthetic data
n_features = 75
n_samples = 100
theta_true, X, y, X_subset, y_subset = generate_data(n_samples, n_features)
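# Quick sanity check (an illustrative addition, not part of the original analysis):
# the data-rich design matrix keeps all n_samples rows, while the data-poor subset
# keeps only the rows left after 70% of them are removed.
print('data-rich X:', X.shape, '| data-poor X_subset:', X_subset.shape)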
# Train linear regression models
thetas_rich, losses_rich = gradient_descent(X, y)
thetas_poor, losses_poor = gradient_descent(X_subset, y_subset)
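# Illustrative diagnostics (an addition, not part of the original experiment):
# print the final training MSE in each regime and check that the fixed step size
# stays below the classical gradient-descent stability bound 2 / lambda_max, where
# lambda_max is the largest eigenvalue of the MSE Hessian 2/n * X^T X.
for name, (X_reg, losses_reg) in {'data-rich': (X, losses_rich),
                                  'data-poor': (X_subset, losses_poor)}.items():
    lam_max = np.linalg.eigvalsh(2 / X_reg.shape[0] * X_reg.T.dot(X_reg)).max()
    print(f'{name}: final MSE = {losses_reg[-1]:.4f}, '
          f'step-size bound 2/lambda_max = {2 / lam_max:.4f}')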
# Plot the convergence of the loss in the data-rich regime
plt.figure(figsize=(10, 5))
plt.plot(losses_rich[0:500], label='Data-Rich Regime', color='blue')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error (Loss)')
plt.title('Loss Convergence under 100 Samples (Data-Rich Regime)')
plt.legend()
plt.grid(True)
plt.show()
# Plot the convergence of the loss in the data-poor regime
plt.figure(figsize=(10, 5))
plt.plot(losses_poor[0:500], label='Data-Poor Regime', color='red')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error (Loss)')
plt.title('Loss Convergence under 30 Samples (Data-Poor Regime)')
plt.legend()
plt.grid(True)
plt.show()
# Function to calculate squared distance
def squared_distances(thetas, theta_opt):
    distances = [np.linalg.norm(theta - theta_opt) ** 2 for theta in thetas]
    return distances
distances_true_rich = squared_distances(thetas_rich, theta_true)
distances_true_poor = squared_distances(thetas_poor, theta_true)
# Plot the convergence of distances to the true model for the data-rich and data-poor regimes
plt.figure(figsize=(10, 5))
plt.plot(distances_true_rich[0:5000], label='Data-Rich Regime', color='blue')
plt.plot(distances_true_poor[0:5000], label='Data-Poor Regime', color='red')
plt.plot(range(5000), [np.linalg.norm(theta_true) ** 2]*5000, label='Squared Norm of the True Model', color='green')
plt.xlabel('Iteration')
plt.ylabel('Squared Distance to the True Model')
plt.title('Convergence to the True Model')
plt.legend()
plt.grid(True)
plt.show()
theta_opt_rich = np.linalg.pinv(X).dot(y)
theta_opt_poor = np.linalg.pinv(X_subset).dot(y_subset)  # use the data-poor subset, not the full data
distances_opt_rich = squared_distances(thetas_rich, theta_opt_rich)
distances_opt_poor = squared_distances(thetas_poor, theta_opt_poor)
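# Rough check (illustrative addition): in the data-rich regime the iterates should move
# towards the least-squares solution, so the last recorded squared distance is expected
# to be noticeably smaller than the first one.
print('squared distance to theta_opt_rich: first =', distances_opt_rich[0],
      '| last =', distances_opt_rich[-1])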
# Plot the convergence of distances to the optimal model under the data-poor regime
plt.figure(figsize=(10, 5))
plt.plot(distances_opt_poor[0:5000], label='Data-Poor Regime', color='red')
plt.plot(range(5000), [np.linalg.norm(theta_opt_poor) ** 2]*5000, label='Squared Norm of the Optimal Model', color='purple')
plt.xlabel('Iteration')
plt.ylabel('Squared Distance to the Optimal Model')
plt.title('Convergence to the Optimal Model (Data-Poor Regime)')
plt.legend()
plt.grid(True)
plt.show()
# Plot the convergence of distances to the optimal model under the data-rich regime
plt.figure(figsize=(10, 5))
plt.plot(distances_opt_rich[0:5000], label='Data-Rich Regime', color='blue')
plt.plot(range(5000), [np.linalg.norm(theta_opt_rich) ** 2]*5000, label='Squared Norm of the Optimal Model', color='purple')
plt.xlabel('Iteration')
plt.ylabel('Squared Distance to the Optimal Model')
plt.title('Convergence to the Optimal Model (Data-Rich Regime)')
plt.legend()
plt.grid(True)
plt.show()
# Define gradient descent function for linear regression with L2 regularization
def gradient_descent_with_regularization(X, y, learning_rate=0.01, l2_penalty=0.01, n_iterations=10000):
    n, d = X.shape
    theta = np.random.randn(d, 1)  # random initialization
    thetas = []
    losses = []
    for iteration in range(n_iterations):
        # Compute gradient of the loss function
        gradients = 2 / n * X.T.dot(X.dot(theta) - y)
        # Add gradient of L2 regularization term
        gradients += 2 * l2_penalty * theta
        # Update parameter using gradient descent
        theta = theta - learning_rate * gradients
        thetas.append(theta)
        # Calculate loss (Mean Squared Error plus L2 regularization term)
        loss = np.mean((X.dot(theta) - y) ** 2) + l2_penalty * np.sum(theta ** 2)
        losses.append(loss)
    return thetas, losses
l2_penalty = 0.01
# Closed-form ridge solution for the data-poor regime: the minimizer of the regularized
# objective is (X^T X + n * l2_penalty * I)^{-1} X^T y, with n the number of subset samples
n_poor = X_subset.shape[0]
theta_opt_poor_l2 = np.linalg.inv(X_subset.T.dot(X_subset) + n_poor * l2_penalty * np.eye(n_features)).dot(X_subset.T.dot(y_subset))
# Train linear regression models
thetas_poor_l2, losses_poor_l2 = gradient_descent_with_regularization(X_subset, y_subset)
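# Illustrative check (not in the original code): with the step size and iteration budget
# above, the regularized iterates should drift towards the closed-form ridge solution,
# so the squared distance to theta_opt_poor_l2 should shrink over the run.
dists_to_ridge = squared_distances(thetas_poor_l2, theta_opt_poor_l2)
print('squared distance to ridge solution: first =', dists_to_ridge[0],
      '| last =', dists_to_ridge[-1])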
distances_true_poor_l2 = squared_distances(thetas_poor_l2, theta_true)
# Plot the convergence of distances to the true model under the data-poor regime
plt.figure(figsize=(10, 5))
plt.plot(distances_true_poor_l2, label='Data-Poor Regime with Regularization', color='orange')
plt.plot(distances_true_poor, label='Data-Poor Regime', color='red')
plt.plot(range(10000), [np.linalg.norm(theta_true) ** 2]*10000, label='Squared Norm of the True Model', color='green')
plt.xlabel('Iteration')
plt.ylabel('Squared Distance to the True Model')
plt.title('Convergence to the True Model (Data-Poor Regime)')
plt.legend()
plt.grid(True)
plt.show()
distances_opt_poor_l2 = squared_distances(thetas_poor_l2, theta_opt_poor)
# Plot the convergence of distances to the optimal model under the data-poor regime
plt.figure(figsize=(10, 5))
plt.plot(distances_opt_poor_l2, label='Data-Poor Regime with Regularization', color='orange')
plt.plot(distances_opt_poor, label='Data-Poor Regime', color='red')
plt.plot(range(10000), [np.linalg.norm(theta_opt_poor_l2) ** 2]*10000, label='Squared Norm of the Optimal Model', color='purple')
plt.xlabel('Iteration')
plt.ylabel('Squared Distance to the Optimal Model')
plt.title('Convergence to the Optimal Model (Data-Poor Regime)')
plt.legend()
plt.grid(True)
plt.show()
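# Closing summary (illustrative addition): compare how close each data-poor run ends up
# to the true parameter vector. Regularization is expected to help in this regime, but
# the exact numbers depend on the random seed and on the penalty strength chosen above.
print('final squared distance to theta_true, no regularization:', distances_true_poor[-1])
print('final squared distance to theta_true, L2 regularization: ', distances_true_poor_l2[-1])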