In [7]:
import numpy as np
import matplotlib.pyplot as plt
In [8]:
# Generate synthetic data: Y = X @ theta_true + Gaussian noise
def generate_data(n_samples, n_features):
    np.random.seed(0)
    theta_true = np.random.randn(n_features, 1)   # ground-truth coefficients
    X = 2 * np.random.rand(n_samples, n_features) # features uniform on [0, 2)
    Y = X.dot(theta_true) + np.random.randn(n_samples, 1)
    return theta_true, X, Y
In [9]:
reg_coeff = 0.05      # L1 penalty weight
n_iterations = 10000  # iterations for every solver
n_samples = 100
n_features = 300      # n_features > n_samples: underdetermined problem
In [10]:
def l1_subgradient(theta):
    # Elementwise subgradient of the L1 norm: sign(theta) where theta != 0;
    # at theta == 0 any value in [-1, 1] is valid, and +1 is used here
    g = np.ones(theta.shape)
    g[theta < 0.] = -1.0
    return g
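For intuition, a small check (not part of the original notebook) of the convention at zero: any value in [-1, 1] is a valid subgradient of |θ| at θ = 0, and this implementation picks +1.

# Hedged example: expected output [[ 1.], [-1.], [ 1.]]
print(l1_subgradient(np.array([[0.7], [-2.0], [0.0]])))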
In [11]:
def LASSO_loss(X, Y, theta, reg_coeff):
    # (1/n) * ||X theta - Y||^2 + reg_coeff * ||theta||_1
    return np.linalg.norm(X.dot(theta) - Y) ** 2 / X.shape[0] + reg_coeff * np.sum(np.abs(theta))
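A quick sanity check with a tiny hand-checkable example (assuming the loss normalizes by X.shape[0] as written above):

# X = I (2x2), Y = [3, 4]^T, theta = 0  ->  loss = (9 + 16) / 2 = 12.5
X_demo = np.eye(2)
Y_demo = np.array([[3.0], [4.0]])
print(LASSO_loss(X_demo, Y_demo, np.zeros((2, 1)), reg_coeff))  # 12.5
print(LASSO_loss(X_demo, Y_demo, Y_demo, reg_coeff))            # 0.05 * 7 = 0.35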
In [12]:
def subgradient_method(X, Y):
    n, d = X.shape
    np.random.seed(0)
    theta = np.random.randn(d, 1)
    thetas = []
    losses = []
    learning_rate = 0.001

    for iteration in range(n_iterations):
        # Subgradient of the objective: gradient of the smooth part
        # plus reg_coeff times a subgradient of the L1 penalty
        subgradient = 2/n * X.T.dot(X.dot(theta) - Y) + reg_coeff * l1_subgradient(theta)
        theta = theta - learning_rate * subgradient
        thetas.append(theta)

        loss = LASSO_loss(X, Y, theta, reg_coeff)
        losses.append(loss)

    return thetas, losses
In [13]:
def smoothness(X, n_samples):
    # Largest eigenvalue of the Hessian (2/n) X^T X (eigvalsh sorts ascending),
    # i.e. the Lipschitz constant of the gradient of the smooth part
    return 2 * np.linalg.eigvalsh(X.T.dot(X))[-1] / n_samples
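As a cross-check (not part of the original notebook), the largest eigenvalue of A^T A equals the square of A's largest singular value, so the same constant can be obtained from an SVD:

# Hedged cross-check on a small random matrix
A = np.random.rand(5, 3)
print(np.isclose(smoothness(A, 5), 2 * np.linalg.svd(A, compute_uv=False)[0] ** 2 / 5))  # True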
In [14]:
def shrinkage(theta, eta):
    # Soft-thresholding: the proximal operator of eta * ||.||_1
    return np.sign(theta) * np.maximum(np.abs(theta) - eta, 0.)
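A small illustrative example (hypothetical values): components with magnitude at most the threshold are set exactly to zero, the rest are shrunk toward zero by the threshold.

# Soft-thresholding with threshold 0.5
print(shrinkage(np.array([[-2.0], [0.3], [1.5]]), 0.5))  # [[-1.5], [0.], [1.]]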
In [15]:
def ISTA(X, Y):
    n, d = X.shape
    np.random.seed(0)
    theta = np.random.randn(d, 1)
    beta = smoothness(X, n)
    thetas = []
    losses = []

    for iteration in range(n_iterations):
        # Gradient step of length 1/beta on the smooth part, followed by
        # soft-thresholding (the prox of the scaled L1 penalty)
        gradient = 2/n * X.T.dot(X.dot(theta) - Y)
        theta = shrinkage(theta - gradient / beta, reg_coeff / beta)
        thetas.append(theta)

        loss = LASSO_loss(X, Y, theta, reg_coeff)
        losses.append(loss)

    return thetas, losses
In [16]:
def FISTA(X, Y):
    n, d = X.shape
    np.random.seed(0)
    theta = np.random.randn(d, 1)
    theta_old = theta.copy()
    beta = smoothness(X, n)
    thetas = []
    losses = []

    for iteration in range(n_iterations):
        # Nesterov momentum coefficient; at iteration 0 theta == theta_old,
        # so the extrapolation term vanishes and the coefficient is irrelevant
        coeff = (iteration - 1) / (iteration + 2)
        auxiliary = theta + coeff * (theta - theta_old)
        # Single proximal gradient step taken from the extrapolated point
        gradient = 2/n * X.T.dot(X.dot(auxiliary) - Y)
        theta_old = theta.copy()
        theta = shrinkage(auxiliary - gradient / beta, reg_coeff / beta)
        thetas.append(theta)

        loss = LASSO_loss(X, Y, theta, reg_coeff)
        losses.append(loss)

    return thetas, losses
In [17]:
theta_true, X, Y = generate_data(n_samples, n_features)
thetas_subgrad, losses_subgrad = subgradient_method(X, Y)
thetas_ista, losses_ista = ISTA(X, Y)
thetas_fista, losses_fista = FISTA(X, Y)
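ISTA with step size 1/beta is a descent method on the LASSO objective, so its loss sequence should be non-increasing up to floating-point error; FISTA's momentum can overshoot, so its losses need not be monotone. A quick hedged check (not part of the original notebook):

# ISTA should decrease the objective at every step, up to a small tolerance
ista_monotone = all(b <= a + 1e-10 for a, b in zip(losses_ista, losses_ista[1:]))
print("ISTA losses non-increasing:", ista_monotone)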
In [18]:
plt.figure(figsize=(10, 5))
plt.plot(losses_ista, label='ISTA', color='blue')
plt.plot(losses_fista, label='FISTA', color='green')
plt.plot(losses_subgrad, label='Subgradient Method', color='red')
plt.xlabel('Iteration')
plt.ylabel('LASSO Loss')
plt.title('Loss Convergence for LASSO')
plt.legend()
plt.grid(True)
plt.show()
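Because the shrinkage step sets coefficients exactly to zero, ISTA and FISTA typically return sparse iterates, while plain subgradient steps almost never do. A quick hedged comparison of the final iterates (not part of the original notebook):

# Count coefficients that are exactly zero in each method's final iterate
for name, th in [("Subgradient", thetas_subgrad[-1]),
                 ("ISTA", thetas_ista[-1]),
                 ("FISTA", thetas_fista[-1])]:
    print(f"{name}: {int(np.sum(th == 0.0))} of {th.size} coefficients exactly zero")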