Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added ridge_regression.py #12553

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions machine_learning/ridge_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets


# Ridge Regression function
# reference : https://en.wikipedia.org/wiki/Ridge_regression
def ridge_cost_function(
    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(θ) = (1/2m) * Σ (y_i - hθ(x))^2 + (α/2) * Σ θ_j^2 (for j=1 to n)

    Where:
    - J(θ) is the cost function we aim to minimize
    - m is the number of training examples
    - hθ(x) = x · θ (prediction)
    - y_i is the actual target value for example i
    - α is the regularization parameter

    @param x: The feature matrix (m x n); column 0 is assumed to be the bias
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter

    @returns: The computed cost value

    >>> import numpy as np
    >>> x = np.array([[1.0, 0.0], [1.0, 1.0]])
    >>> y = np.array([1.0, 2.0])
    >>> float(ridge_cost_function(x, y, np.array([0.0, 0.0]), 0.1))
    1.25
    >>> float(ridge_cost_function(x, y, np.array([1.0, 1.0]), 0.1))
    0.05
    """
    m = len(y)
    predictions = np.dot(x, theta)
    # Mean-squared-error term plus the L2 penalty; theta[0] (the bias/intercept
    # weight) is excluded from regularization, per standard ridge convention.
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (alpha / 2) * np.sum(
        theta[1:] ** 2
    )

    return cost


def ridge_gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Perform gradient descent to minimize the
    cost function and fit the Ridge regression model.

    Prints the cost every 100 iterations as a progress report.

    @param x: The feature matrix (m x n); column 0 is assumed to be the bias
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,);
                  this array is NOT modified — a copy is optimized instead
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent

    @returns: The optimized parameters (weights) of the model (n,)
    """
    m = len(y)
    # Work on a float64 copy: the original in-place `theta -= ...` update
    # silently clobbered the caller's array (e.g. the script's theta_initial),
    # and would also fail outright for an integer-dtype theta.
    theta = np.array(theta, dtype=np.float64)

    for iteration in range(max_iterations):
        predictions = np.dot(x, theta)
        error = predictions - y

        # calculate the gradient; the bias weight (index 0) is not
        # regularized, so the L2 term only touches theta[1:]
        gradient = (1 / m) * np.dot(x.T, error)
        gradient[1:] += (alpha / m) * theta[1:]
        theta -= learning_rate * gradient

        if iteration % 100 == 0:
            cost = ridge_cost_function(x, y, theta, alpha)
            print(f"Iteration {iteration}, Cost: {cost}")

    return theta


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Load California Housing dataset (downloaded on first use)
    california_housing = datasets.fetch_california_housing()
    x = california_housing.data[:, :2]  # 2 features for simplicity
    y = california_housing.target
    # Standardize features (zero mean, unit variance) so gradient descent
    # converges with a single learning rate across features.
    x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)

    # Add a bias column (intercept) to X
    x = np.c_[np.ones(x.shape[0]), x]

    # Initialize parameters (theta)
    theta_initial = np.zeros(x.shape[1])

    # Set hyperparameters
    alpha = 0.1
    learning_rate = 0.01
    max_iterations = 1000

    optimized_theta = ridge_gradient_descent(
        x, y, theta_initial, alpha, learning_rate, max_iterations
    )
    print(f"Optimized theta: {optimized_theta}")

    # Prediction
    def predict(features: np.ndarray, theta: np.ndarray) -> np.ndarray:
        """Return the model's predictions features · theta (shape (m,))."""
        return np.dot(features, theta)

    y_pred = predict(x, optimized_theta)

    # Plotting the results (here we visualize predicted vs actual values)
    plt.figure(figsize=(10, 6))
    plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
    plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit")
    plt.xlabel("Actual values")
    plt.ylabel("Predicted values")
    plt.title("Ridge Regression: Actual vs Predicted Values")
    plt.legend()
    plt.show()