Commit f63fea2a authored by Armand S's avatar Armand S

added linear regression

computed data visualization for linear regression
parent c8a9fee8
# -----------------------------------------------------------
# CS229: Machine Learning Assignment 1
#
# author: Armand Sumo
#
# email: armandsumo@gmail.com
# -----------------------------------------------------------
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
"""
Part 1: Logistic Regression
"""
x = np.loadtxt(open("data/logistic_x.txt","r")) #input array x = np.loadtxt(open("data/logistic_x.txt","r")) #input array
y = np.loadtxt(open("data/logistic_y.txt","r")) #target array y = np.loadtxt(open("data/logistic_y.txt","r")) #target array
...@@ -50,11 +59,12 @@ def newton(theta,x,y,tolerance=0.01,itercount=False): ...@@ -50,11 +59,12 @@ def newton(theta,x,y,tolerance=0.01,itercount=False):
print('Number of loop iterations:',niter) print('Number of loop iterations:',niter)
return theta return theta
theta = newton(theta,x,y) theta = newton(theta,x,y)
print("parameter vector theta:",theta) print("parameter vector theta for logistic regression:",theta)
#Plot Training data and decision boundary fit by logistic regression #Plot Training data and decision boundary fit by logistic regression
#plt.title("Training data and decision boundary fit by logistic regression") #plt.title("Training data and decision boundary fit by logistic regression")
plt.figure(dpi=200)
plus = np.where(y>0)[0] plus = np.where(y>0)[0]
minus = np.where(y<0)[0] minus = np.where(y<0)[0]
plt.scatter(x[plus,1],x[plus,2],color = "r", marker="+",label="y= 1") plt.scatter(x[plus,1],x[plus,2],color = "r", marker="+",label="y= 1")
...@@ -73,4 +83,61 @@ min2, max2 = x[:, 2].min()-1, x[:, 2].max()+1 ...@@ -73,4 +83,61 @@ min2, max2 = x[:, 2].min()-1, x[:, 2].max()+1
xbound = np.arange(min1,max1,0.1) xbound = np.arange(min1,max1,0.1)
ybound = boundary(theta,xbound) ybound = boundary(theta,xbound)
plt.plot(xbound,ybound,label="decision boundary") plt.plot(xbound,ybound,label="decision boundary")
plt.legend() plt.legend()
\ No newline at end of file
"""
Part 5: Regression for denoising quasar spectra
"""
# import numpy as np
# import matplotlib.pyplot as plt
import pandas as pd
# load quasar data for first training example
data = pd.read_csv("D:/stanford-cs229/assigment 1/data/quasar_train.csv", nrows=2,header=None).to_numpy()
x1 = np.expand_dims(data[0],axis=1)
y1 = np.expand_dims(data[1],axis=1)
m1 = x1.shape[0] #number of training examples
x1 = np.c_[np.ones(m1),x1] #append intercept term to x
theta1 = np.zeros((x1.shape[1],1)) #theta: parameter array
#first we do non-weighted linear regression
#the value of theta that minimizes our cost function is given by:
theta1 = np.dot(np.linalg.inv(x1.T@x1),x1.T@y1)
print("parameter vector theta for unweighted linear regression:",theta1)
#the straight line fit by non-weighted linear regression is given by:
y_nw = np.dot(x1,theta1)
#plot non-weighted linear regression raw and predicted output values
plt.figure(dpi=200)
plt.xlabel("Wavelength λ")
plt.ylabel("Flux")
plt.plot(x1[:,1],y1,color="c",label="raw y")
plt.plot(x1[:,1],y_nw,color="b",label="fit y",linewidth=1.2)
plt.legend()
#weighted linear regression
tau = [1,5,10,100,1000] #bandwidth parameters
y_w = np.zeros((len(tau),x1.shape[0]))
for i in range(len(tau)):
for j in range(x1.shape[0]):
w_j = np.exp(-(x1[j,1]-x1[:,1])**2/(2*(tau[i])**2))
W = np.diag(w_j)
theta2 = np.linalg.inv(np.dot(x1.T,W).dot(x1)).dot(np.dot(x1.T,W).dot(y1))
y_w[i,j] = np.squeeze(np.dot(theta2.T,x1[j,:]))
for i in range(len(tau)):
plt.figure(dpi=200)
plt.plot(x1[:,1],y1,color="c",label="raw y")
plt.plot(x1[:,1],y_w[i,:],color="b",label="fit y with τ="+str(tau[i]),linewidth=1.2)
plt.xlabel("Wavelength λ")
plt.ylabel("Flux")
plt.legend()
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment