Commit f63fea2a authored by Armand S's avatar Armand S

added linear regression

computed data visualization for linear regression
parent c8a9fee8
# -----------------------------------------------------------
# CS229: Machine Learning Assignment 1
#
# author: Armand Sumo
#
# email: armandsumo@gmail.com
# -----------------------------------------------------------
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
"""
Part 1: Logistic Regression
"""
x = np.loadtxt(open("data/logistic_x.txt","r")) #input array x = np.loadtxt(open("data/logistic_x.txt","r")) #input array
y = np.loadtxt(open("data/logistic_y.txt","r")) #target array y = np.loadtxt(open("data/logistic_y.txt","r")) #target array
...@@ -50,11 +59,12 @@ def newton(theta,x,y,tolerance=0.01,itercount=False): ...@@ -50,11 +59,12 @@ def newton(theta,x,y,tolerance=0.01,itercount=False):
print('Number of loop iterations:',niter) print('Number of loop iterations:',niter)
return theta return theta
theta = newton(theta,x,y) theta = newton(theta,x,y)
print("parameter vector theta:",theta) print("parameter vector theta for logistic regression:",theta)
#Plot Training data and decision boundary fit by logistic regression #Plot Training data and decision boundary fit by logistic regression
#plt.title("Training data and decision boundary fit by logistic regression") #plt.title("Training data and decision boundary fit by logistic regression")
plt.figure(dpi=200)
plus = np.where(y>0)[0] plus = np.where(y>0)[0]
minus = np.where(y<0)[0] minus = np.where(y<0)[0]
plt.scatter(x[plus,1],x[plus,2],color = "r", marker="+",label="y= 1") plt.scatter(x[plus,1],x[plus,2],color = "r", marker="+",label="y= 1")
...@@ -73,4 +83,61 @@ min2, max2 = x[:, 2].min()-1, x[:, 2].max()+1 ...@@ -73,4 +83,61 @@ min2, max2 = x[:, 2].min()-1, x[:, 2].max()+1
xbound = np.arange(min1,max1,0.1) xbound = np.arange(min1,max1,0.1)
ybound = boundary(theta,xbound) ybound = boundary(theta,xbound)
plt.plot(xbound,ybound,label="decision boundary") plt.plot(xbound,ybound,label="decision boundary")
plt.legend() plt.legend()
\ No newline at end of file
"""
Part 5: Regression for denoising quasar spectra
"""
# import numpy as np
# import matplotlib.pyplot as plt
import pandas as pd
# load quasar data for first training example
data = pd.read_csv("D:/stanford-cs229/assigment 1/data/quasar_train.csv", nrows=2,header=None).to_numpy()
x1 = np.expand_dims(data[0],axis=1)
y1 = np.expand_dims(data[1],axis=1)
m1 = x1.shape[0] #number of training examples
x1 = np.c_[np.ones(m1),x1] #append intercept term to x
theta1 = np.zeros((x1.shape[1],1)) #theta: parameter array
#first we do non-weighted linear regression
#the value of theta that minimizes our cost function is given by:
theta1 = np.dot(np.linalg.inv(x1.T@x1),x1.T@y1)
print("parameter vector theta for unweighted linear regression:",theta1)
#the straight line fit by non-weighted linear regression is given by:
y_nw = np.dot(x1,theta1)
#plot non-weighted linear regression raw and predicted output values
plt.figure(dpi=200)
plt.xlabel("Wavelength λ")
plt.ylabel("Flux")
plt.plot(x1[:,1],y1,color="c",label="raw y")
plt.plot(x1[:,1],y_nw,color="b",label="fit y",linewidth=1.2)
plt.legend()
#weighted linear regression
tau = [1,5,10,100,1000] #bandwidth parameters
y_w = np.zeros((len(tau),x1.shape[0]))
for i in range(len(tau)):
for j in range(x1.shape[0]):
w_j = np.exp(-(x1[j,1]-x1[:,1])**2/(2*(tau[i])**2))
W = np.diag(w_j)
theta2 = np.linalg.inv(np.dot(x1.T,W).dot(x1)).dot(np.dot(x1.T,W).dot(y1))
y_w[i,j] = np.squeeze(np.dot(theta2.T,x1[j,:]))
for i in range(len(tau)):
plt.figure(dpi=200)
plt.plot(x1[:,1],y1,color="c",label="raw y")
plt.plot(x1[:,1],y_w[i,:],color="b",label="fit y with τ="+str(tau[i]),linewidth=1.2)
plt.xlabel("Wavelength λ")
plt.ylabel("Flux")
plt.legend()
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment