# Commit 5d6c4435, authored by Antoine Rollet
#
# Upload New File
#
# Parent commit: 47ef245d
# Pipeline #200 failed with stages
import numpy as np
import sklearn as skl
import pandas as pd
import matplotlib.pyplot as plt
## Perceptron class definition
class Perceptron:
    """Binary linear classifier trained with the stochastic perceptron rule.

    Labels are expected to be -1 or +1.  A sample ``x`` is classified as +1
    when ``x . vect_w + w0 >= 0`` and as -1 otherwise; the same rule is used
    during training and prediction.
    """

    def __init__(self, dimension, max_iter, learning_rate=0.1):
        """Create a perceptron with random initial weights.

        Args:
            dimension: Number of features per sample.
            max_iter: Number of stochastic update steps performed by ``fit``.
            learning_rate: Step size applied to each correction.
        """
        self.dim = dimension
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        # Weights and bias both start uniformly at random in [0, 1).
        self.vect_w = np.random.random(dimension)
        self.w0 = np.random.random()

    def _score(self, samples):
        """Return the raw linear score(s) ``samples . vect_w + w0``."""
        return np.asarray(samples, dtype=float) @ self.vect_w + self.w0

    def fit(self, x_train, y_train):
        """Train in place on randomly drawn samples.

        At each of the ``max_iter`` steps one sample is drawn uniformly at
        random (with replacement); the weights are corrected only when that
        sample is misclassified.

        Args:
            x_train: Samples, one row per sample.
            y_train: Labels (-1 or +1), aligned with ``x_train``.
        """
        x_train = np.asarray(x_train, dtype=float)
        y_train = np.asarray(y_train)
        n_samples = len(x_train)
        for _ in range(self.max_iter):
            idx = np.random.randint(n_samples)
            sample, target = x_train[idx], y_train[idx]
            # Same threshold as predict(), so a score of exactly 0 is
            # treated consistently in training and inference.
            predicted = 1 if self._score(sample) >= 0 else -1
            if predicted != target:
                self.vect_w += self.learning_rate * target * sample
                self.w0 += self.learning_rate * target

    def predict(self, x_predict):
        """Return an integer array of predicted labels (-1 or +1).

        Args:
            x_predict: Samples, one row per sample.
        """
        samples = np.asarray(x_predict, dtype=float)
        if samples.size == 0:
            return np.empty(0, dtype=int)
        return np.where(self._score(samples) >= 0, 1, -1)

    def test(self, x_test, y_test):
        """Print and return the accuracy (between 0 and 1) on a labelled set.

        Args:
            x_test: Samples, one row per sample.
            y_test: True labels (-1 or +1), aligned with ``x_test``.
        """
        y_predicted = self.predict(x_test)
        # With labels in {-1, +1}, |prediction - truth| / 2 is 1 for an
        # error and 0 for a correct answer.
        error_rate = np.sum(np.abs(y_predicted - np.asarray(y_test)) / 2) / len(y_test)
        print("La précision du modèle est de ",(1-error_rate)*100," %.")
        return 1 - error_rate
## K-means class definition
class Kmeans:
    """K-means clustering with randomly initialised centroids.

    The centroids ("representants") are stored in ``self.representants`` as
    an array with one row per cluster.
    """

    def __init__(self, dimension, max_iter, n_clusters):
        """Create the model with placeholder random centroids.

        Args:
            dimension: Number of features per sample.
            max_iter: Number of assignment/update rounds performed by ``fit``.
            n_clusters: Number of clusters.
        """
        self.dim = dimension
        self.max_iter = max_iter
        self.n_clusters = n_clusters
        # One row per cluster, `dimension` columns, values in [0, 1).
        # fit() replaces these with centroids drawn inside the data range.
        self.representants = np.random.random((n_clusters, dimension))
        print(self.representants)

    def fit(self, x_train, show_progress=True):
        """Cluster ``x_train`` and return the cluster index of each sample.

        Args:
            x_train: Samples, one row per sample.
            show_progress: When true, every updated centroid is drawn in grey
                with ``plt.scatter`` (first two coordinates only) so the
                caller's figure shows how the centroids move.

        Returns:
            Integer array with one cluster index per sample, as computed by
            the last assignment step (all zeros when ``max_iter`` is 0).

        Raises:
            ValueError: If ``x_train`` is empty or not two-dimensional.
        """
        x_train = np.asarray(x_train, dtype=float)
        if x_train.ndim != 2 or len(x_train) == 0:
            raise ValueError("x_train must be a non-empty 2-D array")

        # Draw the initial centroids uniformly inside the bounding box of
        # the data: min + r * (max - min) with r in [0, 1).
        mins = x_train.min(axis=0)
        maxs = x_train.max(axis=0)
        spread = np.random.rand(self.n_clusters, x_train.shape[1])
        self.representants = mins + spread * (maxs - mins)

        affectations = np.zeros(len(x_train), dtype=int)
        for _ in range(self.max_iter):
            # Assignment step: each sample goes to its nearest centroid
            # (Euclidean distance over all features; ties go to the lowest
            # cluster index).
            gaps = x_train[:, None, :] - self.representants[None, :, :]
            affectations = np.linalg.norm(gaps, axis=2).argmin(axis=1)

            # Update step: move each centroid to the mean of its members.
            for k in range(self.n_clusters):
                members = x_train[affectations == k]
                if len(members) != 0:
                    self.representants[k] = members.mean(axis=0)
                else:
                    # An empty cluster is re-seeded on a random sample.
                    self.representants[k] = x_train[np.random.randint(len(x_train))]
                if show_progress:
                    # Trace of the centroid positions over the iterations.
                    plt.scatter(self.representants[k][0], self.representants[k][1], c="grey")
        return affectations
# def get_data_clusters(self,x_train):
#
# affectations=np.array([-1 for e in x_train[:,0]])
# for i in range(len(x_train)):
#
# #ici on affecte chaque echantillon à son représentant le plus proche
#
# X=x_train[i]
# print((X[0])**2 + (X[1]**2))
#
# distance_des_representants = [ np.sqrt((X[0]-R[0])**2 + (X[1]-R[1])**2) for R in self.representants ]
#
# affect=distance_des_representants.index(min(distance_des_representants))
# affectations[i]=affect
# return affectations
## Iris data
df = pd.read_csv("C:/Users/Antoine Rollet/Documents/FISE_2021_L3/Data/iris.csv")
# Training set: the first 100 rows, with columns 0 and 2 as features and
# column 4 as the class name, encoded as -1 for "Iris-setosa" and +1 otherwise.
X_data = df.iloc[:100, [0, 2]].values
y_data = np.where(df.iloc[:100, 4].values == "Iris-setosa", -1, 1)
# Test set: every remaining row.  Open-ended slices are used so that neither
# row 100 nor the last row of the file is silently left out.
X_test = df.iloc[100:, [0, 2]].values
y_test = np.where(df.iloc[100:, 4].values == "Iris-setosa", -1, 1)
def affichage_donnees(X_data, y_data, c1="blue", c2="red"):
    """Scatter-plot samples coloured by label, then show the figure.

    Args:
        X_data: Samples, one row per sample; the first two columns are used
            as the x and y coordinates.
        y_data: Labels aligned with ``X_data``.
        c1: Colour for samples whose label equals 1.
        c2: Colour for every other sample.
    """
    points = np.asarray(X_data)
    # One colour per plotted sample; a single scatter call then draws them
    # all instead of creating one artist per point.
    colours = [c1 if label == 1 else c2 for label in y_data[:len(points)]]
    if len(points):
        plt.scatter(points[:, 0], points[:, 1], c=colours)
    plt.show()
affichage_donnees(X_data, y_data)
affichage_donnees(X_test, y_test, c1="green", c2="orange")

## Perceptron test
per = Perceptron(dimension=2, max_iter=100)
per.fit(X_data, y_data)
# Decision boundary: w[0]*x + w[1]*y + w0 = 0  =>  y = (w[0]*x + w0) / (-w[1])
h = np.linspace(4, 7, 100)
y = (per.vect_w[0] * h + per.w0) / (-per.vect_w[1])
plt.plot(h, y)
plt.show()
per.test(X_test, y_test)

## K-means test
K1 = Kmeans(dimension=2, max_iter=10, n_clusters=3)
# Cluster every row of the file (open-ended slice so the last row is kept).
X_data = df.iloc[:, [0, 2]].values
y_data = df.iloc[:, 4].values
affectations = K1.fit(X_data)
couleurs = ["red", "blue", "green"]
# Samples coloured by assigned cluster, final centroids in black.
plt.scatter(X_data[:, 0], X_data[:, 1], c=[couleurs[a] for a in affectations])
plt.scatter(K1.representants[:, 0], K1.representants[:, 1], c="black")
print(K1.representants)
plt.show()
# \ No newline at end of file
# Markdown is supported
# 0% or
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or sign in to comment