Commit bb45d191 authored by Antoine Rollet

Upload New File

parent 0e496a8d
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as hrch
import matplotlib.pyplot as plt
import sklearn.metrics as metr
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
# Load the hotels dataset. The first two columns are left out of the analysis
# (presumably identifiers/labels -- confirm against the CSV); every remaining
# column is used as a feature.
df = pd.read_csv("C:/Users/Antoine/Downloads/hotels.csv")
features = df.iloc[:, 2:]

# Pairwise correlation between the feature columns.
print(features.corr())

# Center and scale every feature.
SS = StandardScaler()
df_centre_reduit = SS.fit_transform(features)

# Sanity check: column means should be ~0 and standard deviations 1.
print(np.mean(df_centre_reduit, axis=0))
print(np.std(df_centre_reduit, axis=0))
# Hierarchical clustering of the standardized observations. linkage() is
# called with its default method and metric.
L = hrch.linkage(df_centre_reduit)
# Uncomment the next two lines to display the dendrogram:
# hrch.dendrogram(L)
# plt.show()

# FC holds the flat cluster label of every observation, obtained by cutting
# the tree at a distance threshold of 2.1.
FC = hrch.fcluster(L, 2.1, criterion="distance")

# silhouette_score is only defined when 2 <= n_clusters <= n_samples - 1 and
# raises ValueError otherwise, so check the cut before scoring it.
n_found = len(np.unique(FC))
if 2 <= n_found <= len(FC) - 1:
    print(metr.silhouette_score(df_centre_reduit, FC))
else:
    print("silhouette score undefined for", n_found, "cluster(s)")
# PCA projection of the standardized data; only the first two components are
# used, to draw the clusters in 2-D.
my_PCA = PCA()
pca_df = my_PCA.fit_transform(df_centre_reduit)

# One colour per cluster. "pink" replaces "rose", which is not a colour name
# matplotlib recognises and made scatter() fail as soon as a ninth cluster
# was present.
couleurs = ["red", "blue", "green", "yellow", "orange", "purple", "black", "gray", "pink"]
# fcluster labels start at 1, hence the -1. The modulo recycles the palette
# instead of raising IndexError when there are more clusters than colours.
c = [couleurs[(i - 1) % len(couleurs)] for i in FC]
plt.scatter(pca_df[:, 0], pca_df[:, 1], c=c)
# plt.show()
## KMeans: change n_cluster to try a different number of clusters.
n_cluster = 5
my_kmeans = KMeans(n_clusters=n_cluster)
# Only the fitted labels are used below, so fit() is enough; the distance
# matrix returned by fit_transform() was being computed and thrown away.
my_kmeans.fit(df_centre_reduit)
# KMeans labels start at 0. The modulo recycles the palette instead of
# raising IndexError when n_cluster exceeds the number of colours.
c = [couleurs[i % len(couleurs)] for i in my_kmeans.labels_]
# Draw on a fresh figure: without it these points land on the same axes as
# the earlier scatter plot (same coordinates) and completely cover it.
plt.figure()
plt.scatter(pca_df[:, 0], pca_df[:, 1], c=c)
# plt.show()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment