diff --git a/README.md b/README.md index 5ce1ba8..37dc186 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Plotabit Dataset link: https://www.kaggle.com/datasets/fedesoriano/stellar-classification-dataset-sdss17 Deep Learning: https://machinelearningmastery.com/tutorial-first-neural-network-python-keras/ +Python Machine Learning: https://machinelearningmastery.com/machine-learning-in-python-step-by-step/ ## Columns @@ -20,3 +21,4 @@ Deep Learning: https://machinelearningmastery.com/tutorial-first-neural-network- - [ ] Train models with just 20000 "GALAXY" class (has an impact?) - [ ] Which model is the best, ratio learn_time/precision - [ ] Can we drop more categories and have same results (useless data?) +- [ ] Compare predictions with the y_test values that were misclassified diff --git a/src/main.py b/src/main.py index 85d5b75..1b46856 100755 --- a/src/main.py +++ b/src/main.py @@ -4,37 +4,52 @@ import pandas as pd import matplotlib.pyplot as plt import sklearn as sk -# Classification - -## KNN +from sklearn import svm +from sklearn.linear_model import SGDClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score -from sklearn.neighbors import KNeighborsClassifier from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -#charger les données -df=pd.read_csv('../data.csv') -# Clear datas -# alpha delta u g r i z redshift spec_OBJ_ID -# Y : class -x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1) -y = df['class'].values +# Open dataset with pandas +def read_dataset(filename): + df = pd.read_csv(filename) + return df + +# Drop useless columns and return x and y +def get_xy_from_dataset(filename): + df = read_dataset(filename) + x = 
df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1) + y = df['class'].values + return x, y + +x, y = get_xy_from_dataset("data.csv") x.hist() -plt.show() +#plt.show() -print(" Rentre un chiffre:\n\n1 - KNN\n2 - Tree\n3- RandomForestClassifier") +print("""Choose a model: +(1) - KNN +(2) - Tree +(3) - RandomForestClassifier +(4) - SGD +(5) - Linear SVC""") res = int(input()) -if(res == 1): + +if (res == 1): model = KNeighborsClassifier() -elif(res == 2): +elif (res == 2): model = DecisionTreeClassifier(random_state=0, max_depth=20) -elif(res == 3): +elif (res == 3): model = RandomForestClassifier(n_estimators=100 ,criterion='entropy') +elif (res == 4): + model = SGDClassifier(max_iter=1000, tol=0.01) +elif (res == 5): + model = svm.SVC(kernel='linear', C = 1.0) else: raise Exception('RENTRE LE BON NOMBRE GROS CON')