rewrite + conflict

2 years ago · 3882a5f0b3
parent fc899e7bee 7ccbb3df5d
commit 3882a5f0b3
1 changed files with 101 additions and 40 deletions
--- a/src/main.py
+++ b/src/main.py
@ -1,4 +1,5 @@
 #!/usr/bin/python3
+import os
 import numpy as np
 import pandas as pd 
 import matplotlib.pyplot as plt 
@ -12,7 +13,20 @@ from sklearn.ensemble import RandomForestClassifier

 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
+# from sklearn.externals.joblib import parallel_backend

+# main
+def main():
+    # User input
+    opt = prompt_display()
+    model = model_switch(opt)
+
+    # Get interesting data
+    df = read_dataset("data.csv")
+    x, y = get_xy_from_dataframe(df)
+
+    # Train model
+    training(model, x, y)

 # Open dataset with panda
 def read_dataset(filename):
@ -20,54 +34,101 @@ def read_dataset(filename):
    return df

 # Drop useless columns and return x and y
-def get_xy_from_dataset(filename):
-    df = read_dataset(filename)
+def get_xy_from_dataframe(df):
    x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1)
    y = df['class'].values
    return x, y 

-x, y = get_xy_from_dataset("data.csv")
-
-
-x.hist()
-#plt.show()
+# Ask for model choice
+def prompt_display():
+    print("""Choose a model:

-print("""Choose a model:
 (1) - KNN
 (2) - Tree
 (3) - RandomForestClassifier
 (4) - SGD
 (5) - Linear SVC""")
-res = int(input())
-
-if (res == 1):
-    model = KNeighborsClassifier()
-elif (res == 2):
-    model = DecisionTreeClassifier(random_state=0, max_depth=20)
-elif (res == 3):
-    model = RandomForestClassifier(n_estimators=100 ,criterion='entropy')
-elif (res == 4):
-    model = SGDClassifier(max_iter=1000, tol=0.01)
-elif (res == 5):
-    model = svm.SVC(kernel='linear', C = 1.0)
-else:
-    raise Exception('RENTRE LE BON NOMBRE GROS CON')
-
-
-
-
-Xtrain, Xtest, ytrain, ytest = train_test_split(x, y,test_size=0.25, random_state=0)
-
-Xtrain = Xtrain.values
-Xtest = Xtest.values
-
-if len(Xtrain.shape) < 2:
-    Xtrain = Xtrain.reshape(-1, 1)
-    
-if len(Xtest.shape) < 2:
-    Xtest = Xtest.reshape(-1, 1)
-model.fit(Xtrain,ytrain)
-
-ypredit = model.predict(Xtest)
-# print(ypredit)
-print(accuracy_score(ytest, ypredit))
+    return int(input())
+
+def model_switch(choice):
+    if (choice == 1):
+        model = KNeighborsClassifier()
+    elif (choice == 2):
+        model = DecisionTreeClassifier(random_state=0, max_depth=20)
+    elif (choice == 3):
+        model = RandomForestClassifier(n_estimators=100 ,criterion='entropy')
+    elif (choice == 4):
+        model = SGDClassifier(max_iter=1000, tol=0.01)
+    elif (choice == 5):
+        model = svm.SVC(kernel='linear', C = 1.0)    
+    else:
+        raise Exception('RENTRE LE BON NOMBRE GROS CON')       
+    
+    return model
+
+def plot_columns_hist(columns):
+    x.hist()
+    plt.show()
+    
+def printPredictedValues(ypredit,ytest):
+    for i in range(0,len(ypredit)):
+        print("✅ Prédit/Réel: ",ypredit[i],ytest[i]) if ypredit[i]==ytest[i] else print("🔴 Prédit/Réel: ",ypredit[i], ytest[i])
+
+def printStatValues(ypredit,ytest):
+    galaxyStats = 0 
+    starStats = 0 
+    QSOStats = 0 
+    N = len(ypredit)
+    NF = 0
+    for i in range(0,N):
+        if ypredit[i] != ytest[i]:
+            NF +=1
+            if ypredit[i] == "GALAXY":
+                galaxyStats+=1
+            elif ypredit[i] == "QSO":
+                QSOStats+=1 
+            elif ypredit[i]=="STAR":
+                starStats+=1 
+    print("Répartition des prédiction fausses : ")
+    print("Galaxy : ",(galaxyStats*100/NF),"%","Star :",(starStats*100/NF),"%","QSO : ",(QSOStats*100/NF),"%")
+
+# Train model
+def training(model, x, y):
+    Xtrain, Xtest, ytrain, ytest = train_test_split(x, y,test_size=0.25, random_state=0)
+    Xtrain = Xtrain.values
+    Xtest = Xtest.values
+    
+    if len(Xtrain.shape) < 2:
+        Xtrain = Xtrain.reshape(-1, 1)
+    if len(Xtest.shape) < 2:
+        Xtest = Xtest.reshape(-1, 1)
+
+    # if isinstance(model, svm.LinearSVC):
+    #     with parallel_backend('threading', n_jobs=-1):
+    #         model.fit(X_train, y_train)
+    
+    #else: 
+    model.fit(Xtrain,ytrain)
+    
+    ypredit = model.predict(Xtest)
+    os.system("clear")
+    res = int(input())
+    while(res != 0):
+        print(" Rentre un chiffre:\n\n1 - Stats %\n2 - Stats raw\n3 - accuracy_score")
+        print("0 - QUIT")
+        res = int(input())
+        if(res == 1):
+            os.system("clear")
+            printStatValues(ypredit,ytest)
+        elif(res == 2):
+            os.system("clear")
+            printPredictedValues(ypredit,ytest)
+        elif res == 3:
+            os.system("clear")
+            print(accuracy_score(ytest, ypredit))
+        elif res == 0:
+            break
+        else:
+            raise Exception('Mauvaise saisie')
+
+main()