rewrite + conflict

master
remrem 1 year ago
commit 3882a5f0b3

@ -1,4 +1,5 @@
#!/usr/bin/python3
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
@ -12,7 +13,20 @@ from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# from sklearn.externals.joblib import parallel_backend
# main
def main():
# User input
opt = prompt_display()
model = model_switch(opt)
# Get interesting data
df = read_dataset("data.csv")
x, y = get_xy_from_dataframe(df)
# Train model
training(model, x, y)
# Open dataset with panda
def read_dataset(filename):
@ -20,54 +34,101 @@ def read_dataset(filename):
return df
# Drop useless columns and return x and y
def get_xy_from_dataset(filename):
df = read_dataset(filename)
def get_xy_from_dataframe(df):
x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1)
y = df['class'].values
return x, y
x, y = get_xy_from_dataset("data.csv")
x.hist()
#plt.show()
# Ask for model choice
def prompt_display():
print("""Choose a model:
print("""Choose a model:
(1) - KNN
(2) - Tree
(3) - RandomForestClassifier
(4) - SGD
(5) - Linear SVC""")
res = int(input())
if (res == 1):
model = KNeighborsClassifier()
elif (res == 2):
model = DecisionTreeClassifier(random_state=0, max_depth=20)
elif (res == 3):
model = RandomForestClassifier(n_estimators=100 ,criterion='entropy')
elif (res == 4):
model = SGDClassifier(max_iter=1000, tol=0.01)
elif (res == 5):
model = svm.SVC(kernel='linear', C = 1.0)
else:
raise Exception('RENTRE LE BON NOMBRE GROS CON')
Xtrain, Xtest, ytrain, ytest = train_test_split(x, y,test_size=0.25, random_state=0)
Xtrain = Xtrain.values
Xtest = Xtest.values
if len(Xtrain.shape) < 2:
Xtrain = Xtrain.reshape(-1, 1)
if len(Xtest.shape) < 2:
Xtest = Xtest.reshape(-1, 1)
model.fit(Xtrain,ytrain)
ypredit = model.predict(Xtest)
# print(ypredit)
print(accuracy_score(ytest, ypredit))
return int(input())
def model_switch(choice):
if (choice == 1):
model = KNeighborsClassifier()
elif (choice == 2):
model = DecisionTreeClassifier(random_state=0, max_depth=20)
elif (choice == 3):
model = RandomForestClassifier(n_estimators=100 ,criterion='entropy')
elif (choice == 4):
model = SGDClassifier(max_iter=1000, tol=0.01)
elif (choice == 5):
model = svm.SVC(kernel='linear', C = 1.0)
else:
raise Exception('RENTRE LE BON NOMBRE GROS CON')
return model
def plot_columns_hist(columns):
x.hist()
plt.show()
def printPredictedValues(ypredit,ytest):
for i in range(0,len(ypredit)):
print("✅ Prédit/Réel: ",ypredit[i],ytest[i]) if ypredit[i]==ytest[i] else print("🔴 Prédit/Réel: ",ypredit[i], ytest[i])
def printStatValues(ypredit,ytest):
galaxyStats = 0
starStats = 0
QSOStats = 0
N = len(ypredit)
NF = 0
for i in range(0,N):
if ypredit[i] != ytest[i]:
NF +=1
if ypredit[i] == "GALAXY":
galaxyStats+=1
elif ypredit[i] == "QSO":
QSOStats+=1
elif ypredit[i]=="STAR":
starStats+=1
print("Répartition des prédiction fausses : ")
print("Galaxy : ",(galaxyStats*100/NF),"%","Star :",(starStats*100/NF),"%","QSO : ",(QSOStats*100/NF),"%")
# Train model
def training(model, x, y):
Xtrain, Xtest, ytrain, ytest = train_test_split(x, y,test_size=0.25, random_state=0)
Xtrain = Xtrain.values
Xtest = Xtest.values
if len(Xtrain.shape) < 2:
Xtrain = Xtrain.reshape(-1, 1)
if len(Xtest.shape) < 2:
Xtest = Xtest.reshape(-1, 1)
# if isinstance(model, svm.LinearSVC):
# with parallel_backend('threading', n_jobs=-1):
# model.fit(X_train, y_train)
#else:
model.fit(Xtrain,ytrain)
ypredit = model.predict(Xtest)
os.system("clear")
res = int(input())
while(res != 0):
print(" Rentre un chiffre:\n\n1 - Stats %\n2 - Stats raw\n3 - accuracy_score")
print("0 - QUIT")
res = int(input())
if(res == 1):
os.system("clear")
printStatValues(ypredit,ytest)
elif(res == 2):
os.system("clear")
printPredictedValues(ypredit,ytest)
elif res == 3:
os.system("clear")
print(accuracy_score(ytest, ypredit))
elif res == 0:
break
else:
raise Exception('Mauvaise saisie')
main()

Loading…
Cancel
Save