@@ -5,16 +5,27 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import sklearn as sk
-from sklearn import svm
+from sklearn.svm import LinearSVC
 from sklearn.linear_model import SGDClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neighbors import NearestCentroid
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.neural_network import MLPClassifier
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 # from sklearn.externals.joblib import parallel_backend
+from numpy import mean
+from numpy import std
+from sklearn.datasets import make_classification
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import RepeatedStratifiedKFold
+from sklearn.feature_selection import RFECV
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.pipeline import Pipeline
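+# NOTE: mean/std, cross_val_score, RepeatedStratifiedKFold, RFECV and Pipeline
+# all serve the rfecv_test() feature-selection helper added at the end of this file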
 
 # main
 def main():
     # User input
@@ -25,6 +36,8 @@ def main():
     df = read_dataset("data.csv")
     x, y = get_xy_from_dataframe(df)
+
+    # rfecv_test(x, y, RandomForestClassifier())
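+    # (left commented out: RFECV refits the model once per eliminated feature
+    # in every cross-validation split, which can be slow on large datasets)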
     # Train model
     training(model, x, y)
@@ -42,25 +55,33 @@ def get_xy_from_dataframe(df):
 # Ask for model choice
 def prompt_display():
     print("""Choose a model:
-    (1) - KNN
-    (2) - Tree
-    (3) - RandomForestClassifier
-    (4) - SGD
-    (5) - Linear SVC""")
+    (1) - KNN (auto)
+    (2) - KNN (ball_tree, n=2)
+    (3) - Tree
+    (4) - RandomForestClassifier
+    (5) - SGD
+    (6) - Linear SVC
+    (7) - NearestCentroid
+    (8) - MLPClassifier""")
     return int(input())
 
 def model_switch(choice):
     if (choice == 1):
-        model = KNeighborsClassifier()
+        model = KNeighborsClassifier(algorithm="auto")
     elif (choice == 2):
-        model = DecisionTreeClassifier(random_state=0, max_depth=20)
+        model = KNeighborsClassifier(n_neighbors=2, algorithm="ball_tree", weights="distance")
     elif (choice == 3):
-        model = RandomForestClassifier(n_estimators=100 ,criterion='entropy', n_jobs=-1)
+        model = DecisionTreeClassifier(random_state=0, max_depth=20)
     elif (choice == 4):
-        model = SGDClassifier(max_iter=1000, tol=0.01)
+        model = RandomForestClassifier(n_estimators=100, criterion='entropy', n_jobs=-1)
     elif (choice == 5):
-        model = svm.SVC(kernel='linear', C = 1.0)
+        model = SGDClassifier(max_iter=1000, tol=0.01)
+    elif (choice == 6):
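+        # NOTE: with multi_class="crammer_singer", LinearSVC ignores the loss,
+        # penalty and dual options, so loss="squared_hinge"/dual=False are inert here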
+        model = LinearSVC(C=1.0, dual=False, verbose=True, loss="squared_hinge", multi_class="crammer_singer")
+    elif (choice == 7):
+        model = NearestCentroid()
+    elif (choice == 8):
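+        # funnel-shaped network: seven hidden layers tapering from 100 units down
+        # to 3, sigmoid ("logistic") activations, Adam optimizer, alpha = L2 penalty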
+        model = MLPClassifier(solver='adam', alpha=1e-5, random_state=1, activation="logistic", hidden_layer_sizes=(100,80,60,40,20,10,3))
     else:
         raise Exception('Enter a valid number')
@@ -111,7 +132,7 @@ def training(model, x, y):
     model.fit(Xtrain,ytrain)
     ypredit = model.predict(Xtest)
-    os.system("clear")
+    # os.system("clear")
     res = -1
     while(res != 0):
         print(" Enter a number:\n\n1 - Stats %\n2 - Stats raw\n3 - accuracy_score")
@@ -146,5 +167,20 @@ def showDate(df):
     plt.figure(figsize = (8, 8))
     plt.pie(x, labels = ['GALAXY', 'QSO', 'Star'])
     plt.legend()
+
+def rfecv_test(x, y, model):
+    rfe = RFECV(estimator=model)
+    pipeline = Pipeline(steps=[('s',rfe),('m',model)])
+
+    # evaluate model
+    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
+    n_scores = cross_val_score(pipeline, x, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise', verbose=3)
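+    # 10 folds x 3 repeats -> 30 accuracy scores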
+
+    # report performance
+    print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
+
+    rfe.fit(x,y)
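+    # support_[i]: True if feature i was kept; ranking_ is 1 for selected
+    # features and grows for features eliminated earlier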
+    for i in range(x.shape[1]):
+        print('Column: %d, Selected %s, Rank: %.3f' % (i, rfe.support_[i], rfe.ranking_[i]))
 
 main()