|
|
@ -36,7 +36,6 @@ def main():
|
|
|
|
df = read_dataset("data.csv")
|
|
|
|
df = read_dataset("data.csv")
|
|
|
|
x, y = get_xy_from_dataframe(df)
|
|
|
|
x, y = get_xy_from_dataframe(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# rfecv_test(x, y, RandomForestClassifier())
|
|
|
|
# rfecv_test(x, y, RandomForestClassifier())
|
|
|
|
# Train model
|
|
|
|
# Train model
|
|
|
|
training(model, x, y)
|
|
|
|
training(model, x, y)
|
|
|
@ -83,7 +82,7 @@ def model_switch(choice):
|
|
|
|
elif (choice == 8):
|
|
|
|
elif (choice == 8):
|
|
|
|
model = MLPClassifier(solver='adam', alpha=1e-5, random_state=1, activation="logistic", hidden_layer_sizes=(100,80,60,40,20,10,3))
|
|
|
|
model = MLPClassifier(solver='adam', alpha=1e-5, random_state=1, activation="logistic", hidden_layer_sizes=(100,80,60,40,20,10,3))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
raise Exception('RENTRE LE BON NOMBRE GROS CON')
|
|
|
|
raise Exception('Wrong entry')
|
|
|
|
|
|
|
|
|
|
|
|
return model
|
|
|
|
return model
|
|
|
|
|
|
|
|
|
|
|
@ -123,12 +122,7 @@ def training(model, x, y):
|
|
|
|
Xtrain = Xtrain.reshape(-1, 1)
|
|
|
|
Xtrain = Xtrain.reshape(-1, 1)
|
|
|
|
if len(Xtest.shape) < 2:
|
|
|
|
if len(Xtest.shape) < 2:
|
|
|
|
Xtest = Xtest.reshape(-1, 1)
|
|
|
|
Xtest = Xtest.reshape(-1, 1)
|
|
|
|
|
|
|
|
|
|
|
|
# if isinstance(model, svm.LinearSVC):
|
|
|
|
|
|
|
|
# with parallel_backend('threading', n_jobs=-1):
|
|
|
|
|
|
|
|
# model.fit(X_train, y_train)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#else:
|
|
|
|
|
|
|
|
model.fit(Xtrain,ytrain)
|
|
|
|
model.fit(Xtrain,ytrain)
|
|
|
|
|
|
|
|
|
|
|
|
ypredit = model.predict(Xtest)
|
|
|
|
ypredit = model.predict(Xtest)
|
|
|
@ -150,18 +144,17 @@ def training(model, x, y):
|
|
|
|
elif res == 0:
|
|
|
|
elif res == 0:
|
|
|
|
break
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
raise Exception('Mauvaise saisie')
|
|
|
|
raise Exception('Wrong entry')
|
|
|
|
|
|
|
|
|
|
|
|
def clearData(df):
|
|
|
|
def clearData(df):
|
|
|
|
res = df["class"].value_counts()
|
|
|
|
res = df["class"].value_counts()
|
|
|
|
dtemp = df.sort_values(by=['class'])
|
|
|
|
dtemp = df.sort_values(by=['class'])
|
|
|
|
supr = int(res["GALAXY"]/1.5)
|
|
|
|
supr = int(res["GALAXY"]/1.5)
|
|
|
|
|
|
|
|
|
|
|
|
dtemp.drop(dtemp.index[range(1,supr)])
|
|
|
|
dtemp.drop(dtemp.index[range(1,supr)])
|
|
|
|
dtemp = dtemp.iloc[34000:]
|
|
|
|
dtemp = dtemp.iloc[34000:]
|
|
|
|
return dtemp
|
|
|
|
return dtemp
|
|
|
|
|
|
|
|
|
|
|
|
def showDate(df):
|
|
|
|
def showData(df):
|
|
|
|
res = df["class"].value_counts()
|
|
|
|
res = df["class"].value_counts()
|
|
|
|
x = [res["GALAXY"],res["QSO"],res["STAR"]]
|
|
|
|
x = [res["GALAXY"],res["QSO"],res["STAR"]]
|
|
|
|
plt.figure(figsize = (8, 8))
|
|
|
|
plt.figure(figsize = (8, 8))
|
|
|
@ -183,4 +176,65 @@ def rfecv_test(x, y, model):
|
|
|
|
for i in range(x.shape[1]):
|
|
|
|
for i in range(x.shape[1]):
|
|
|
|
print('Column: %d, Selected %s, Rank: %.3f' % (i, rfe.support_[i], rfe.ranking_[i]))
|
|
|
|
print('Column: %d, Selected %s, Rank: %.3f' % (i, rfe.support_[i], rfe.ranking_[i]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def allModels(df):
    """Score the KNN and 'Classifier' models on runs of consecutive columns.

    The bookkeeping columns and the ``class`` label are dropped from ``df``;
    the remaining columns are the candidate features. For every starting
    column the feature set begins as that single column and grows by one
    following column per step. Each feature set is evaluated with both
    models through ``customTrainingRaw``.

    Args:
        df: DataFrame holding a ``class`` column plus the columns dropped
            below (``obj_ID``, ``field_ID``, ``run_ID``, ...).

    Returns:
        A list of ``[model_name, feature_columns, accuracy]`` entries, one
        per (feature set, model) pair, in evaluation order.
    """
    # Additional model idea:
    # RandomForestClassifier(n_estimators=100, criterion='entropy', n_jobs=-1)

    # Model label -> choice number understood by model_switch.
    model_choices = (('KNN', 1), ('Classifier', 2))

    features = df.drop(
        ['obj_ID', 'field_ID', 'run_ID', 'rerun_ID', 'cam_col',
         'plate', 'MJD', 'fiber_ID', 'class'],
        axis=1,
    )
    y = df['class'].values
    column_names = list(features.columns.values)

    datas = []
    for start, first_column in enumerate(column_names):
        selected = [first_column]
        # NOTE(review): the feature set is scored *before* the next column is
        # appended, so the run ending with the last column (and the last
        # column on its own) is never evaluated -- confirm this is intended.
        for next_column in column_names[start + 1:]:
            x_values = features[selected]
            for model_name, choice in model_choices:
                # A fresh estimator per evaluation so fits never leak state.
                model = model_switch(choice)
                print("Model used : ", model_name, "---- Case : ", model)
                print("X values used : ", selected)
                accu = customTrainingRaw(model, x_values, y, 3)
                # Store a snapshot: `selected` keeps growing below, and
                # sharing the same list would make every entry report the
                # final column set instead of the one actually scored.
                datas.append([model_name, list(selected), accu])
            selected.append(next_column)
    return datas
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def customTrainingRaw(model, x, y,res=-1):
    """Fit ``model`` on a train/test split and return its test accuracy.

    The data is split with ``train_test_split`` (25% held out,
    ``random_state=0``), the model is fitted on the training part, and the
    accuracy of its predictions on the held-out part is printed and returned.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        x: Feature data exposing ``.values`` after splitting (the callers in
            this file pass a pandas DataFrame).
        y: Target labels aligned with ``x``.
        res: Unused; kept so existing callers that pass it keep working.

    Returns:
        The value of ``accuracy_score(ytest, predictions)``.
    """
    Xtrain, Xtest, ytrain, ytest = train_test_split(
        x, y, test_size=0.25, random_state=0
    )
    Xtrain = Xtrain.values
    Xtest = Xtest.values

    # A single feature may arrive as a 1-D array; make it a column matrix
    # before handing it to fit/predict.
    if Xtrain.ndim < 2:
        Xtrain = Xtrain.reshape(-1, 1)
    if Xtest.ndim < 2:
        Xtest = Xtest.reshape(-1, 1)

    model.fit(Xtrain, ytrain)
    ypredit = model.predict(Xtest)

    # Compute the score once and reuse it for both the log line and the
    # return value.
    score = accuracy_score(ytest, ypredit)
    print(score)
    return score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def bestModelFinder(datas):
    """Print the best-scoring entry and per-model accuracy statistics.

    Entries whose model name is ``'KNN'`` form the "Knn" group; every other
    entry goes to the "Tree" group. For each group the mean, variance and
    standard deviation ("ecart-type") of the accuracies are printed.

    Args:
        datas: Sequence of ``[model_name, feature_columns, accuracy]``
            entries, as produced by ``allModels``.

    Returns:
        The entry with the highest accuracy (the first one on ties), or
        ``None`` when ``datas`` is empty.
    """
    # Keep the individual scores: mean/variance/stdev need the samples, not
    # their running sum.
    knn_scores = [entry[2] for entry in datas if entry[0] == 'KNN']
    tree_scores = [entry[2] for entry in datas if entry[0] != 'KNN']

    # max() returns the first maximal entry, matching a strict ">" scan.
    best = max(datas, key=lambda entry: entry[2]) if datas else None
    print("BEST CHOICE IS :", best)

    for label, scores in (("Knn", knn_scores), ("Tree", tree_scores)):
        # Sample variance / standard deviation need at least two data points.
        has_spread = len(scores) >= 2
        print(label + " mean accuracy_score : ",
              mean(scores) if scores else "n/a")
        print(label + " variance accuracy_score : ",
              variance(scores) if has_spread else "n/a")
        print(label + " ecart-type accuracy_score : ",
              stdev(scores) if has_spread else "n/a")

    return best
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main()
|
|
|
|
main()
|
|
|
|