add plots and try auto_sklearn

master
remrem 1 year ago
parent 9c4e1afc66
commit cf44fa3b0e

@ -5,6 +5,7 @@ Deep Learning: https://machinelearningmastery.com/tutorial-first-neural-network-
Python Machine Learning: https://machinelearningmastery.com/machine-learning-in-python-step-by-step/ Python Machine Learning: https://machinelearningmastery.com/machine-learning-in-python-step-by-step/
AI Plot data: https://machinelearningmastery.com/visualize-machine-learning-data-python-pandas/
## Columns ## Columns
|Keep |Skip | |Keep |Skip |

@ -26,15 +26,22 @@ from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeClassifier from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import pandas
from pandas.plotting import scatter_matrix
from sklearn.metrics import confusion_matrix
# main # main
def main(): def main():
# plotAll()
#auto_sklearn()
# User input # User input
opt = prompt_display() opt = prompt_display()
model = model_switch(opt) model = model_switch(opt)
# Get interesting data # Get interesting data
df = read_dataset("data.csv") df, x, y = read_dataset('data.csv')
x, y = get_xy_from_dataframe(df)
# rfecv_test(x, y, RandomForestClassifier()) # rfecv_test(x, y, RandomForestClassifier())
# Train model # Train model
@ -43,14 +50,11 @@ def main():
# Open dataset with panda # Open dataset with panda
def read_dataset(filename): def read_dataset(filename):
df = pd.read_csv(filename) df = pd.read_csv(filename)
return df
# Drop useless columns and return x and y
def get_xy_from_dataframe(df):
x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1) x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1)
y = df['class'].values y = df['class'].values
return x, y
return df, x, y
# Ask for model choice # Ask for model choice
def prompt_display(): def prompt_display():
print("""Choose a model: print("""Choose a model:
@ -80,7 +84,7 @@ def model_switch(choice):
elif (choice == 7): elif (choice == 7):
model = NearestCentroid() model = NearestCentroid()
elif (choice == 8): elif (choice == 8):
model = MLPClassifier(solver='adam', alpha=1e-5, random_state=1, activation="logistic", hidden_layer_sizes=(100,80,60,40,20,10,3)) model = MLPClassifier(solver='adam', alpha=1e-5, random_state=1, activation="logistic", hidden_layer_sizes=(1000, 300, 100, 30, 10, 3))
else: else:
raise Exception('Wrong entry') raise Exception('Wrong entry')
@ -124,8 +128,9 @@ def training(model, x, y):
Xtest = Xtest.reshape(-1, 1) Xtest = Xtest.reshape(-1, 1)
model.fit(Xtrain,ytrain) model.fit(Xtrain,ytrain)
ypredit = model.predict(Xtest) ypredict = model.predict(Xtest)
# confusion_matrix(ytrain, ypredict)
# os.system("clear") # os.system("clear")
res = -1 res = -1
while(res != 0): while(res != 0):
@ -134,13 +139,13 @@ def training(model, x, y):
res = int(input()) res = int(input())
if(res == 1): if(res == 1):
os.system("clear") os.system("clear")
printStatValues(ypredit,ytest) printStatValues(ypredict,ytest)
elif(res == 2): elif(res == 2):
os.system("clear") os.system("clear")
printPredictedValues(ypredit,ytest) printPredictedValues(ypredict,ytest)
elif res == 3: elif res == 3:
os.system("clear") os.system("clear")
print(accuracy_score(ytest, ypredit)) print(accuracy_score(ytest, ypredict))
elif res == 0: elif res == 0:
break break
else: else:
@ -236,5 +241,58 @@ def bestModelFinder(datas):
print("Tree variance accuracy_score : ", variance(treeMean)) print("Tree variance accuracy_score : ", variance(treeMean))
print("Tree ecart-type accuracy_score : ", stdev(treeMean)) print("Tree ecart-type accuracy_score : ", stdev(treeMean))
def auto_sklearn():
    """Fit an auto-sklearn classifier on ``data.csv`` and print its accuracy.

    The features and labels returned by ``read_dataset`` are split 75/25
    (``random_state=0``), an ``AutoSklearnClassifier`` with default settings
    is fitted on the training part, and the accuracy measured on the
    held-out part is printed.
    """
    # Imported locally: nothing else in this function's scope provides the
    # ``autosklearn`` name, and the other entry points do not need it.
    import autosklearn.classification

    # read_dataset returns the full frame plus the feature/label split
    # (main() unpacks it the same way); the frame itself is not needed here.
    _, x, y = read_dataset('data.csv')
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=0)
    X_train = X_train.values
    X_test = X_test.values
    # Promote a single feature column to a 2-D array before fitting.
    if X_train.ndim < 2:
        X_train = X_train.reshape(-1, 1)
    if X_test.ndim < 2:
        X_test = X_test.reshape(-1, 1)

    cls = autosklearn.classification.AutoSklearnClassifier()
    cls.fit(X_train, y_train)
    y_hat = cls.predict(X_test)
    # Same metric helper that training() uses.
    print("Accuracy score", accuracy_score(y_test, y_hat))
def plotAll():
    """Load ``data.csv`` and show every exploratory plot, one after another."""
    # read_dataset returns (df, x, y); only the full frame is plotted, so the
    # feature/label split is discarded instead of binding the whole tuple.
    df, _, _ = read_dataset('data.csv')
    plotHistograms(df)
    plotDensity(df)
    plotBoxWhisker(df)
    plotCorrelationMatrix(df)
    plotScatterMatrix(df)
def plotHistograms(df):
    """Draw the pandas histogram grid for *df* and display it.

    Args:
        df: data frame whose ``hist`` method builds the figure.
    """
    # Build the figure first, then hand control to matplotlib for display.
    df.hist()
    plt.show()
def plotDensity(df):
    """Show a grid of density plots, one subplot per plotted column of *df*.

    Args:
        df: data frame to plot.
    """
    numeric_count = len(df.select_dtypes(include='number').columns)
    # Keep the original 3x3 grid for up to nine columns and add rows beyond
    # that: pandas rejects a layout with fewer cells than subplots.
    rows = max(3, (numeric_count + 2) // 3)
    df.plot(kind='density', subplots=True, layout=(rows, 3), sharex=False)
    plt.show()
def plotBoxWhisker(df):
    """Show a grid of box-and-whisker plots, one subplot per plotted column.

    Args:
        df: data frame to plot.
    """
    numeric_count = len(df.select_dtypes(include='number').columns)
    # Keep the original 3x3 grid for up to nine columns and add rows beyond
    # that: pandas rejects a layout with fewer cells than subplots.
    rows = max(3, (numeric_count + 2) // 3)
    df.plot(kind='box', subplots=True, layout=(rows, 3),
            sharex=False, sharey=False)
    plt.show()
def plotCorrelationMatrix(df):
    """Show the pairwise correlation matrix of the numeric columns of *df*.

    The matrix is drawn with ``matshow`` on a fixed [-1, 1] colour scale and
    each axis is labelled with the column names of the matrix itself.

    Args:
        df: data frame whose numeric (and boolean) columns are correlated.
    """
    # Select the columns explicitly: non-numeric columns such as the class
    # label cannot be correlated, and the tick labels must describe exactly
    # the columns that end up in the matrix.
    correlations = df.select_dtypes(include=['number', 'bool']).corr()
    labels = list(correlations.columns)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(correlations, vmin=-1, vmax=1)
    fig.colorbar(cax)

    # One tick per matrix column instead of a hard-coded count of nine, so
    # the number of tick positions always equals the number of labels.
    ticks = np.arange(len(labels))
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    plt.show()
def plotScatterMatrix(df):
    """Draw the pandas scatter matrix of *df* and display it.

    Args:
        df: data frame passed to ``pandas.plotting.scatter_matrix``.
    """
    # Build the grid of pairwise plots, then show the resulting figure.
    scatter_matrix(df)
    plt.show()
main() main()

Loading…
Cancel
Save