add support for Linear SVC and SGD model

master
remrem 1 year ago
parent f9f942ad42
commit fc899e7bee

@@ -1,6 +1,7 @@
 # Plotabit
 Dataset link: https://www.kaggle.com/datasets/fedesoriano/stellar-classification-dataset-sdss17
 Deep Learning: https://machinelearningmastery.com/tutorial-first-neural-network-python-keras/
+Python Machine Learning: https://machinelearningmastery.com/machine-learning-in-python-step-by-step/
 ## Columns
@@ -20,3 +21,4 @@ Deep Learning: https://machinelearningmastery.com/tutorial-first-neural-network-
 - [ ] Train models with just 20000 "GALAXY" class (has an impact?)
 - [ ] Which model is the best, ratio learn_time/precision
 - [ ] Can we drop more categories and have same results (useless data?)
+- [ ] Compare the wrong predictions against their y_test values (see the sketch below)
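For that last item, a minimal sketch (not part of the commit) of how the mispredicted test rows could be pulled out and compared with `y_test`, assuming a fitted `model` and an `x_test`/`y_test` split as used elsewhere in the project; the variable names are illustrative only:

```python
# Illustrative only: list the test rows where the model disagrees with y_test.
y_pred = model.predict(x_test)
wrong = y_pred != y_test                  # boolean mask of misclassified samples

errors = x_test[wrong].copy()             # keep only the mispredicted rows
errors['expected'] = y_test[wrong]        # true class from y_test
errors['predicted'] = y_pred[wrong]       # class the model chose
print(errors)
```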

@@ -4,37 +4,52 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import sklearn as sk
-# Classification
-## KNN
-from sklearn.neighbors import KNeighborsClassifier
+from sklearn import svm
+from sklearn.linear_model import SGDClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-# load the data
-df=pd.read_csv('../data.csv')
-# Clean the data
-# alpha delta u g r i z redshift spec_OBJ_ID
-# Y: class
-x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1)
-y = df['class'].values
+# Open the dataset with pandas
+def read_dataset(filename):
+    df = pd.read_csv(filename)
+    return df
+
+# Drop useless columns and return x and y
+def get_xy_from_dataset(filename):
+    df = read_dataset(filename)
+    x = df.drop(['obj_ID','field_ID','run_ID','rerun_ID','cam_col','plate','MJD','fiber_ID','class'],axis=1)
+    y = df['class'].values
+    return x, y
+
+x, y = get_xy_from_dataset("data.csv")
 x.hist()
-plt.show()
+#plt.show()
-print(" Enter a number:\n\n1 - KNN\n2 - Tree\n3 - RandomForestClassifier")
+print("""Choose a model:
+(1) - KNN
+(2) - Tree
+(3) - RandomForestClassifier
+(4) - SGD
+(5) - Linear SVC""")
 res = int(input())
-if(res == 1):
+if (res == 1):
     model = KNeighborsClassifier()
-elif(res == 2):
+elif (res == 2):
     model = DecisionTreeClassifier(random_state=0, max_depth=20)
-elif(res == 3):
+elif (res == 3):
     model = RandomForestClassifier(n_estimators=100, criterion='entropy')
+elif (res == 4):
+    model = SGDClassifier(max_iter=1000, tol=0.01)
+elif (res == 5):
+    model = svm.SVC(kernel='linear', C=1.0)
 else:
     raise Exception('Please enter a valid number (1-5)')
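The hunk imports `train_test_split` and `accuracy_score` but ends before they are used. As a rough sketch (not part of the commit) of how the selected `model` would typically be trained and scored on the `x`, `y` returned by `get_xy_from_dataset`, with the split parameters chosen here purely for illustration:

```python
# Illustrative only: train the chosen classifier and report test accuracy.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

model.fit(x_train, y_train)               # learn from the training split
y_pred = model.predict(x_test)            # predict classes for the held-out split
print("Accuracy:", accuracy_score(y_test, y_pred))
```

Note that option (5) instantiates `svm.SVC(kernel='linear', C=1.0)`; scikit-learn also provides `sklearn.svm.LinearSVC`, a liblinear-based estimator that is usually much faster on a dataset of this size, so it may be what the commit title's "Linear SVC" intends. `SGDClassifier` and the SVMs are also sensitive to feature scaling, so a `StandardScaler` step before fitting is worth considering.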
