From a7e33559242284b4d9db0abc03d2cea87d9866d4 Mon Sep 17 00:00:00 2001 From: Hugo LIVET Date: Fri, 9 Feb 2024 14:15:48 +0100 Subject: [PATCH] :sparkles: add analysis --- README.md | 2 ++ requirements.txt | 4 ++++ src/analysis.py | 32 ++++++++++++++++++++++++++++++-- src/classifier.py | 8 ++++---- src/main.py | 6 ++++-- 5 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 983424e..f899827 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # PlotaFakeNews +Run : +``pip install -r requirements.txt`` \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..64809d8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +scikit-learn +matplotlib +numpy +pandas diff --git a/src/analysis.py b/src/analysis.py index d2bfc93..1d51c11 100644 --- a/src/analysis.py +++ b/src/analysis.py @@ -1,5 +1,5 @@ -from sklearn import metrics -from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay +#from sklearn import metrics +from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_curve, auc, RocCurveDisplay import matplotlib.pyplot as plt from sklearn.model_selection import learning_curve import numpy as np @@ -10,3 +10,31 @@ def metrics(y_test, y_pred): class_report = classification_report(y_test, y_pred) return accuracy, conf_matrix, class_report + +def confusion_matrix_view(confusion_matrix, knn_classes): + cmap = plt.cm.Blues + disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix, display_labels=knn_classes) + disp.plot(cmap=cmap) + plt.show() + +def roc_curve_view(y_test, y_pred): + fpr, tpr, thresholds = roc_curve(y_test, y_pred) + roc_auc = auc(fpr, tpr) + display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='estimator') + display.plot() + plt.show() + +def learning_curve_view(classifier, X_train, y_train, learning_reps): + print('Training in progress, wait please...') + train_sizes, train_scores, test_scores = learning_curve(classifier, X_train, y_train, cv=learning_reps, scoring='accuracy', n_jobs=-1) + + train_scores_mean = np.mean(train_scores, axis=1) + test_scores_mean = np.mean(test_scores, axis=1) + + plt.plot(train_sizes, train_scores_mean, label='Train') + plt.plot(train_sizes, test_scores_mean, label='Test') + plt.xlabel("Training set size") + plt.ylabel('Score') + plt.title('Learning curve') + plt.legend() + plt.show() \ No newline at end of file diff --git a/src/classifier.py b/src/classifier.py index 91a6d5a..fa8ce1d 100644 --- a/src/classifier.py +++ b/src/classifier.py @@ -7,22 +7,22 @@ def logistic_regression(X_train, y_train, X_test): logistic = LogisticRegression(max_iter = 100000) logistic.fit(X_train,y_train) - return logistic.predict(X_test) + return logistic.predict(X_test), logistic def decision_tree(X_train, y_train, X_test): decisionTree = DecisionTreeClassifier() decisionTree = decisionTree.fit(X_train,y_train) - return decisionTree.predict(X_test) + return decisionTree.predict(X_test), decisionTree def knn_classifier(X_train, y_train, X_test): knn = KNeighborsClassifier(n_neighbors=5) knn.fit(X_train, y_train) - return knn.predict(X_test) + return knn.predict(X_test), knn def sgd_classifier(X_train, y_train, X_test): sgd = SGDClassifier(loss="hinge", penalty="l2") sgd.fit(X_train, y_train) - return sgd.predict(X_test) + return sgd.predict(X_test), sgd diff --git a/src/main.py b/src/main.py index 31eb69a..bbb5456 100644 --- a/src/main.py +++ b/src/main.py @@ -13,7 +13,7 @@ if __name__ == '__main__': print("\nTraining models...") - y_pred_knn = classifier.knn_classifier(X_train, y_train, X_test) + y_pred_knn, knn = classifier.knn_classifier(X_train, y_train, X_test) print("Knn... OK") y_pred_dt = classifier.decision_tree(X_train, y_train, X_test) @@ -33,4 +33,6 @@ if __name__ == '__main__': print(f'Confusion Matrix:\n{knn_conf_matrix}') print(f'Classification Report:\n{knn_class_report}') - print("\nTODO()") + analysis.confusion_matrix_view(knn_conf_matrix, knn.classes_) + analysis.roc_curve_view(y_test, y_pred_knn) + analysis.learning_curve_view(knn, X_train, y_train, 10)