transfer code from colab (training model / metrics)

pull/1/head
Axel DE LA FUENTE 1 year ago
parent a41ee5e5fd
commit 140e2dd5c7

@ -0,0 +1,12 @@
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
import numpy as np
def metrics(y_test, y_pred):
    """Score a set of predictions against the true labels.

    Args:
        y_test: Ground-truth labels.
        y_pred: Labels predicted by a model for the same samples.

    Returns:
        A 3-tuple ``(accuracy, confusion matrix, classification report)``,
        i.e. the results of ``accuracy_score``, ``confusion_matrix`` and
        ``classification_report`` in that order.
    """
    # The three scorers share the same (y_true, y_pred) call signature, so
    # they are applied in sequence and collected in call order.
    scorers = (accuracy_score, confusion_matrix, classification_report)
    return tuple(scorer(y_test, y_pred) for scorer in scorers)

@ -0,0 +1,28 @@
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
def logistic_regression(X_train, y_train, X_test):
    """Train a logistic-regression model and predict labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict.

    Returns:
        The predictions of the fitted model for ``X_test``.
    """
    # The solver is allowed up to 100000 iterations.
    model = LogisticRegression(max_iter=100000)
    fitted = model.fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    return predictions
def decision_tree(X_train, y_train, X_test, random_state=None):
    """Train a decision tree and predict labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict.
        random_state: Seed forwarded to ``DecisionTreeClassifier``. The
            default ``None`` keeps the previous unseeded behaviour; pass an
            int to make the fitted tree reproducible between runs.

    Returns:
        The predictions of the fitted tree for ``X_test``.
    """
    tree = DecisionTreeClassifier(random_state=random_state)
    # fit() trains the estimator in place, so no re-assignment is needed.
    tree.fit(X_train, y_train)
    return tree.predict(X_test)
def knn_classifier(X_train, y_train, X_test):
    """Train a 5-nearest-neighbours classifier and predict labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict.

    Returns:
        The predictions of the fitted classifier for ``X_test``.
    """
    neighbours_model = KNeighborsClassifier(n_neighbors=5)
    return neighbours_model.fit(X_train, y_train).predict(X_test)
def sgd_classifier(X_train, y_train, X_test, random_state=None):
    """Train a linear classifier with SGD and predict labels for ``X_test``.

    The model uses hinge loss with an L2 penalty.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Features to predict.
        random_state: Seed forwarded to ``SGDClassifier``. The default
            ``None`` keeps the previous unseeded behaviour; pass an int to
            make training reproducible between runs.

    Returns:
        The predictions of the fitted classifier for ``X_test``.
    """
    sgd = SGDClassifier(loss="hinge", penalty="l2", random_state=random_state)
    sgd.fit(X_train, y_train)
    return sgd.predict(X_test)

@ -2,8 +2,35 @@ import preprocessing
import classifier
import analysis
from warnings import simplefilter
# Ignore FutureWarning messages for the rest of the run.
simplefilter(action='ignore', category=FutureWarning)
# Script entry point: pre-process the data, train four classifiers on the
# training split, then print evaluation metrics.
if __name__ == '__main__':
# NOTE(review): the two near-identical start-up messages below look like a
# removed/added pair from a diff view -- confirm which one belongs in the file.
print("Start learning... :)")
print("Start learning...")
# Load the dataset and obtain the train/test split.
X_train, X_test, y_train, y_test = preprocessing.process()
print("\nPre-processing... OK")
print("\nTraining models...")
# Each classifier helper fits on the training split and returns its
# predictions for X_test.
y_pred_knn = classifier.knn_classifier(X_train, y_train, X_test)
print("Knn... OK")
y_pred_dt = classifier.decision_tree(X_train, y_train, X_test)
print("DecisionTree... OK")
y_pred_logistic_reg = classifier.logistic_regression(X_train, y_train, X_test)
print("Logistic Regression... OK")
y_pred_sgd = classifier.sgd_classifier(X_train, y_train, X_test)
print("SGD... OK")
print("\nMetrics calculations...")
# Only the KNN predictions are evaluated here; y_pred_dt, y_pred_logistic_reg
# and y_pred_sgd are computed above but not used in the lines shown.
print("\n--------------Knn metrics---------------")
knn_accuracy, knn_conf_matrix, knn_class_report = analysis.metrics(y_test, y_pred_knn)
print(f'Accuracy: {knn_accuracy}')
print(f'Confusion Matrix:\n{knn_conf_matrix}')
print(f'Classification Report:\n{knn_class_report}')
# The dataset is loaded a second time and the result of head(5) is discarded
# (nothing is printed or stored) -- presumably a notebook leftover; confirm.
df = preprocessing.load_datas()
df.head(5)
print("\nTODO()")

@ -1,4 +1,46 @@
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
def process():
    """Run the whole preprocessing pipeline.

    Loads the dataset, label-encodes its text columns, selects the feature
    columns and the target, and splits them into train and test parts.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` from ``split_df``.
    """
    encoded_frame = tokenize_datas(load_datas())
    features, target = features_selection(encoded_frame)
    return split_df(features, target)
def load_datas(path="../datas/FakeNewsNet.csv"):
    """Load the news dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to
            ``"../datas/FakeNewsNet.csv"``, the path that was previously
            hard-coded. A relative path is resolved against the current
            working directory, so pass an explicit path when the script is
            not launched from its usual directory.

    Returns:
        The ``pandas.DataFrame`` returned by ``pd.read_csv``.
    """
    return pd.read_csv(path)
def tokenize_datas(df):
    """Replace the text columns of ``df`` with ``LabelEncoder`` codes.

    The ``news_url``, ``title`` and ``source_domain`` columns are encoded,
    dropped, and then re-added holding the encoded values, so they end up
    after the remaining columns. ``df`` is modified in place.

    Args:
        df: DataFrame containing the three columns listed above.

    Returns:
        The same ``df`` object, with the three columns encoded.
    """
    encoder = LabelEncoder()
    text_columns = ("news_url", "title", "source_domain")

    # Encode every column before touching the frame; the single encoder is
    # re-fitted for each column.
    encoded = {name: encoder.fit_transform(df[name]) for name in text_columns}

    df.drop(columns=list(text_columns), inplace=True)
    for name in text_columns:
        df[name] = encoded[name]
    return df
def features_selection(df):
    """Split ``df`` into the model inputs and the target column.

    Args:
        df: DataFrame with ``title``, ``news_url``, ``source_domain`` and
            ``real`` columns.

    Returns:
        A pair ``(X, y)``: ``X`` holds the ``title``, ``news_url`` and
        ``source_domain`` columns with missing values replaced by ``''``,
        and ``y`` is the ``real`` column.
    """
    input_columns = ["title", "news_url", "source_domain"]
    inputs = df[input_columns].fillna('')
    target = df["real"]
    return inputs, target
def split_df(X, y):
    """Split features and target into train and test parts.

    70% of the samples go to the training part and 30% to the test part,
    using the fixed seed 42.

    Args:
        X: Feature table.
        y: Target values aligned with ``X``.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(X, y, test_size=0.30, random_state=42)
    return tuple(parts)

Loading…
Cancel
Save