diff --git a/src/app/src/classifier.py b/src/app/src/classifier.py
new file mode 100644
index 0000000..fa8ce1d
--- /dev/null
+++ b/src/app/src/classifier.py
@@ -0,0 +1,28 @@
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.linear_model import SGDClassifier
+
+def logistic_regression(X_train, y_train, X_test):
+    """Train a logistic-regression model and classify the test samples.
+
+    Args:
+        X_train: training feature matrix.
+        y_train: training labels.
+        X_test: samples to classify.
+
+    Returns:
+        A ``(predictions, model)`` pair: the labels predicted for ``X_test``
+        and the fitted ``LogisticRegression`` estimator.
+    """
+    # The iteration cap is deliberately very high (100000).
+    model = LogisticRegression(max_iter=100000)
+    model.fit(X_train, y_train)
+    y_pred = model.predict(X_test)
+    return y_pred, model
+
+def decision_tree(X_train, y_train, X_test):
+    """Train a decision tree with default settings and classify the test samples.
+
+    Args:
+        X_train: training feature matrix.
+        y_train: training labels.
+        X_test: samples to classify.
+
+    Returns:
+        A ``(predictions, model)`` pair: the labels predicted for ``X_test``
+        and the fitted ``DecisionTreeClassifier``.
+    """
+    tree = DecisionTreeClassifier()
+    tree.fit(X_train, y_train)
+    y_pred = tree.predict(X_test)
+    return y_pred, tree
+
+def knn_classifier(X_train, y_train, X_test):
+    """Train a 5-nearest-neighbours classifier and classify the test samples.
+
+    Args:
+        X_train: training feature matrix.
+        y_train: training labels.
+        X_test: samples to classify.
+
+    Returns:
+        A ``(predictions, model)`` pair: the labels predicted for ``X_test``
+        and the fitted ``KNeighborsClassifier``.
+    """
+    neighbours = KNeighborsClassifier(n_neighbors=5)
+    neighbours.fit(X_train, y_train)
+    labels = neighbours.predict(X_test)
+    return labels, neighbours
+
+def sgd_classifier(X_train, y_train, X_test):
+    """Train an SGD classifier (hinge loss, L2 penalty) and classify the test samples.
+
+    Args:
+        X_train: training feature matrix.
+        y_train: training labels.
+        X_test: samples to classify.
+
+    Returns:
+        A ``(predictions, model)`` pair: the labels predicted for ``X_test``
+        and the fitted ``SGDClassifier``.
+    """
+    classifier = SGDClassifier(loss="hinge", penalty="l2")
+    classifier.fit(X_train, y_train)
+    y_pred = classifier.predict(X_test)
+    return y_pred, classifier
diff --git a/src/app/src/html/assets/favicon.svg b/src/app/src/html/assets/favicon.svg
new file mode 100644
index 0000000..5c0be60
--- /dev/null
+++ b/src/app/src/html/assets/favicon.svg
@@ -0,0 +1,3 @@
+
diff --git a/src/app/src/html/home.html b/src/app/src/html/home.html
index 6f756d6..4b7c53f 100644
--- a/src/app/src/html/home.html
+++ b/src/app/src/html/home.html
@@ -8,10 +8,12 @@
+
-
Fake News Detector
+
Fake News Detector
Enter title and url of the news:
+
+ {{ result }}
+
diff --git a/src/app/src/models.py b/src/app/src/models.py
index ccda823..6bb3ec6 100644
--- a/src/app/src/models.py
+++ b/src/app/src/models.py
@@ -1,4 +1,5 @@
from django.db import models
+from urllib.parse import urlparse
# Create your models here.
@@ -12,3 +13,33 @@ class Text(models.Model):
def __str__(self):
+ # The string form of the object is its title.
return self.title
+def get_domain(url):
+    """Return the network location of ``url`` without a leading ``www.``.
+
+    Args:
+        url: the address to inspect.
+
+    Returns:
+        The ``netloc`` component reported by ``urlparse``, with one leading
+        ``'www.'`` removed when present. The value is otherwise untouched,
+        so a port stays in place, and it is empty when ``urlparse`` finds no
+        network location (for example a URL written without a scheme).
+    """
+    prefix = 'www.'
+    host = urlparse(url).netloc
+    return host[len(prefix):] if host.startswith(prefix) else host
+
+from .preprocessing import *
+from .classifier import *
+import pandas as pd
+import numpy as np
+
+def prediction(title, url):
+    """Classify one news item with a k-nearest-neighbours model.
+
+    The submitted row is appended to the dataset returned by ``load_datas()``
+    and the combined frame is label-encoded in a single pass. The training
+    rows and the submitted row therefore share the same integer codes and,
+    because both come out of ``features_selection``, the same feature
+    column order.
+
+    Args:
+        title: headline of the news item.
+        url: address of the news item; its domain is derived with
+            ``get_domain``.
+
+    Returns:
+        The one-element array produced by the classifier's ``predict`` for
+        the submitted row. Labels come from the dataset's ``real`` column.
+    """
+    dataset = load_datas()
+    input_df = pd.DataFrame(
+        {'title': title, 'news_url': url, 'source_domain': get_domain(url)},
+        index=['1'],
+    )
+
+    # Encode the dataset and the new row together: LabelEncoder codes depend
+    # on the full set of values, so encoding them separately would give the
+    # same string different integers in the training and input matrices.
+    concat_df = pd.concat([dataset, input_df], ignore_index=True)
+    features, _ = features_selection(tokenize_datas(concat_df))
+
+    # The submitted item is the last row; every row before it is training data.
+    X_train = features.iloc[:-1]
+    X_input = features.iloc[-1:]
+    # Labels are taken from the untouched dataset so their dtype is not
+    # altered by the label-less row appended above.
+    y_train = dataset["real"]
+
+    # NOTE(review): the CSV is re-read and the model re-fitted on every call;
+    # consider caching if this is on a request path.
+    predicted, _ = knn_classifier(X_train, y_train, X_input)
+
+    return predicted
+
diff --git a/src/app/src/preprocessing.py b/src/app/src/preprocessing.py
new file mode 100644
index 0000000..c713ed9
--- /dev/null
+++ b/src/app/src/preprocessing.py
@@ -0,0 +1,54 @@
+import pandas as pd
+
+from sklearn.preprocessing import LabelEncoder
+
+from sklearn.model_selection import train_test_split
+
+def process():
+    """Run the preprocessing pipeline and return a train/test split.
+
+    Loads the dataset, label-encodes it, selects the features and the
+    target, and splits them with ``split_df``.
+
+    Returns:
+        ``(X_train, X_test, y_train, y_test)``.
+    """
+    encoded = tokenize_datas(load_datas())
+    X, y = features_selection(encoded)
+    return split_df(X, y)
+
+def no_split_process():
+    """Run the preprocessing pipeline without splitting the data.
+
+    Loads the dataset, label-encodes it, and selects the features and the
+    target.
+
+    Returns:
+        ``(X, y)`` covering every row of the dataset.
+    """
+    encoded = tokenize_datas(load_datas())
+    return features_selection(encoded)
+
+def load_datas():
+    """Read the FakeNewsNet CSV file into a DataFrame.
+
+    The path is relative, so it is resolved against the current working
+    directory of the running process.
+    """
+    dataset_path = "../../datas/FakeNewsNet.csv"
+    return pd.read_csv(dataset_path)
+
+def tokenize_datas(df):
+    """Replace the text columns of ``df`` with integer label codes, in place.
+
+    ``news_url``, ``title`` and ``source_domain`` are each encoded on their
+    own by refitting one ``LabelEncoder``. The original columns are dropped
+    and the encoded ones appended, so they end up as the last three columns
+    in that order.
+
+    Args:
+        df: frame containing the three text columns; it is modified.
+
+    Returns:
+        The same ``df`` object, now holding the encoded columns.
+    """
+    text_columns = ["news_url", "title", "source_domain"]
+    encoder = LabelEncoder()
+
+    # Encode everything before touching the frame, then swap the columns.
+    codes = {name: encoder.fit_transform(df[name]) for name in text_columns}
+    df.drop(columns=text_columns, inplace=True)
+    for name in text_columns:
+        df[name] = codes[name]
+
+    return df
+
+def features_selection(df):
+    """Split ``df`` into the feature matrix and the target column.
+
+    Args:
+        df: frame with ``title``, ``news_url``, ``source_domain`` and
+            ``real`` columns.
+
+    Returns:
+        ``(X, y)`` where ``X`` holds the three feature columns, in that
+        order, with missing values replaced by ``''``, and ``y`` is the
+        ``real`` column.
+    """
+    feature_columns = ["title", "news_url", "source_domain"]
+    X = df[feature_columns].fillna('')
+    y = df["real"]
+    return X, y
+
+
+def split_df(X, y):
+    """Split features and target into train and test parts.
+
+    30% of the rows go to the test set, and the shuffle is seeded with 42
+    so the split is reproducible.
+
+    Returns:
+        ``(X_train, X_test, y_train, y_test)`` as a tuple.
+    """
+    parts = train_test_split(X, y, test_size=0.30, random_state=42)
+    # train_test_split returns a list; callers receive a tuple.
+    return tuple(parts)
+
diff --git a/src/app/src/views.py b/src/app/src/views.py
index 9627e54..426ea89 100644
--- a/src/app/src/views.py
+++ b/src/app/src/views.py
@@ -2,14 +2,26 @@ from django.shortcuts import render, redirect
# Create your views here.
from .forms import TextForm
+from .models import *
def index(request):
+ """Render home.html; on a valid POST, also pass the prediction() verdict as 'result'."""
if request.method == 'POST':
form = TextForm(request.POST)
if form.is_valid():
+ # read the validated title and url submitted for the news item
title = form.cleaned_data["title"]
url = form.cleaned_data["url"]
- return redirect("index") # Rediriger vers une page d'accueil ou une autre vue
+
+ # a prediction equal to 1 is reported as genuine news, anything else as fake
+ if(prediction(title, url) == 1):
+ result = "This is not fake news !"
+ else:
+ result = "It's a Fake News !!!"
+
+ # swap the bound form for a blank one so the page shows empty fields
+ form = TextForm()
+
+ return render(request, 'home.html', {'form':form, 'result':result})
else:
form = TextForm()
return render(request, 'home.html', {'form': form})
diff --git a/src/main.py b/src/main.py
index bbb5456..d18aae6 100644
--- a/src/main.py
+++ b/src/main.py
@@ -5,6 +5,8 @@ import analysis
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
+import pandas as pd
+
if __name__ == '__main__':
print("Start learning...")
diff --git a/src/preprocessing.py b/src/preprocessing.py
index e010468..ceee027 100644
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@@ -32,6 +32,8 @@ def tokenize_datas(df):
df["title"] = label1
df["source_domain"] = label2
+ print(df)
+
return df
def features_selection(df):