add classifier to web app, can now detect fake news from app
continuous-integration/drone/push Build is failing Details

pull/3/head
Axel DE LA FUENTE 1 year ago
parent 57e34833c0
commit b0c32a3acb

@ -0,0 +1,28 @@
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
def logistic_regression(X_train, y_train, X_test):
    """Fit a logistic-regression model and predict labels for ``X_test``.

    The model is created with ``max_iter=100000`` and trained on
    ``X_train`` / ``y_train``.

    Returns:
        A ``(predictions, fitted_model)`` tuple.
    """
    model = LogisticRegression(max_iter=100000)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return predictions, model
def decision_tree(X_train, y_train, X_test):
    """Fit a default decision-tree classifier and predict labels for ``X_test``.

    Returns:
        A ``(predictions, fitted_model)`` tuple.
    """
    # ``fit`` hands back the estimator, so construction and training chain.
    tree = DecisionTreeClassifier().fit(X_train, y_train)
    predictions = tree.predict(X_test)
    return predictions, tree
def knn_classifier(X_train, y_train, X_test):
    """Fit a 5-nearest-neighbours classifier and predict labels for ``X_test``.

    Returns:
        A ``(predictions, fitted_model)`` tuple.
    """
    neighbours_model = KNeighborsClassifier(n_neighbors=5)
    neighbours_model.fit(X_train, y_train)
    predicted = neighbours_model.predict(X_test)
    return predicted, neighbours_model
def sgd_classifier(X_train, y_train, X_test):
    """Fit an SGD classifier (hinge loss, L2 penalty) and predict ``X_test``.

    Returns:
        A ``(predictions, fitted_model)`` tuple.
    """
    estimator = SGDClassifier(loss="hinge", penalty="l2")
    estimator.fit(X_train, y_train)
    predicted = estimator.predict(X_test)
    return predicted, estimator

@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg">
<text y="32" font-size="32">📰</text>
</svg>

After

Width:  |  Height:  |  Size: 92 B

@ -8,10 +8,12 @@
<link rel="stylesheet" href="https://www.w3schools.com/w3css/4/w3.css">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="icon" href="assets/favicon.svg">
<link href="https://fonts.googleapis.com/css2?family=Fira+Code:wght@700&display=swap" rel="stylesheet">
<style>
body, html {height: 100%}
h1, h4, span, label, button {font-family: 'Fira Code', monospace;}
h1, h4, span, label, button, p {font-family: 'Fira Code', monospace;}
p {color:red}
.credit{position: absolute; bottom:0; right:0; margin:.5rem;}
form > div{margin:.5rem;}
form{
@ -19,7 +21,6 @@
justify-content: center;
}
button{
font-size: 16px;
letter-spacing: 2px;
text-decoration: none;
@ -34,26 +35,27 @@
-webkit-user-select: none;
touch-action: manipulation;
}
button:active {
box-shadow: 0px 0px 0px 0px;
top: 5px;
left: 5px;
}
</style>
</head>
<body>
<div class="w3-animate-opacity">
<div class="w3-display-middle">
<h1 class="w3-jumbo w3-animate-top">Fake News Detector</h1>
<h1 class="w3-jumbo">Fake News Detector</h1>
<h4>Enter title and url of the news:</h4>
<form method="post">
{% csrf_token %}
{{ form }}
<button type="submit">Is it fake ??</button>
</form>
<p>
{{ result }}
</p>
</div>
</div>

@ -1,4 +1,5 @@
from django.db import models
from urllib.parse import urlparse
# Create your models here.
@ -12,3 +13,33 @@ class Text(models.Model):
def __str__(self):
    """Return the instance's ``title`` as its string representation."""
    return self.title
def get_domain(url):
    """Return the network location of ``url`` without a leading ``www.``.

    Args:
        url: The URL string to inspect. It may omit the scheme
            (e.g. ``"www.example.com/story"``).

    Returns:
        The ``netloc`` part of the URL with a leading ``"www."`` removed,
        or an empty string when no network location can be found.
    """
    parsed_url = urlparse(url)
    netloc = parsed_url.netloc
    if not netloc and not parsed_url.scheme:
        # Without a scheme, urlparse puts the whole string in ``path`` and
        # leaves ``netloc`` empty; a "//" prefix marks the host explicitly.
        netloc = urlparse("//" + url).netloc
    if netloc.startswith('www.'):
        return netloc[4:]
    return netloc
from .preprocessing import *
from .classifier import *
import pandas as pd
import numpy as np
def prediction(title, url):
    """Classify one news item with the k-nearest-neighbours model.

    The submitted item is appended to the dataset and the combined frame is
    label-encoded once. ``tokenize_datas`` fits its encoders on whatever
    frame it receives, so encoding the training rows and the submitted row
    separately would give integer codes that cannot be compared. The
    training data and the input are then both taken from that single
    encoded frame through ``features_selection``, which guarantees the same
    feature columns in the same order for fitting and predicting.

    Args:
        title: Title of the news item.
        url: URL of the news item.

    Returns:
        The value returned by ``knn_classifier`` for the single submitted
        row (the model's ``predict`` output for one sample).
    """
    domain = get_domain(url)
    input_df = pd.DataFrame(
        {'title': title, 'news_url': url, 'source_domain': domain},
        index=['1'],
    )
    # The submitted row goes last so it can be split off again by position.
    combined = pd.concat([load_datas(), input_df], ignore_index=True)
    encoded = tokenize_datas(combined)
    X, y = features_selection(encoded)
    # Every row but the last is training data; the last row has no label.
    X_train, y_train = X.iloc[:-1], y.iloc[:-1]
    X_input = X.iloc[[-1]]
    predicted, _ = knn_classifier(X_train, y_train, X_input)
    return predicted

@ -0,0 +1,54 @@
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
def process():
    """Load the dataset, encode it and split it into train and test parts.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple.
    """
    encoded = tokenize_datas(load_datas())
    features, target = features_selection(encoded)
    return split_df(features, target)
def no_split_process():
    """Load and encode the dataset without splitting it.

    Returns:
        A ``(X, y)`` tuple as produced by ``features_selection``.
    """
    encoded = tokenize_datas(load_datas())
    features, target = features_selection(encoded)
    return features, target
def load_datas(path="../../datas/FakeNewsNet.csv"):
    """Read the news dataset from a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file. The default is a relative path, so
            it is resolved against the current working directory of the
            process; pass an explicit path when running from elsewhere.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_csv``.
    """
    return pd.read_csv(path)
def tokenize_datas(df):
    """Replace the three text columns of ``df`` with integer label codes.

    ``news_url``, ``title`` and ``source_domain`` are each label-encoded,
    removed from the frame and added back holding their codes, so they end
    up as the last columns in that order. ``df`` is modified in place and
    also returned.
    """
    encoder = LabelEncoder()
    text_columns = ["news_url", "title", "source_domain"]
    # Encode every column first, while the original text is still present.
    codes = {
        column: encoder.fit_transform(df[column]) for column in text_columns
    }
    df.drop(columns=text_columns, inplace=True)
    for column in text_columns:
        df[column] = codes[column]
    return df
def features_selection(df):
    """Split ``df`` into the feature columns and the ``real`` target column.

    Returns:
        A ``(X, y)`` tuple: ``X`` holds ``title``, ``news_url`` and
        ``source_domain`` with missing values replaced by ``''``; ``y`` is
        the ``real`` column.
    """
    feature_columns = ["title", "news_url", "source_domain"]
    X = df[feature_columns].fillna('')
    y = df["real"]
    return X, y
def split_df(X, y):
    """Split features and target into train and test parts (70 % / 30 %).

    The split uses ``random_state=42``.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple.
    """
    parts = train_test_split(X, y, test_size=0.30, random_state=42)
    # train_test_split returns a list; callers receive a tuple.
    return tuple(parts)

@ -2,14 +2,26 @@ from django.shortcuts import render, redirect
# Create your views here.
from .forms import TextForm
from .models import *
def index(request):
    """Render the news form and, after a valid submission, the verdict.

    On a POST with valid data the title and URL are passed to
    ``prediction`` and the page is rendered again with an empty form and a
    ``result`` message. On a POST with invalid data the bound form is
    rendered again. Any other request method gets an empty form.

    Args:
        request: The incoming Django ``HttpRequest``.

    Returns:
        The ``HttpResponse`` produced by rendering ``home.html``.
    """
    if request.method == 'POST':
        form = TextForm(request.POST)
        if form.is_valid():
            # Read the submitted news data.
            title = form.cleaned_data["title"]
            url = form.cleaned_data["url"]
            # Ask the model; no redirect happens before this point, because
            # a redirect here would skip the classification entirely.
            if prediction(title, url) == 1:
                result = "This is not fake news !"
            else:
                result = "It's a Fake News !!!"
            # Show the verdict together with a fresh, empty form.
            return render(
                request, 'home.html', {'form': TextForm(), 'result': result}
            )
    else:
        form = TextForm()
    return render(request, 'home.html', {'form': form})

@ -5,6 +5,8 @@ import analysis
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
if __name__ == '__main__':
print("Start learning...")

@ -32,6 +32,8 @@ def tokenize_datas(df):
df["title"] = label1
df["source_domain"] = label2
print(df)
return df
def features_selection(df):

Loading…
Cancel
Save