diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..c117820 --- /dev/null +++ b/.drone.yml @@ -0,0 +1,30 @@ +kind: pipeline +type: docker +name: CD_PlotaFakeNews + +trigger: + event: + - push + +steps: + - name: docker_image + image: plugins/docker + settings: + dockerfile: src/app/Dockerfile + context: src/app/ + registry: hub.codefirst.iut.uca.fr + repo: hub.codefirst.iut.uca.fr/hugo.livet/plota-fake-news + username: + from_secret: SECRET_REGISTRY_USERNAME + password: + from_secret: SECRET_REGISTRY_PASSWORD + + - name: deploy-application + image: hub.codefirst.iut.uca.fr/thomas.bellembois/codefirst-dockerproxy-clientdrone:latest + environment: + IMAGENAME: hub.codefirst.iut.uca.fr/hugo.livet/plota-fake-news:latest + CONTAINERNAME: PlotaFakeNews + COMMAND: create + OVERWRITE: true + ADMINS: axelde_la_fuente, hugolivet + depends_on: [ docker_image] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 64809d8..52e0f8e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ scikit-learn matplotlib numpy pandas +Django diff --git a/src/app/Dockerfile b/src/app/Dockerfile new file mode 100644 index 0000000..0ff7221 --- /dev/null +++ b/src/app/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.8 + +EXPOSE 8000 +WORKDIR /app +#ENV PYTHONUNBUFFERED=1 + +COPY . /app + +RUN wget http://fr.archive.ubuntu.com/ubuntu/pool/universe/p/python-stdlib-extensions/python-tk_2.7.18-1_amd64.deb +RUN pip install --upgrade pip +RUN pip install -r requirements.txt + +ENTRYPOINT ["python3", "manage.py", "runserver", "0.0.0.0:8000"] \ No newline at end of file diff --git a/src/app/app/__init__.py b/src/app/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/app/app/asgi.py b/src/app/app/asgi.py new file mode 100644 index 0000000..de17191 --- /dev/null +++ b/src/app/app/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for app project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings') + +application = get_asgi_application() diff --git a/src/app/app/settings.py b/src/app/app/settings.py new file mode 100644 index 0000000..be1706b --- /dev/null +++ b/src/app/app/settings.py @@ -0,0 +1,123 @@ +""" +Django settings for app project. + +Generated by 'django-admin startproject' using Django 5.0.2. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.0/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = 'django-insecure-9+n!natx3n$s!c64q&b#o2ji=-07gng^ncs94t&j6@5%)0qwx$' + +# SECURITY WARNING: don't run with debug turned on in production! +DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'app.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': ["./src/html/"], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'app.wsgi.application' + + +# Database +# https://docs.djangoproject.com/en/5.0/ref/settings/#databases + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': BASE_DIR / 'db.sqlite3', + } +} + + +# Password validation +# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.0/topics/i18n/ + +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.0/howto/static-files/ + +STATIC_URL = 'static/' + +# Default primary key field type +# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' diff --git a/src/app/app/urls.py b/src/app/app/urls.py new file mode 100644 index 0000000..33326fd --- /dev/null +++ b/src/app/app/urls.py @@ -0,0 +1,23 @@ +""" +URL configuration for app project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.0/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" + +from django.contrib import admin +from django.urls import include, path + +urlpatterns = [ + path("", include("src.urls")), +] diff --git a/src/app/app/wsgi.py b/src/app/app/wsgi.py new file mode 100644 index 0000000..eb514ab --- /dev/null +++ b/src/app/app/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for app project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings') + +application = get_wsgi_application() diff --git a/src/app/manage.py b/src/app/manage.py new file mode 100755 index 0000000..4931389 --- /dev/null +++ b/src/app/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'app.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/src/app/requirements.txt b/src/app/requirements.txt new file mode 100644 index 0000000..7f5da7f --- /dev/null +++ b/src/app/requirements.txt @@ -0,0 +1,2 @@ +scikit-learn +pandas \ No newline at end of file diff --git a/src/app/src/__init__.py b/src/app/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/app/src/admin.py b/src/app/src/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/src/app/src/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/src/app/src/apps.py b/src/app/src/apps.py new file mode 100644 index 0000000..b717e82 --- /dev/null +++ b/src/app/src/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + +class SrcConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'src' + diff --git a/src/app/src/classifier.py b/src/app/src/classifier.py new file mode 100644 index 0000000..fa8ce1d --- /dev/null +++ b/src/app/src/classifier.py @@ -0,0 +1,28 @@ +from sklearn.neighbors import KNeighborsClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier +from sklearn.linear_model import SGDClassifier + +def logistic_regression(X_train, y_train, X_test): + logistic = LogisticRegression(max_iter = 100000) + logistic.fit(X_train,y_train) + + return logistic.predict(X_test), logistic + +def decision_tree(X_train, y_train, X_test): + decisionTree = DecisionTreeClassifier() + decisionTree = decisionTree.fit(X_train,y_train) + + return decisionTree.predict(X_test), decisionTree + +def knn_classifier(X_train, y_train, X_test): + knn = KNeighborsClassifier(n_neighbors=5) + knn.fit(X_train, y_train) + + return knn.predict(X_test), knn + +def sgd_classifier(X_train, y_train, X_test): + sgd = SGDClassifier(loss="hinge", penalty="l2") + sgd.fit(X_train, y_train) + + return sgd.predict(X_test), sgd diff --git a/src/app/src/forms.py b/src/app/src/forms.py new file mode 100644 index 0000000..1e9e2a5 --- /dev/null +++ b/src/app/src/forms.py @@ -0,0 +1,8 @@ +from django import forms +from .models import Text + +class TextForm(forms.ModelForm): + class Meta: + model = Text + fields = ['title', 'url'] + diff --git a/src/app/src/html/assets/favicon.svg b/src/app/src/html/assets/favicon.svg new file mode 100644 index 0000000..5c0be60 --- /dev/null +++ b/src/app/src/html/assets/favicon.svg @@ -0,0 +1,3 @@ + + 📰 + diff --git a/src/app/src/html/home.html b/src/app/src/html/home.html new file mode 100644 index 0000000..4b7c53f --- /dev/null +++ b/src/app/src/html/home.html @@ -0,0 +1,68 @@ + + + +FakeNews + + +{% load static %} + + + + + + + + + +
+
+

Fake News Detector

+

Enter title and url of the news:

+
+ {% csrf_token %} + {{ form }} + +
+

+ {{ result }} +

+
+
+ + + +by: LIVET Hugo & DE LA FUENTE Axel + + + + diff --git a/src/app/src/migrations/__init__.py b/src/app/src/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/app/src/models.py b/src/app/src/models.py new file mode 100644 index 0000000..6bb3ec6 --- /dev/null +++ b/src/app/src/models.py @@ -0,0 +1,45 @@ +from django.db import models +from urllib.parse import urlparse + +# Create your models here. + +class Text(models.Model): + title = models.CharField(max_length=100) + url = models.URLField() + + class Meta: + app_label = 'app' + + def __str__(self): + return self.title + +def get_domain(url): + parsed_url = urlparse(url) + if parsed_url.netloc.startswith('www.'): + return parsed_url.netloc[4:] + else: + return parsed_url.netloc + +from .preprocessing import * +from .classifier import * +import pandas as pd +import numpy as np + +def prediction(title, url): + domain = get_domain(url) + + input_df = pd.DataFrame({'title': title, 'news_url': url, 'source_domain': domain}, index = ['1']) + concat_df = pd.concat([load_datas(), input_df], ignore_index=True) + + input_df_tokenized = tokenize_datas(concat_df).tail(1) + input_df_tokenized.drop("tweet_num", axis=1, inplace=True) + input_df_tokenized.drop("real", axis=1, inplace=True) + + #return input_df_tokenized + + X, y = no_split_process() + + prediction, knn = knn_classifier(X, y, input_df_tokenized) + + return prediction + diff --git a/src/app/src/preprocessing.py b/src/app/src/preprocessing.py new file mode 100644 index 0000000..c713ed9 --- /dev/null +++ b/src/app/src/preprocessing.py @@ -0,0 +1,54 @@ +import pandas as pd + +from sklearn.preprocessing import LabelEncoder + +from sklearn.model_selection import train_test_split + +def process(): + df = load_datas() + + df = tokenize_datas(df) + + X, y = features_selection(df) + + X_train, X_test, y_train, y_test = split_df(X, y) + + return X_train, X_test, y_train, y_test + +def no_split_process(): + df = load_datas() + + df = tokenize_datas(df) + + X, y = features_selection(df) + + return X, y + +def load_datas(): + return pd.read_csv("../../datas/FakeNewsNet.csv") + +def tokenize_datas(df): + le = LabelEncoder() + label = le.fit_transform(df['news_url']) + label1=le.fit_transform(df['title']) + label2=le.fit_transform(df['source_domain']) + df.drop("news_url", axis=1, inplace=True) + df.drop("title", axis=1, inplace=True) + df.drop("source_domain", axis=1, inplace=True) + + df["news_url"] = label + df["title"] = label1 + df["source_domain"] = label2 + + return df + +def features_selection(df): + features = ["title", "news_url", "source_domain"] + + return df[features].fillna(''), df["real"] + + +def split_df(X, y): + X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.30, random_state=42) + return X_train, X_test, y_train, y_test + diff --git a/src/app/src/tests.py b/src/app/src/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/src/app/src/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/src/app/src/urls.py b/src/app/src/urls.py new file mode 100644 index 0000000..5119061 --- /dev/null +++ b/src/app/src/urls.py @@ -0,0 +1,7 @@ +from django.urls import path + +from . import views + +urlpatterns = [ + path("", views.index, name="index"), +] diff --git a/src/app/src/views.py b/src/app/src/views.py new file mode 100644 index 0000000..426ea89 --- /dev/null +++ b/src/app/src/views.py @@ -0,0 +1,27 @@ +from django.shortcuts import render, redirect +# Create your views here. + +from .forms import TextForm +from .models import * + +def index(request): + if request.method == 'POST': + form = TextForm(request.POST) + if form.is_valid(): + # get datas from the news + title = form.cleaned_data["title"] + url = form.cleaned_data["url"] + + # get result from model + if(prediction(title, url) == 1): + result = "This is not fake news !" + else: + result = "It's a Fake News !!!" + + # reset form + form = TextForm() + + return render(request, 'home.html', {'form':form, 'result':result}) + else: + form = TextForm() + return render(request, 'home.html', {'form': form}) diff --git a/src/main.py b/src/main.py index bbb5456..d18aae6 100644 --- a/src/main.py +++ b/src/main.py @@ -5,6 +5,8 @@ import analysis from warnings import simplefilter simplefilter(action='ignore', category=FutureWarning) +import pandas as pd + if __name__ == '__main__': print("Start learning...") diff --git a/src/preprocessing.py b/src/preprocessing.py index e010468..ceee027 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -32,6 +32,8 @@ def tokenize_datas(df): df["title"] = label1 df["source_domain"] = label2 + print(df) + return df def features_selection(df):