✨ add classifier to web app, can now detect fake news from app
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
parent
57e34833c0
commit
b0c32a3acb
@ -0,0 +1,28 @@
|
|||||||
|
from sklearn.neighbors import KNeighborsClassifier
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
from sklearn.linear_model import SGDClassifier
|
||||||
|
|
||||||
|
def logistic_regression(X_train, y_train, X_test):
    """Fit a logistic regression model and predict labels for the test set.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to run ``predict`` on.

    Returns:
        A ``(predictions, model)`` tuple: the predictions for ``X_test``
        and the fitted ``LogisticRegression`` estimator.
    """
    # Very high iteration cap -- presumably so the solver always converges.
    model = LogisticRegression(max_iter=100000)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    return predictions, model
|
||||||
|
|
||||||
|
def decision_tree(X_train, y_train, X_test):
    """Fit a decision tree classifier and predict labels for the test set.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to run ``predict`` on.

    Returns:
        A ``(predictions, model)`` tuple: the predictions for ``X_test``
        and the fitted ``DecisionTreeClassifier`` estimator.
    """
    # Default hyper-parameters; ``fit`` returns the estimator itself, so
    # chaining keeps a reference to the same fitted object.
    tree = DecisionTreeClassifier().fit(X_train, y_train)

    predictions = tree.predict(X_test)
    return predictions, tree
|
||||||
|
|
||||||
|
def knn_classifier(X_train, y_train, X_test):
    """Fit a k-nearest-neighbours classifier and predict the test set.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to run ``predict`` on.

    Returns:
        A ``(predictions, model)`` tuple: the predictions for ``X_test``
        and the fitted ``KNeighborsClassifier`` estimator.
    """
    # The classifier is configured with 5 neighbours.
    neighbours_model = KNeighborsClassifier(n_neighbors=5)
    neighbours_model.fit(X_train, y_train)

    predictions = neighbours_model.predict(X_test)
    return predictions, neighbours_model
|
||||||
|
|
||||||
|
def sgd_classifier(X_train, y_train, X_test):
    """Fit an SGD-trained linear classifier and predict the test set.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Features to run ``predict`` on.

    Returns:
        A ``(predictions, model)`` tuple: the predictions for ``X_test``
        and the fitted ``SGDClassifier`` estimator.
    """
    # Hinge loss with an L2 penalty.
    model = SGDClassifier(loss="hinge", penalty="l2")
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    return predictions, model
|
After Width: | Height: | Size: 92 B |
@ -0,0 +1,54 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
def process():
    """Run the full preprocessing pipeline and return a train/test split.

    Loads the dataset, label-encodes its text columns, selects the
    feature and target columns, then splits them with ``split_df``.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple.
    """
    # Load -> encode -> select, each step feeding the next.
    encoded_frame = tokenize_datas(load_datas())
    X, y = features_selection(encoded_frame)

    train_X, test_X, train_y, test_y = split_df(X, y)
    return train_X, test_X, train_y, test_y
|
||||||
|
|
||||||
|
def no_split_process():
    """Run the preprocessing pipeline without a train/test split.

    Loads the dataset, label-encodes its text columns and selects the
    feature and target columns.

    Returns:
        An ``(X, y)`` tuple holding the full feature frame and the target
        column.
    """
    # Load -> encode -> select, each step feeding the next.
    encoded_frame = tokenize_datas(load_datas())
    features, target = features_selection(encoded_frame)
    return features, target
|
||||||
|
|
||||||
|
def load_datas(path="../../datas/FakeNewsNet.csv"):
    """Load the FakeNewsNet dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the project's
            ``../../datas/FakeNewsNet.csv``. A relative path is resolved
            against the current working directory of the process, so pass
            an explicit path when running from another directory.

    Returns:
        A ``pandas.DataFrame`` with the contents of the CSV file.
    """
    return pd.read_csv(path)
|
||||||
|
|
||||||
|
def tokenize_datas(df):
    """Label-encode the ``news_url``, ``title`` and ``source_domain`` columns.

    Each of the three columns is replaced by the output of
    ``LabelEncoder.fit_transform``. The frame is modified in place: the
    original columns are dropped and the encoded ones are re-added, so they
    end up as the last three columns (``news_url``, ``title``,
    ``source_domain``).

    Args:
        df: DataFrame containing the three columns listed above.

    Returns:
        The same ``df`` object, with the three columns encoded.
    """
    text_columns = ("news_url", "title", "source_domain")

    # One encoder instance is refit for every column, so the fitted
    # mappings are not kept once this function returns.
    encoder = LabelEncoder()

    # Encode all columns first, before anything is removed from the frame.
    encoded = [(name, encoder.fit_transform(df[name])) for name in text_columns]

    df.drop(list(text_columns), axis=1, inplace=True)

    for name, codes in encoded:
        df[name] = codes

    return df
|
||||||
|
|
||||||
|
def features_selection(df):
    """Select the model features and the target column from ``df``.

    Args:
        df: DataFrame containing the ``title``, ``news_url``,
            ``source_domain`` and ``real`` columns.

    Returns:
        An ``(X, y)`` tuple: ``X`` holds the three feature columns with
        missing values replaced by an empty string, ``y`` is the ``real``
        column.
    """
    feature_columns = ["title", "news_url", "source_domain"]

    X = df[feature_columns].fillna('')
    y = df["real"]
    return X, y
|
||||||
|
|
||||||
|
|
||||||
|
def split_df(X, y, test_size=0.30, random_state=42):
    """Split features and labels into train and test subsets.

    Args:
        X: Feature data passed to ``train_test_split``.
        y: Target data passed to ``train_test_split``.
        test_size: Share of the data kept for the test subset. Defaults to
            ``0.30``.
        random_state: Seed forwarded to ``train_test_split`` so the split
            is reproducible. Defaults to ``42``.

    Returns:
        A ``(X_train, X_test, y_train, y_test)`` tuple.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test
|
||||||
|
|
Loading…
Reference in new issue