You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.2 KiB
38 lines
1.2 KiB
from sklearn.model_selection import train_test_split
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.ensemble import RandomForestRegressor
|
|
from sklearn.metrics import f1_score
|
|
from sklearn.metrics import accuracy_score
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
|
|
def getColumnsForPredictionAndPredict(df, columns, columnGoal, algoOfPrediction):
    """Fit a regressor on the selected columns and predict the same rows.

    The model is trained with ``df[columns]`` as predictors and
    ``df[columnGoal]`` as target, then asked to predict from those same
    predictors, so the result is an in-sample prediction (no train/test
    split is performed here).

    Args:
        df: Table indexable by column name(s); presumably a pandas
            DataFrame -- confirm against callers.
        columns: Column selector for the predictor features.
        columnGoal: Column selector for the target to predict.
        algoOfPrediction: Either "Linear Regression" or "Random Forest".

    Returns:
        Whatever ``model.predict`` returns for the predictors.

    Raises:
        NameError: If ``algoOfPrediction`` is not one of the supported names.
    """
    predictors = df[columns]
    target = df[columnGoal]

    if algoOfPrediction == "Linear Regression":
        model = LinearRegression()
    elif algoOfPrediction == "Random Forest":
        model = RandomForestRegressor(n_estimators=100)
    else:
        # Format instead of concatenating: a non-string selector (None, int, ...)
        # must still produce the documented NameError, not a TypeError.
        raise NameError('No method name : "{}"'.format(algoOfPrediction))

    model.fit(predictors, target)
    return model.predict(predictors)
|
|
|
|
def correlation_matrix(df, columns):
    """Print and plot the pairwise correlation matrix of the selected columns.

    Computes ``df[columns].corr()``, prints it to stdout, and draws it with
    ``matshow`` on a new matplotlib figure with a colorbar.

    Args:
        df: Table indexable by a list of column names; presumably a pandas
            DataFrame -- confirm against callers.
        columns: Columns to include in the correlation matrix.

    Returns:
        The matplotlib figure containing the plot.
    """
    correlations = df[columns].corr()
    print(correlations)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Pin the colour scale to the full [-1, 1] correlation range.
    cax = ax.matshow(correlations, vmin=-1, vmax=1)
    fig.colorbar(cax)

    # Take ticks and labels from the matrix itself rather than from the input
    # columns: depending on the pandas version, corr() may drop non-numeric
    # columns, and labelling from the input would then mislabel the axes.
    labels = list(correlations.columns)
    ticks = np.arange(len(labels))
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    return fig
|