import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


def getColumnsForPredictionAndPredict(df, columns, columnGoal, algoOfPrediction):
    """Fit a regressor on the selected columns and return its predictions.

    The model is trained on ``df[columns]`` against ``df[columnGoal]`` and
    then asked to predict on the very same rows, so the returned values are
    in-sample predictions (no train/test split is performed here).

    Args:
        df: Table-like object indexable by column label(s); presumably a
            pandas DataFrame -- confirm against callers.
        columns: Labels of the predictor columns.
        columnGoal: Label of the target column.
        algoOfPrediction: Either ``"Linear Regression"`` or
            ``"Random Forest"``.

    Returns:
        Whatever ``model.predict`` returns for the predictor rows.

    Raises:
        NameError: If ``algoOfPrediction`` is not one of the supported names.
    """
    predictors = df[columns]
    target = df[columnGoal]

    if algoOfPrediction == "Linear Regression":
        model = LinearRegression()
    elif algoOfPrediction == "Random Forest":
        model = RandomForestRegressor(n_estimators=100)
    else:
        # str() keeps the NameError contract even when the caller passes a
        # non-string (e.g. None); plain concatenation would raise TypeError.
        raise NameError("No method name : \"" + str(algoOfPrediction) + "\"")

    model.fit(predictors, target)
    return model.predict(predictors)


def correlation_matrix(df, columns):
    """Plot the pairwise correlation matrix of the selected columns.

    The correlation table is also printed to standard output.

    Args:
        df: Table-like object indexable by a list of column labels and
            exposing ``.corr()``; presumably a pandas DataFrame -- confirm
            against callers.
        columns: Labels of the columns to correlate.

    Returns:
        The matplotlib figure containing the matrix plot and its colorbar.
    """
    new_df = df[columns]
    correlations = new_df.corr()
    print(correlations)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Correlation coefficients lie in [-1, 1]; pin the color scale to that
    # range so colors are comparable between figures.
    cax = ax.matshow(correlations, vmin=-1, vmax=1)
    fig.colorbar(cax)

    # Take the labels from the matrix that is actually drawn rather than from
    # the input frame: corr() may return fewer columns than it was given
    # (e.g. non-numeric columns being dropped), which would misalign ticks.
    labels = list(correlations.columns)
    ticks = np.arange(len(labels))
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    return fig