from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


def _as_feature_list(data_name):
    """Return *data_name* as a list, wrapping a single column name."""
    if isinstance(data_name, str):
        return [data_name]
    return list(data_name)


def perform_classification(
    data,
    data_name,
    target_name,
    test_size,
    *,
    max_classes=10,
    random_state=42,
):
    """Fit a logistic-regression classifier on selected columns of *data*.

    Feature columns of ``object`` dtype are label-encoded, and so is the
    target when its dtype is ``object``. The rows are split into train and
    test parts, a ``LogisticRegression`` with default settings is fitted on
    the train part, and accuracy is measured on the test part.

    Args:
        data: Table indexed by column name; the code relies on
            ``select_dtypes``, ``dtype`` and ``nunique``, so a pandas
            DataFrame is expected.
        data_name: Feature column name, or an iterable of feature column
            names.
        target_name: Name of the target column.
        test_size: Forwarded unchanged to ``train_test_split``.
        max_classes: A non-object target with more distinct values than
            this is rejected as continuous. Defaults to 10.
        random_state: Seed forwarded to ``train_test_split``. Defaults
            to 42.

    Returns:
        A ``(model, label_encoders, accuracy)`` tuple. ``label_encoders``
        maps each encoded column name (features and, when encoded, the
        target) to its fitted ``LabelEncoder``.

    Raises:
        ValueError: If the target is not of object dtype and has more than
            ``max_classes`` distinct values.
    """
    features = _as_feature_list(data_name)

    # Work on an explicit copy: the encoded columns are assigned back into
    # X, and assigning into a selection of the caller's frame can trigger
    # pandas' SettingWithCopyWarning.
    X = data[features].copy()
    y = data[target_name]

    label_encoders = {}
    for column in X.select_dtypes(include=['object']).columns:
        encoder = LabelEncoder()
        X[column] = encoder.fit_transform(X[column])
        label_encoders[column] = encoder

    if y.dtype == 'object':
        encoder = LabelEncoder()
        y = encoder.fit_transform(y)
        # Stored under the target name so make_prediction can map the
        # predicted code back to the original label.
        label_encoders[target_name] = encoder
    elif y.nunique() > max_classes:
        raise ValueError("The target variable seems to be continuous. Please select a categorical target for classification.")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return model, label_encoders, accuracy


def make_prediction(model, label_encoders, data_name, target_name, input_values):
    """Predict the target for a single observation.

    Args:
        model: Fitted estimator exposing ``predict``.
        label_encoders: Mapping of column name to an encoder exposing
            ``transform`` / ``inverse_transform``, as returned by
            ``perform_classification``.
        data_name: Feature column name, or an iterable of feature column
            names, in the order the model was fitted with.
        target_name: Name of the target column; when present in
            ``label_encoders`` the prediction is decoded with it.
        input_values: One raw value per feature, in the same order as
            ``data_name``. A single scalar is accepted for one feature.

    Returns:
        The predicted value for the observation, decoded to the original
        label when the target has an entry in ``label_encoders``.

    Raises:
        ValueError: If the number of input values differs from the number
            of features.
    """
    features = _as_feature_list(data_name)

    # Accept a bare scalar (including a string) as a one-value observation.
    if isinstance(input_values, (str, bytes)) or not hasattr(input_values, "__iter__"):
        values = [input_values]
    else:
        values = list(input_values)

    # zip() would silently drop the surplus entries, so check up front.
    if len(values) != len(features):
        raise ValueError(
            f"Expected {len(features)} input value(s) for features "
            f"{features}, got {len(values)}."
        )

    X_new = []
    for feature, value in zip(features, values):
        if feature in label_encoders:
            value = label_encoders[feature].transform([value])[0]
        X_new.append(value)

    prediction = model.predict([X_new])

    if target_name in label_encoders:
        prediction = label_encoders[target_name].inverse_transform(prediction)

    return prediction[0]