4. 다음 데이터를 파티셔닝한 후 로지스틱 회귀모형을 피팅하기
>>> df_credit = pd.read_csv('c:/creditset.csv', index_col=0) >>> print(df_credit.shape) (2000, 5) # 파티셔닝하기 >>> X = df_credit.loc[:, ['income', 'age', 'loan']] >>> Y = df_credit['default10yr'] >>> print(X.shape) (2000, 3) >>> X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=0.3, random_state=42) >>> model = linear_model.LogisticRegression() >>> model.fit(X, Y) >>> model.coef_ array([[-5.76078935e-05, -1.34998242e-01, 8.94164645e-04]])
5. Test 데이터로 모형의 성능 중 정분류율 구하기
>>> Y_pred = model.predict(X_test) >>> Y_pred2 = [0 if x < 0.5 else 1 for x in Y_pred] >>> Y_pred3 = Y_pred2 == Y_test >>> np.mean(Y_pred3 == Y_test) >>> from sklearn.metrics import classification_report, confusion_matrix >>> confusion_matrix(Y_test, Y_pred3) array([[ 12, 496], [ 43, 49]], dtype=int64) >>> classification_report(Y_test, Y_pred3) ' precision recall f1-score support\n\n 0 0.22 0.02 0.04 508\n 1 0.09 0.53 0.15 92\n\n accuracy 0.10 600\n macro avg 0.15 0.28 0.10 600\nweighted avg 0.20 0.10 0.06 600\n'