다음 절에서 검증 곡선을 사용하여 이런 문제들을 다루는 법을 알아보겠습니다. 먼저 사이킷런의 학습 곡선 함수를 사용하여 모델을 평가해 보죠.
>>> import matplotlib.pyplot as plt
>>> from sklearn.model_selection import learning_curve
>>> pipe_lr = make_pipeline(StandardScaler(),
... LogisticRegression(penalty='l2',
... max_iter=10000))
>>> train_sizes, train_scores, test_scores =\
... learning_curve(estimator=pipe_lr,
... X=X_train,
... y=y_train,
... train_sizes=np.linspace(
... 0.1, 1.0, 10),
... cv=10,
... n_jobs=1)
>>> train_mean = np.mean(train_scores, axis=1)
>>> train_std = np.std(train_scores, axis=1)
>>> test_mean = np.mean(test_scores, axis=1)
>>> test_std = np.std(test_scores, axis=1)
>>> plt.plot(train_sizes, train_mean,
... color='blue', marker='o',
... markersize=5, label='Training accuracy')
>>> plt.fill_between(train_sizes,
... train_mean+train_std,
... train_mean-train_std,
... alpha=0.15, color='blue')