머신 러닝 교과서 with 파이썬, 사이킷런, 텐서플로(개정 3판): 7.2.1 간단한 다수결 투표 분류기 구현

이들을 모두 결합하여 MajorityVoteClassifier 파이썬 클래스를 구현해 봅시다.

from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import numpy as np
import operator

class MajorityVoteClassifier(BaseEstimator,
                             ClassifierMixin):
    """다수결 투표 앙상블 분류기

    매개변수
    ----------
    classifiers : 배열 타입, 크기 = [n_classifiers]
      앙상블에 사용할 분류기

    vote : str, {'classlabel', 'probability'}
      기본값: 'classlabel'
      'classlabel'이면 예측은 다수인 클래스 레이블의 인덱스가 됩니다
      'probability'면 확률 합이 가장 큰 인덱스로
      클래스 레이블을 예측합니다(보정된 분류기에 추천합니다)

    weights : 배열 타입, 크기 = [n_classifiers]
      선택 사항, 기본값: None
      'int' 또는 'float' 값의 리스트가 주어지면 분류기가 이 중요도로 가중치됩니다
      'weights=None'이면 동일하게 취급합니다

    """
    def __init_(self, classifiers,
                vote='classlabel', weights=None):
        self.classifiers = classifiers
        self.named_classifiers = {key: value for
                                  key, value in
                                  _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights
    
    def fit(self, X, y):
        """분류기를 학습합니다

        매개변수
        ----------
        X : { 배열 타입, 희소 행렬>},
            크기 = [n_samples, n_features]
            훈련 샘플 행렬
        y : 배열 타입, 크기 = [n_samples]
            타깃 클래스 레이블 벡터

        반환값
        -------
        self : 객체

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError("vote는 'probability' 또는 'classlabel'이어야 합니다"
                             "; (vote=%r)이 입력되었습니다."
                              % self.vote)

        if self.weights and len(self.weights) != len(self.classifiers):
            raise ValueError('분류기와 가중치 개수는 같아야 합니다'
                             '; 가중치 %d 개, 분류기 %d 개'
                              % (len(self.weights), len(self.classifiers)))
        # self.predict 메서드에서 np.argmax를 호출할 때
        # 클래스 레이블이 0부터 시작되어야 하므로 LabelEncoder를 사용합니다
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X,
                                        self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self

신간 소식 구독하기

뉴스레터에 가입하시고 이메일로 신간 소식을 받아 보세요.