안타깝지만 SBS 알고리즘은 아직 사이킷런에 구현되어 있지 않습니다.[17] 간단한 알고리즘이므로 파이썬으로 직접 구현해 보죠.

    from sklearn.base import clone
    from itertools import combinations
    import numpy as np
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    
    class SBS:
        """Sequential Backward Selection (SBS) for greedy feature reduction.

        Starting from the full feature set, ``fit`` repeatedly drops the one
        feature whose removal yields the best score on an internal hold-out
        split, until ``k_features`` features remain.

        Parameters
        ----------
        estimator : object
            Model exposing ``fit(X, y)`` and ``predict(X)``. A copy made with
            ``clone`` is stored, so this class never fits the caller's own
            instance.
        k_features : int
            Number of features to keep. Must be at least 1. If it is not
            smaller than the number of columns in ``X``, no feature is
            removed.
        scoring : callable, default=accuracy_score
            Called as ``scoring(y_true, y_pred)``; larger values are treated
            as better.
        test_size : float or int, default=0.25
            Forwarded to ``train_test_split`` for the internal hold-out split.
        random_state : int, default=1
            Forwarded to ``train_test_split``.

        Attributes (set by ``fit``)
        ---------------------------
        indices_ : tuple of int
            Column indices of the finally selected features.
        subsets_ : list of tuple of int
            The best subset found at each size, starting with all features.
        scores_ : list
            Hold-out score of each entry in ``subsets_``.
        k_score_ : object
            Score of the final subset (last entry of ``scores_``).
        """

        def __init__(self, estimator, k_features,
                     scoring=accuracy_score,
                     test_size=0.25, random_state=1):
            self.scoring = scoring
            self.estimator = clone(estimator)
            self.k_features = k_features
            self.test_size = test_size
            self.random_state = random_state

        def fit(self, X, y):
            """Run backward selection on ``X`` / ``y`` and return ``self``.

            ``X`` must expose ``shape`` and support ``X[:, columns]``
            indexing (e.g. a 2-D NumPy array).

            Raises
            ------
            ValueError
                If ``k_features`` is smaller than 1.
            """
            # Fail fast: otherwise the loop below would run the complete
            # elimination and only then try to fit on a zero-column subset.
            if self.k_features < 1:
                raise ValueError(
                    f"k_features must be at least 1, got {self.k_features!r}")

            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=self.test_size,
                random_state=self.random_state)

            # Baseline: score of the full feature set.
            dim = X_train.shape[1]
            self.indices_ = tuple(range(dim))
            self.subsets_ = [self.indices_]
            self.scores_ = [self._calc_score(X_train, y_train,
                                             X_test, y_test, self.indices_)]

            while dim > self.k_features:
                # Every subset obtained by dropping exactly one feature.
                subsets = list(combinations(self.indices_, r=dim - 1))
                scores = [self._calc_score(X_train, y_train,
                                           X_test, y_test, subset)
                          for subset in subsets]

                # argmax returns the first maximum, so ties keep the
                # earliest subset in combinations() order.
                best = int(np.argmax(scores))
                self.indices_ = subsets[best]
                self.subsets_.append(self.indices_)
                self.scores_.append(scores[best])
                dim -= 1

            self.k_score_ = self.scores_[-1]

            return self

        def transform(self, X):
            """Return the columns of ``X`` selected by the last ``fit`` call."""
            return X[:, self.indices_]

        def _calc_score(self, X_train, y_train, X_test, y_test, indices):
            """Fit on the given columns of the training data and score it.

            The stored estimator is refitted in place on
            ``X_train[:, indices]``; the returned value is
            ``scoring(y_test, predictions on X_test[:, indices])``.
            """
            self.estimator.fit(X_train[:, indices], y_train)
            y_pred = self.estimator.predict(X_test[:, indices])
            return self.scoring(y_test, y_pred)

     

     


    [17] 역주: 사이킷런 0.24 버전에 순차 특성 선택 알고리즘인 SequentialFeatureSelector 클래스가 추가되었습니다.

    신간 소식 구독하기
    뉴스레터에 가입하시고 이메일로 신간 소식을 받아 보세요.