# Load the Okt morphological analyzer from KoNLPy.
from konlpy.tag import Okt
from tqdm import tqdm

okt = Okt()

# POS tags dropped during cleaning: particles (Josa), endings (Eomi)
# and punctuation.
_EXCLUDED_TAGS = ('Josa', 'Eomi', 'Punctuation')


def okt_clean(text):
    """Return *text* with particles, endings and punctuation removed.

    The text is tagged with ``okt.pos(text, stem=True)``; every token whose
    tag is not in ``_EXCLUDED_TAGS`` is kept, and the kept tokens are joined
    with single spaces.

    Args:
        text: The string to clean.

    Returns:
        A space-separated string of the remaining tokens.
    """
    return " ".join(
        morph
        for morph, tag in okt.pos(text, stem=True)
        if tag not in _EXCLUDED_TAGS
    )


# Enable the progress_* methods (progress bar variants of map/apply) on
# pandas objects.
tqdm.pandas()

# NOTE(review): `train` and `test` are not defined in this snippet; they are
# presumably DataFrames with a 'title' text column loaded earlier -- confirm.
train['title'] = train['title'].progress_map(okt_clean)
test['title'] = test['title'].progress_map(okt_clean)
실행 결과
100%|██████████| 45654/45654 [01:44<00:00, 436.03it/s]
100%|██████████| 9131/9131 [00:16<00:00, 560.75it/s]