더북(TheBook)
def build_vocab_counter(sentences, verbose=True):
    """
    Count how many times each word occurs across all sentences.

    :param sentences: list of list of words
    :param verbose: when False, the tqdm progress bar is disabled
    :return: dictionary mapping each word to its count
    """
    counts = {}
    progress = tqdm(sentences, disable=(not verbose))
    for tokens in progress:
        for token in tokens:
            # First sighting starts from 0, later sightings increment.
            counts[token] = counts.get(token, 0) + 1
    return counts

def check_coverage(vocab_counter, embeddings_index):
    """
    Split the vocabulary into words found in / missing from the embeddings.

    :param vocab_counter: dictionary of words and their count
    :param embeddings_index: object indexed by word; a missing word is
        assumed to raise ``KeyError`` (as a dict does) -- confirm for
        other index types
    """
    hit = {}
    oov = {}
    sum_hits = 0
    sum_oovs = 0

    for word, cnt in tqdm(vocab_counter.items()):
        # Only the lookup is guarded, and only against a missing key, so
        # unrelated errors (and Ctrl-C) are no longer silently counted as OOV.
        try:
            vector = embeddings_index[word]
        except KeyError:
            oov[word] = cnt
            sum_oovs += cnt
        else:
            hit[word] = vector
            sum_hits += cnt
    # NOTE(review): the visible excerpt ends here without returning or
    # reporting hit/oov/sum_hits/sum_oovs; the remainder appears to be cut off.
신간 소식 구독하기
뉴스레터에 가입하시고 이메일로 신간 소식을 받아 보세요.