def build_vocab_counter(sentences, verbose=True):
    """
    Count how many times each word occurs across all sentences.

    :param sentences: list of list of words
    :param verbose: when False, ``disable=True`` is passed to the ``tqdm``
        wrapper around the outer loop
    :return: dictionary mapping each word to its total number of occurrences
    """
    counts = {}
    progress = tqdm(sentences, disable=not verbose)
    for tokens in progress:
        for token in tokens:
            # A word seen for the first time starts from an implicit zero.
            counts[token] = counts.get(token, 0) + 1
    return counts
def check_coverage(vocab_counter, embeddings_index):
hit = {}
oov = {}
sum_hits = 0
sum_oovs = 0
for word, cnt in tqdm(vocab_counter.items()):
try:
hit[word] = embeddings_index[word]
sum_hits += cnt
except:
oov[word] = vocab_counter[word]
sum_oovs += cnt