# Word count per test comment: each value is coerced to str and split on
# runs of whitespace, so the feature is the number of resulting tokens.
df_test["num_words"] = df_test["comment_text"].apply(
    lambda comment: len(str(comment).split()),
)
# Distinct-token count per comment. Tokens come from a plain whitespace
# split of the str-coerced value and are compared exactly (case-sensitive,
# punctuation kept), then de-duplicated through a set.
df_train["num_unique_words"] = df_train["comment_text"].apply(
    lambda comment: len(set(str(comment).split())),
)
df_test["num_unique_words"] = df_test["comment_text"].apply(
    lambda comment: len(set(str(comment).split())),
)
# Character count per comment. The value is passed through str() before it
# is measured, matching the other text features in this section, so a
# non-string cell (e.g. a float NaN left by a missing comment) is measured by
# its string form instead of raising TypeError inside len(). For genuine
# strings the result is unchanged.
df_train["num_chars"] = df_train["comment_text"].apply(
    lambda sen: len(str(sen))
)
df_test["num_chars"] = df_test["comment_text"].apply(
    lambda sen: len(str(sen))
)
# Stopword count per comment: the str-coerced value is lower-cased, split on
# whitespace, and every token found in `stop_words` (defined outside this
# section) adds one to the total. Repeated stopwords are counted each time.
df_train["num_stopwords"] = df_train["comment_text"].apply(
    lambda comment: sum(
        token in stop_words for token in str(comment).lower().split()
    )
)
df_test["num_stopwords"] = df_test["comment_text"].apply(
    lambda comment: sum(
        token in stop_words for token in str(comment).lower().split()
    )
)