# Punctuation feature: count of characters per comment that appear in
# string.punctuation. The same counter is applied to both frames.
def _count_punctuation(text):
    """Return how many characters of ``str(text)`` are in ``string.punctuation``."""
    return sum(1 for ch in str(text) if ch in string.punctuation)


df_train["num_punctuations"] = df_train["comment_text"].apply(_count_punctuation)
df_test["num_punctuations"] = df_test["comment_text"].apply(_count_punctuation)
# Number of upper-case words in the text (tokens for which str.isupper() is
# true), NOT title-case words -- those are counted in "num_words_title".
def _count_upper_words(text):
    """Return how many whitespace-separated tokens of ``str(text)`` pass ``str.isupper``."""
    return sum(1 for token in str(text).split() if token.isupper())


df_train["num_words_upper"] = df_train["comment_text"].apply(_count_upper_words)
df_test["num_words_upper"] = df_test["comment_text"].apply(_count_upper_words)
# Title-case feature: count of whitespace-separated tokens per comment for
# which str.istitle() is true. The same counter is applied to both frames.
def _count_title_words(text):
    """Return how many whitespace-separated tokens of ``str(text)`` pass ``str.istitle``."""
    return sum(1 for token in str(text).split() if token.istitle())


df_train["num_words_title"] = df_train["comment_text"].apply(_count_title_words)
df_test["num_words_title"] = df_test["comment_text"].apply(_count_title_words)