# Column-name groups. Each list holds column labels as plain strings; they are
# kept as lists (not tuples/sets) because they are concatenated with `+` below.
# NOTE(review): the per-group descriptions are inferred from the variable
# names only — confirm against the dataset schema.

# Primary columns.
main_cols = [
    "id",
    "target",
    "comment_text",
]

# Secondary target columns.
sub_target_cols = [
    "severe_toxicity",
    "obscene",
    "threat",
    "insult",
    "identity_attack",
    "sexual_explicit",
]

# Identity columns that are explicitly named here.
identity_cols = [
    "male",
    "female",
    "homosexual_gay_or_lesbian",
    "christian",
    "jewish",
    "muslim",
    "black",
    "white",
    "psychiatric_or_mental_illness",
]

# Metadata columns.
metadata_cols = [
    "created_date",
    "publication_id",
    "parent_id",
    "article_id",
    "rating",
    "funny",
    "wow",
    "sad",
    "likes",
    "disagree",
    "identity_annotator_count",
    "toxicity_annotator_count",
]
# Collect every column of df_train that is not listed in any of the four
# named groups (main, sub-target, identity, metadata), preserving the order
# in which the columns appear in df_train.
# NOTE(review): the name suggests the leftovers are identity columns that go
# unused — nothing here checks that; confirm against the dataset schema.
#
# The set of known names is built once up front: concatenating the four lists
# inside the comprehension's condition would rebuild and linearly scan the
# combined list for every single column.
_known_cols = set(main_cols + sub_target_cols + identity_cols + metadata_cols)
unused_identity = [col for col in df_train.columns if col not in _known_cols]