import math, os
import json
import unidecode
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import transformers
from transformers import BertConfig
from transformers import BertTokenizerFast
def preprocess(df):
    """Normalize the ``comment_text`` column of *df* and return *df*.

    The column is rewritten in place on the DataFrame that was passed in;
    the same object is returned for convenience. Steps, in order:

    1. Replace every URL (``http://``, ``https://`` or ``www.`` prefixed
       run of non-whitespace) with a single space.
    2. Transliterate each value with ``unidecode.unidecode``.
    3. Lowercase the text.

    NOTE(review): missing values in ``comment_text`` are passed through
    ``.str.replace`` unchanged and then handed to ``unidecode.unidecode``
    as-is -- confirm the column contains no NaN before calling this.
    """
    url_pattern = r"https?://\S+|www\.\S+"

    # regex=True must be explicit: since pandas 2.0 the default is
    # regex=False, which would treat the pattern as a literal string and
    # leave every URL in place.
    df["comment_text"] = df["comment_text"].str.replace(
        url_pattern, " ", regex=True
    )

    # Transliterate via unidecode.
    df["comment_text"] = df["comment_text"].map(unidecode.unidecode)

    # Lowercase after transliteration.
    df["comment_text"] = df["comment_text"].str.lower()

    return df