class TFRecordGenerator:
    def __init__(
        self,
        dataset_type,
        texts,
        tokenizer,
        labels=None,
        max_len=220,
        chunk_size=100000,
        f_name="train.tfrecord",
        out_path="/kaggle/working",
    ):
        """Record the constructor arguments and run the data check.

        Args:
            dataset_type: Converted with ``str()`` and upper-cased before
                being stored as ``self.dataset_type``.
            texts: Stored as ``self.texts``; must support ``len()``, and its
                length is kept as ``self.data_size``.
            tokenizer: Stored unchanged as ``self.tokenizer``.
            labels: Optional; stored unchanged as ``self.labels``.
            max_len: Stored unchanged as ``self.max_len``.
            chunk_size: Stored unchanged as ``self.chunk_size``.
            f_name: File name joined onto ``out_path``.
            out_path: Directory part of ``self.tfrecord_file_path``.

        The last step calls ``self._check_data()``, which is defined outside
        this section of the file.
        """
        # Derive the computed values first, in the same order the
        # corresponding expressions were originally evaluated.
        normalized_type = str(dataset_type).upper()
        target_path = os.path.join(out_path, f_name)
        sample_count = len(texts)

        # Plain pass-through settings.
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.chunk_size = chunk_size

        # Derived settings.
        self.dataset_type = normalized_type
        self.tfrecord_file_path = target_path
        self.data_size = sample_count

        self._check_data()