TunisianEncodersArena / scripts /explore_dataset.py
hamzabouajila's picture
implement scripts for checking, add logging and update submission and integrate evaluation
742dfc3
raw
history blame
553 Bytes
from datasets import load_dataset
def explore_dataset(
    dataset_name: str = "arbml/Tunisian_Dialect_Corpus",
    split: str = "train",
    num_rows: int = 3,
) -> None:
    """Load a dataset split and print a quick overview of it to stdout.

    Prints, in order: the dataset info object, the first example, the
    column names, and the result of slicing off the first ``num_rows`` rows.

    Args:
        dataset_name: Identifier passed to ``load_dataset``. Defaults to the
            Tunisian dialect corpus this script was written for.
        split: Split passed to ``load_dataset``. Defaults to ``"train"``.
        num_rows: Upper bound of the slice printed under "First few rows".
            Defaults to 3.
    """
    dataset = load_dataset(dataset_name, split=split)

    print("\nDataset Info:")
    print(dataset.info)

    print("\nFirst Example:")
    # Indexing row 0 of an empty split raises IndexError; report it instead
    # so the remaining sections (column names, slice) are still printed.
    if len(dataset) > 0:
        print(dataset[0])
    else:
        print("(dataset is empty)")

    print("\nColumn Names:")
    print(dataset.column_names)

    print("\nFirst few rows:")
    print(dataset[:num_rows])
# Run the exploration only when executed as a script, not when imported.
if __name__ == "__main__":
    explore_dataset()