{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine-tune Whisper for audio classification\n",
    "\n",
    "Trains `WhisperForAudioClassification` on a dataset described by `dataset.csv`\n",
    "(expected columns: `file_path`, `label`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use %pip (not !pip) so the install targets this kernel's environment.\n",
    "%pip install -q transformers librosa pandas torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports in one cell so a Restart & Run All is reproducible.\n",
    "import math\n",
    "\n",
    "import librosa\n",
    "import pandas as pd\n",
    "import torch\n",
    "from torch.optim import AdamW\n",
    "from torch.optim.lr_scheduler import LambdaLR\n",
    "from transformers import (\n",
    "    Trainer,\n",
    "    TrainingArguments,\n",
    "    WhisperForAudioClassification,\n",
    "    WhisperProcessor,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset index (expects columns: 'file_path', 'label')\n",
    "df = pd.read_csv('dataset.csv')\n",
    "\n",
    "# Map labels to contiguous integer ids for the classification head\n",
    "label_names = sorted(df['label'].unique())\n",
    "label2id = {name: idx for idx, name in enumerate(label_names)}\n",
    "id2label = {idx: name for name, idx in label2id.items()}\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model and processor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The processor bundles Whisper's feature extractor, which computes log-mel\n",
    "# spectrograms with the exact parameters the model was trained on\n",
    "# (n_fft=400, hop_length=160, 80 mel bins, padded/truncated to 3000 frames).\n",
    "processor = WhisperProcessor.from_pretrained(\"openai/whisper-medium\")\n",
    "\n",
    "# The classification head is freshly initialised; size it to our label set.\n",
    "model = WhisperForAudioClassification.from_pretrained(\n",
    "    \"openai/whisper-medium\",\n",
    "    num_labels=len(label_names),\n",
    "    label2id=label2id,\n",
    "    id2label=id2label,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LispDataset(torch.utils.data.Dataset):\n",
    "    \"\"\"Yields Whisper input features and integer labels from a file-path/label frame.\"\"\"\n",
    "\n",
    "    TARGET_SR = 16000  # Whisper models expect 16 kHz audio\n",
    "\n",
    "    def __init__(self, df, processor, label2id):\n",
    "        self.df = df\n",
    "        self.processor = processor\n",
    "        self.label2id = label2id\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.df)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        row = self.df.iloc[idx]\n",
    "        # librosa resamples to TARGET_SR during load, so no second\n",
    "        # resampling pass is needed (the original loaded at 44.1 kHz and\n",
    "        # then resampled again, which is wasteful and degrades the signal).\n",
    "        audio, _ = librosa.load(row['file_path'], sr=self.TARGET_SR)\n",
    "        # Let the feature extractor build the log-mel spectrogram: it uses\n",
    "        # Whisper's exact STFT/mel parameters and handles padding/truncation\n",
    "        # to the fixed 3000-frame input length (the hand-rolled version used\n",
    "        # hop_length=512, which does not match what the encoder expects).\n",
    "        inputs = self.processor(audio, sampling_rate=self.TARGET_SR, return_tensors='pt')\n",
    "        return {\n",
    "            'input_features': inputs.input_features[0],\n",
    "            'labels': torch.tensor(self.label2id[row['label']], dtype=torch.long),\n",
    "        }\n",
    "\n",
    "\n",
    "train_dataset = LispDataset(df, processor, label2id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "use_cuda = torch.cuda.is_available()\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir='./results',\n",
    "    num_train_epochs=10,\n",
    "    per_device_train_batch_size=2,\n",
    "    learning_rate=5e-5,\n",
    "    fp16=use_cuda,  # fp16 needs a GPU; the original set fp16=True with use_cpu=True, which is invalid\n",
    "    logging_steps=10,\n",
    ")\n",
    "# NOTE: metric_for_best_model was dropped — it requires an eval dataset and\n",
    "# an evaluation strategy, neither of which is configured in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the optimizer (adjust other hyperparameters as needed)\n",
    "optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Linear warmup over the first 10% of training steps, then a constant rate.\n",
    "# (The original `lambda epoch: epoch // 30` multiplied the learning rate by\n",
    "# zero for the first 30 scheduler steps, so training started with lr=0.)\n",
    "steps_per_epoch = math.ceil(len(train_dataset) / training_args.per_device_train_batch_size)\n",
    "num_training_steps = steps_per_epoch * int(training_args.num_train_epochs)\n",
    "warmup_steps = max(1, int(0.1 * num_training_steps))\n",
    "\n",
    "scheduler = LambdaLR(optimizer, lr_lambda=lambda step: min(1.0, (step + 1) / warmup_steps))\n",
    "\n",
    "# Trainer expects an (optimizer, lr_scheduler) tuple\n",
    "optimizer_and_scheduler = (optimizer, scheduler)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=train_dataset,\n",
    "    optimizers=optimizer_and_scheduler,\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}