rawag committed · verified
Commit 344b95e · 1 Parent(s): 18f2c0e

Upload train.ipynb

Browse files
Files changed (1):
  1. train.ipynb +179 -0
train.ipynb ADDED
@@ -0,0 +1,179 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pip install transformers torch librosa pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import WhisperForAudioClassification\n",
+ "\n",
+ "# Load the pre-trained Whisper model; the classification head is newly\n",
+ "# initialized (num_labels defaults to 2, i.e. binary classification)\n",
+ "model = WhisperForAudioClassification.from_pretrained(\"openai/whisper-medium\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Load the CSV file\n",
+ "df = pd.read_csv('dataset.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import WhisperProcessor\n",
+ "\n",
+ "# Initialize the Whisper processor (used below to extract log-mel input features)\n",
+ "processor = WhisperProcessor.from_pretrained(\"openai/whisper-medium\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import librosa\n",
+ "import torch\n",
+ "\n",
+ "# Create a custom dataset class\n",
+ "class LispDataset(torch.utils.data.Dataset):\n",
+ "    def __init__(self, df):\n",
+ "        self.df = df\n",
+ "\n",
+ "    def __len__(self):\n",
+ "        return len(self.df)\n",
+ "\n",
+ "    def __getitem__(self, idx):\n",
+ "        row = self.df.iloc[idx]\n",
+ "\n",
+ "        # Load audio directly at Whisper's expected sample rate (16 kHz)\n",
+ "        audio, _ = librosa.load(row['file_path'], sr=16000)\n",
+ "\n",
+ "        # Let the Whisper processor extract log-mel features; it pads or\n",
+ "        # truncates to 30 s and returns a tensor of shape (1, 80, 3000),\n",
+ "        # matching the model's expected feature layout (a hand-rolled mel\n",
+ "        # spectrogram with hop_length=512 would not)\n",
+ "        inputs = processor(audio, sampling_rate=16000, return_tensors='pt')\n",
+ "\n",
+ "        # Return the keys the Trainer expects\n",
+ "        return {\n",
+ "            'input_features': inputs.input_features.squeeze(0),\n",
+ "            'labels': torch.tensor(row['label'], dtype=torch.long)\n",
+ "        }\n",
+ "\n",
+ "# Create the training dataset\n",
+ "train_dataset = LispDataset(df)"
+ ]
+ },
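+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Quick sanity check: each sample should carry log-mel features of shape\n",
+ "# (80, 3000), which is what Whisper expects per example\n",
+ "sample = train_dataset[0]\n",
+ "print(sample['input_features'].shape, sample['labels'])"
+ ]
+ },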
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import TrainingArguments\n",
+ "\n",
+ "# Training arguments (adjust learning rate as needed)\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir='./results',\n",
+ "    num_train_epochs=10,\n",
+ "    per_device_train_batch_size=2,\n",
+ "    learning_rate=5e-5,\n",
+ "    fp16=True,  # mixed precision requires a GPU; set fp16=False (and use_cpu=True) for CPU-only runs\n",
+ "    warmup_ratio=0.1,\n",
+ "    metric_for_best_model='accuracy',  # only takes effect with an eval set and load_best_model_at_end=True\n",
+ "    gradient_accumulation_steps=1  # no gradient accumulation\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from torch.optim import AdamW  # Import AdamW from PyTorch\n",
+ "\n",
+ "# Create the optimizer (adjust other hyperparameters as needed)\n",
+ "optimizer = AdamW(model.parameters(), lr=training_args.learning_rate)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "from transformers import get_linear_schedule_with_warmup\n",
+ "\n",
+ "# Linear warmup then linear decay, matching the warmup_ratio set above.\n",
+ "# Note: LambdaLR with lambda epoch: epoch // 30 would multiply the LR by\n",
+ "# zero for the first 30 scheduler steps, so the model would never train.\n",
+ "steps_per_epoch = math.ceil(len(train_dataset) / training_args.per_device_train_batch_size)\n",
+ "num_training_steps = steps_per_epoch * int(training_args.num_train_epochs)\n",
+ "\n",
+ "scheduler = get_linear_schedule_with_warmup(\n",
+ "    optimizer,\n",
+ "    num_warmup_steps=int(training_args.warmup_ratio * num_training_steps),\n",
+ "    num_training_steps=num_training_steps\n",
+ ")\n",
+ "\n",
+ "optimizertuple = (optimizer, scheduler)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import Trainer\n",
+ "\n",
+ "# Trainer instance\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=train_dataset,\n",
+ "    optimizers=optimizertuple  # (optimizer, scheduler) tuple\n",
+ ")\n",
+ "\n",
+ "# Start training\n",
+ "trainer.train()"
+ ]
+ },
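+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal inference sketch for the fine-tuned classifier;\n",
+ "# 'sample.wav' is a placeholder path, not part of the dataset above\n",
+ "audio, _ = librosa.load('sample.wav', sr=16000)\n",
+ "inputs = processor(audio, sampling_rate=16000, return_tensors='pt')\n",
+ "\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ "    logits = model(input_features=inputs.input_features.to(model.device)).logits\n",
+ "\n",
+ "# Highest-scoring class index (0/1 for the default binary head)\n",
+ "print(int(logits.argmax(dim=-1)))"
+ ]
+ }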
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }