Spaces:

AnjaJ
/

packing_list

Sleeping

File size: 16,498 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "fce70006-809b-4c98-b89c-00910b8bbea1",
   "metadata": {},
   "source": [
    "Implementation for blog post"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1eaa3a9f-0b39-4d77-91d6-f935d226ac98",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "import pickle\n",
    "import os\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "from tabulate import tabulate\n",
    "\n",
    "from transformers import pipeline\n",
    "import json\n",
    "import pandas as pd\n",
    "\n",
    "# Get candidate labels\n",
    "with open(\"packing_label_structure.json\", \"r\") as file:\n",
    "    candidate_labels = json.load(file)\n",
    "keys_list = list(candidate_labels.keys())\n",
    "\n",
    "# Load test data (list of dictionaries)\n",
    "# with open(\"test_data.json\", \"r\") as file:\n",
    "#     packing_data = json.load(file)\n",
    "# Extract trip descriptions and classification (trip_types)\n",
    "# trip_descriptions = [trip['description'] for trip in packing_data]\n",
    "# trip_types = [trip['trip_types'] for trip in packing_data]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bb1bc7ed-227e-4c0b-b769-ead4daf01c57",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " activity_type :\n",
      "\t hut trek (summer)\n",
      "\t hut trek (winter)\n",
      "\t camping trip (wild camping)\n",
      "\t camping trip (campground)\n",
      "\t ski tour / skitour\n",
      "\t snowboard / splitboard trip\n",
      "\t long-distance hike / thru-hike\n",
      "\t digital nomad trip\n",
      "\t city trip\n",
      "\t road trip (car/camper)\n",
      "\t festival trip\n",
      "\t yoga / wellness retreat\n",
      "\t micro-adventure / weekend trip\n",
      "\t beach vacation\n",
      "\t cultural exploration\n",
      "\t nature escape\n",
      "\n",
      " activities :\n",
      "\t swimming\n",
      "\t going to the beach\n",
      "\t relaxing\n",
      "\t sightseeing\n",
      "\t biking\n",
      "\t running\n",
      "\t skiing\n",
      "\t cross-country skiing\n",
      "\t ski touring\n",
      "\t hiking\n",
      "\t hut-to-hut hiking\n",
      "\t rock climbing\n",
      "\t ice climbing\n",
      "\t snowshoe hiking\n",
      "\t kayaking / canoeing\n",
      "\t stand-up paddleboarding (SUP)\n",
      "\t snorkeling\n",
      "\t scuba diving\n",
      "\t surfing\n",
      "\t paragliding\n",
      "\t horseback riding\n",
      "\t photography\n",
      "\t fishing\n",
      "\t rafting\n",
      "\t yoga\n",
      "\n",
      " climate_or_season :\n",
      "\t cold destination / winter\n",
      "\t warm destination / summer\n",
      "\t variable weather / spring / autumn\n",
      "\t tropical / humid\n",
      "\t dry / desert-like\n",
      "\t rainy climate\n",
      "\n",
      " style_or_comfort :\n",
      "\t ultralight\n",
      "\t lightweight (but comfortable)\n",
      "\t luxury (including evening wear)\n",
      "\t minimalist\n",
      "\n",
      " dress_code :\n",
      "\t casual\n",
      "\t formal (business trip)\n",
      "\t conservative\n",
      "\n",
      " accommodation :\n",
      "\t indoor\n",
      "\t huts with half board\n",
      "\t sleeping in a tent\n",
      "\t sleeping in a car\n",
      "\n",
      " transportation :\n",
      "\t own vehicle\n",
      "\t no own vehicle\n",
      "\n",
      " special_conditions :\n",
      "\t off-grid / no electricity\n",
      "\t self-supported (bring your own cooking gear)\n",
      "\t travel with children\n",
      "\t pet-friendly\n",
      "\t snow and ice\n",
      "\t high alpine terrain\n",
      "\t snow, ice and avalanche-prone terrain\n",
      "\t no special conditions to consider\n",
      "\n",
      " trip_length_days :\n",
      "\t 1 day\n",
      "\t 2 days\n",
      "\t 3 days\n",
      "\t 4 days\n",
      "\t 5 days\n",
      "\t 6 days\n",
      "\t 7 days\n",
      "\t 7+ days\n"
     ]
    }
   ],
   "source": [
    "for key in candidate_labels:\n",
    "    print(\"\\n\", key, \":\")\n",
    "    for item in candidate_labels[key]:\n",
    "        print(\"\\t\", item)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4b3a1bcb-3450-4128-b941-952f145baf99",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                             Label     Score\n",
      "0                   beach vacation  0.376311\n",
      "1   micro-adventure / weekend trip  0.350168\n",
      "2                    nature escape  0.133974\n",
      "3               digital nomad trip  0.031636\n",
      "4             cultural exploration  0.031271\n",
      "5          yoga / wellness retreat  0.012846\n",
      "6                    festival trip  0.012700\n",
      "7   long-distance hike / thru-hike  0.009527\n",
      "8                hut trek (summer)  0.008148\n",
      "9                        city trip  0.007793\n",
      "10          road trip (car/camper)  0.006512\n",
      "11              ski tour / skitour  0.005670\n",
      "12       camping trip (campground)  0.004448\n",
      "13     snowboard / splitboard trip  0.004113\n",
      "14     camping trip (wild camping)  0.002714\n",
      "15               hut trek (winter)  0.002170\n"
     ]
    }
   ],
   "source": [
    "key = keys_list[0]\n",
    "model_name = \"facebook/bart-large-mnli\"\n",
    "trip_descr = \"I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands.\"\n",
    "classifier = pipeline(\"zero-shot-classification\", model = model_name)\n",
    "result = classifier(trip_descr, candidate_labels[keys_list[0]])\n",
    "# Create DataFrame\n",
    "df = pd.DataFrame({\n",
    "    \"Label\": result[\"labels\"],\n",
    "    \"Score\": result[\"scores\"]\n",
    "})\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "04208f9e-59bb-408b-92c6-941d064bf43d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "beach vacation\n"
     ]
    }
   ],
   "source": [
    "# the labels are sorted by score. We choose the first one as our best guess for a class label\n",
    "class_label = result[\"labels\"][0]\n",
    "print(class_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9f5f1c45-b411-4de1-a0a6-a7ecde5d8eae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                             Label     Score\n",
      "0                   beach vacation  0.376311\n",
      "1   micro-adventure / weekend trip  0.350168\n",
      "2                    nature escape  0.133974\n",
      "3               digital nomad trip  0.031636\n",
      "4             cultural exploration  0.031271\n",
      "5          yoga / wellness retreat  0.012846\n",
      "6                    festival trip  0.012700\n",
      "7   long-distance hike / thru-hike  0.009527\n",
      "8                hut trek (summer)  0.008148\n",
      "9                        city trip  0.007793\n",
      "10          road trip (car/camper)  0.006512\n",
      "11              ski tour / skitour  0.005670\n",
      "12       camping trip (campground)  0.004448\n",
      "13     snowboard / splitboard trip  0.004113\n",
      "14     camping trip (wild camping)  0.002714\n",
      "15               hut trek (winter)  0.002170\n"
     ]
    }
   ],
   "source": [
    "# we do this for each superclass and receive a list of class labels for our trip. We did do things differently for activities\n",
    "cut_off = 0.5\n",
    "result_activ = classifier(trip_descr, candidate_labels[\"activities\"], multi_label=True)\n",
    "indices = [i for i, score in enumerate(result_activ['scores']) if score > cut_off]\n",
    "classes = [result_activ['labels'][i] for i in indices]\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    \"Label\": result[\"labels\"],\n",
    "    \"Score\": result[\"scores\"]\n",
    "})\n",
    "print(df)\n",
    "print(classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7c5c50c4-7d58-4f5a-8f22-e4c06298a2f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['going to the beach', 'relaxing', 'hiking']\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3a7287c2-78f0-4a53-af72-1bc0f62da36f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# doing this for all superclasses, depending on local machine this might take a while\n",
    "def pred_trip(model_name, trip_descr, cut_off = 0.5):\n",
    "    \"\"\"\n",
    "    Classifies trip\n",
    "    \n",
    "    Parameters:\n",
    "    model_name: name of hugging-face model\n",
    "    trip_descr: text describing the trip\n",
    "    cut_off: cut_off for choosing activities\n",
    "\n",
    "    Returns:\n",
    "    pd Dataframe: with class predictions and true values\n",
    "    \"\"\"\n",
    "    \n",
    "    classifier = pipeline(\"zero-shot-classification\", model=model_name)\n",
    "    df = pd.DataFrame(columns=['superclass', 'pred_class'])\n",
    "    for i, key in enumerate(keys_list):\n",
    "        print(f\"\\rProcessing {i + 1}/{len(keys_list)}\", end=\"\", flush=True)\n",
    "        if key == 'activities':\n",
    "            result = classifier(trip_descr, candidate_labels[key], multi_label=True)\n",
    "            indices = [i for i, score in enumerate(result['scores']) if score > cut_off]\n",
    "            classes = [result['labels'][i] for i in indices]\n",
    "        else:\n",
    "            result = classifier(trip_descr, candidate_labels[key])\n",
    "            classes = result[\"labels\"][0]\n",
    "        df.loc[i] = [key, classes]\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "43481d4c-039a-4a37-bd6d-dfe638bf9732",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing 9/9           superclass                              pred_class\n",
      "0       activity_type                          beach vacation\n",
      "1          activities  [going to the beach, relaxing, hiking]\n",
      "2   climate_or_season               warm destination / summer\n",
      "3    style_or_comfort                              minimalist\n",
      "4          dress_code                                  casual\n",
      "5       accommodation                    huts with half board\n",
      "6      transportation                          no own vehicle\n",
      "7  special_conditions               off-grid / no electricity\n",
      "8    trip_length_days                                 7+ days\n"
     ]
    }
   ],
   "source": [
    "result = pred_trip(model_name, trip_descr, cut_off = 0.5)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4799d6b-6ab5-42da-a992-afe3666d0015",
   "metadata": {},
   "source": [
    "Now use gradio app"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "35e14ac8-4445-4586-a115-081cf1ef2686",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prerequisites\n",
    "from transformers import pipeline\n",
    "import json\n",
    "import pandas as pd\n",
    "import gradio as gr\n",
    "\n",
    "# get candidate labels\n",
    "with open(\"packing_label_structure.json\", \"r\") as file:\n",
    "    candidate_labels = json.load(file)\n",
    "keys_list = list(candidate_labels.keys())\n",
    "\n",
    "# Load packing item data\n",
    "with open(\"packing_templates_self_supported_offgrid_expanded.json\", \"r\") as file:\n",
    "    packing_items = json.load(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8eefd4cc-c375-4cc0-956b-472b36bafdb7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "demo = gr.Interface(\n",
    "    fn=pred_trip,\n",
    "    inputs=[\n",
    "        gr.Textbox(label=\"Model name\", value = \"MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli\"),\n",
    "        gr.Textbox(label=\"Trip description\"),\n",
    "        gr.Number(label=\"Activity cut-off\", value = 0.5),\n",
    "    ],\n",
    "    # outputs=\"dataframe\",\n",
    "    outputs=[gr.Dataframe(label=\"DataFrame\"), gr.Textbox(label=\"List of words\")],\n",
    "    title=\"Trip classification\",\n",
    "    description=\"Enter a text describing your trip\",\n",
    ")\n",
    "\n",
    "# Launch the Gradio app\n",
    "if __name__ == \"__main__\":\n",
    "    demo.launch()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "11006b67-bfd5-42a7-99c4-36c3db3affac",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing 9/9"
     ]
    }
   ],
   "source": [
    "test = pred_trip(model_name, trip_descr, cut_off = 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f6cd8d1-b742-4034-a3e2-e55c3ddd2904",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "5db16415-47c1-42c6-861c-5d26f4b3bb03",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "facebook/bart-large-mnli\n",
      "I am planning a trip to Greece with my boyfriend, where we will visit two islands. We have booked an apartment on each island for a few days and plan to spend most of our time relaxing. Our main goals are to enjoy the beach, try delicious local food, and possibly go on a hike—if it’s not too hot. We will be relying solely on public transport. We’re in our late 20s and traveling from the Netherlands.\n"
     ]
    }
   ],
   "source": [
    "print(model_name)\n",
    "print(trip_descr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bbca6bc6-bd97-4d43-8a6e-fe338f2735f2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (huggingface_env)",
   "language": "python",
   "name": "huggingface_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}