Christian Rene Thelen commited on
Commit
963cb02
·
1 Parent(s): 21f86d4

Initial Commit

Browse files
.gitignore ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### VisualStudioCode template
2
+ .vscode/*
3
+ !.vscode/settings.json
4
+ !.vscode/tasks.json
5
+ !.vscode/launch.json
6
+ !.vscode/extensions.json
7
+ !.vscode/*.code-snippets
8
+
9
+ # Local History for Visual Studio Code
10
+ .history/
11
+
12
+ # Built Visual Studio Code Extensions
13
+ *.vsix
14
+
15
+ ### JupyterNotebooks template
16
+ # gitignore template for Jupyter Notebooks
17
+ # website: http://jupyter.org/
18
+
19
+ .ipynb_checkpoints
20
+ */.ipynb_checkpoints/*
21
+
22
+ # IPython
23
+ profile_default/
24
+ ipython_config.py
25
+
26
+ # Remove previous ipynb_checkpoints
27
+ # git rm -r .ipynb_checkpoints/
28
+
29
+ ### Python template
30
+ # Byte-compiled / optimized / DLL files
31
+ __pycache__/
32
+ *.py[cod]
33
+ *$py.class
34
+
35
+ # C extensions
36
+ *.so
37
+
38
+ # Distribution / packaging
39
+ .Python
40
+ build/
41
+ develop-eggs/
42
+ dist/
43
+ downloads/
44
+ eggs/
45
+ .eggs/
46
+ lib/
47
+ lib64/
48
+ parts/
49
+ sdist/
50
+ var/
51
+ wheels/
52
+ share/python-wheels/
53
+ *.egg-info/
54
+ .installed.cfg
55
+ *.egg
56
+ MANIFEST
57
+
58
+ # PyInstaller
59
+ # Usually these files are written by a python script from a template
60
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
61
+ *.manifest
62
+ *.spec
63
+
64
+ # Installer logs
65
+ pip-log.txt
66
+ pip-delete-this-directory.txt
67
+
68
+ # Unit test / coverage reports
69
+ htmlcov/
70
+ .tox/
71
+ .nox/
72
+ .coverage
73
+ .coverage.*
74
+ .cache
75
+ nosetests.xml
76
+ coverage.xml
77
+ *.cover
78
+ *.py,cover
79
+ .hypothesis/
80
+ .pytest_cache/
81
+ cover/
82
+
83
+ # Translations
84
+ *.mo
85
+ *.pot
86
+
87
+ # Django stuff:
88
+ *.log
89
+ local_settings.py
90
+ db.sqlite3
91
+ db.sqlite3-journal
92
+
93
+ # Flask stuff:
94
+ instance/
95
+ .webassets-cache
96
+
97
+ # Scrapy stuff:
98
+ .scrapy
99
+
100
+ # Sphinx documentation
101
+ docs/_build/
102
+
103
+ # PyBuilder
104
+ .pybuilder/
105
+ target/
106
+
107
+ # Jupyter Notebook
108
+ .ipynb_checkpoints
109
+
110
+ # IPython
111
+ profile_default/
112
+ ipython_config.py
113
+
114
+ # pyenv
115
+ # For a library or package, you might want to ignore these files since the code is
116
+ # intended to run in multiple environments; otherwise, check them in:
117
+ # .python-version
118
+
119
+ # pipenv
120
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
121
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
122
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
123
+ # install all needed dependencies.
124
+ #Pipfile.lock
125
+
126
+ # poetry
127
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
128
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
129
+ # commonly ignored for libraries.
130
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
131
+ #poetry.lock
132
+
133
+ # pdm
134
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
135
+ #pdm.lock
136
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
137
+ # in version control.
138
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
139
+ .pdm.toml
140
+ .pdm-python
141
+ .pdm-build/
142
+
143
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
144
+ __pypackages__/
145
+
146
+ # Celery stuff
147
+ celerybeat-schedule
148
+ celerybeat.pid
149
+
150
+ # SageMath parsed files
151
+ *.sage.py
152
+
153
+ # Environments
154
+ .env
155
+ .venv
156
+ env/
157
+ venv/
158
+ ENV/
159
+ env.bak/
160
+ venv.bak/
161
+
162
+ # Spyder project settings
163
+ .spyderproject
164
+ .spyproject
165
+
166
+ # Rope project settings
167
+ .ropeproject
168
+
169
+ # mkdocs documentation
170
+ /site
171
+
172
+ # mypy
173
+ .mypy_cache/
174
+ .dmypy.json
175
+ dmypy.json
176
+
177
+ # Pyre type checker
178
+ .pyre/
179
+
180
+ # pytype static type analyzer
181
+ .pytype/
182
+
183
+ # Cython debug symbols
184
+ cython_debug/
185
+
186
+ # PyCharm
187
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
188
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
189
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
190
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
191
+ #.idea/
192
+
193
+ *.pkl
194
+ *.zip
195
+ share-GermEval2025-data
196
+ opt/
197
+ experiments/*/trainer
198
+ *.pth
199
+ experiments/*/wandb
200
+ experiments/*/trainer*/
201
+ experiments/*/models_*/
202
+ *.npy
203
+ experiments/exp*/*_exp_fold?/
204
+ experiments/exp*/exp*-fold-?/
205
+ experiments/exp???/wandb_logs/
206
+
207
+ experiments/exp007-large/fp16/
208
+ experiments/exp008/exp008-2/
209
+ experiments/exp008/exp008-3/
210
+ experiments/exp008/exp008-4/
211
+ experiments/exp008/exp008/
212
+ experiments/exp011/exp011-9/
213
+ experiments/exp012-fixed/exp012-fixed-2/
214
+ experiments/exp012-fixed/exp012-fixed-3/
215
+ experiments/exp012-fixed/exp012-fixed-4/
216
+ experiments/exp012-fixed/exp012-fixed-5/
217
+ experiments/exp012-fixed/exp012-fixed/
218
+ experiments/exp012/exp012-2/
219
+ experiments/exp012/exp012-3/
220
+ experiments/exp012/exp012/
221
+ experiments/exp013/exp013-2/
222
+ experiments/exp013/exp013/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12.9
Dockerfile ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use NVIDIA CUDA base image with Python 3.12.9
2
+ FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
3
+
4
+ # Set environment variables
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+ ENV DEBIAN_FRONTEND=noninteractive
8
+ ENV PYTHON_VERSION=3.12.9
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y \
12
+ software-properties-common \
13
+ build-essential \
14
+ libssl-dev \
15
+ libffi-dev \
16
+ libsqlite3-dev \
17
+ libreadline-dev \
18
+ libbz2-dev \
19
+ libncurses5-dev \
20
+ libncursesw5-dev \
21
+ xz-utils \
22
+ tk-dev \
23
+ libxml2-dev \
24
+ libxmlsec1-dev \
25
+ libgdbm-dev \
26
+ liblzma-dev \
27
+ git \
28
+ wget \
29
+ curl \
30
+ ca-certificates \
31
+ && rm -rf /var/lib/apt/lists/*
32
+
33
+ # Install Python 3.12.9 from source
34
+ RUN cd /tmp && \
35
+ wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz && \
36
+ tar xzf Python-${PYTHON_VERSION}.tgz && \
37
+ cd Python-${PYTHON_VERSION} && \
38
+ ./configure --enable-optimizations --with-ensurepip=install && \
39
+ make -j $(nproc) && \
40
+ make altinstall && \
41
+ cd / && \
42
+ rm -rf /tmp/Python-${PYTHON_VERSION}*
43
+
44
+ # Create symlinks for python3.12
45
+ RUN ln -sf /usr/local/bin/python3.12 /usr/bin/python3
46
+ RUN ln -sf /usr/local/bin/python3.12 /usr/bin/python
47
+ RUN ln -sf /usr/local/bin/pip3.12 /usr/bin/pip
48
+
49
+ # Upgrade pip
50
+ RUN python3 -m pip install --upgrade pip
51
+
52
+ # Set work directory
53
+ WORKDIR /app
54
+
55
+ # Copy requirements file
56
+ COPY requirements.txt .
57
+
58
+ # Install Python dependencies
59
+ RUN pip install --no-cache-dir -r requirements.txt
60
+
61
+ # Copy application code
62
+ COPY subtask2_final_gradio.py .
63
+
64
+ # Create directory for model weights
65
+ RUN mkdir -p experiments/exp027
66
+
67
+ # Set CUDA environment variables
68
+ ENV CUDA_DEVICE_ORDER=PCI_BUS_ID
69
+ ENV CUDA_VISIBLE_DEVICES=0
70
+
71
+ # Expose port
72
+ EXPOSE 7860
73
+
74
+ # Create non-root user for security
75
+ RUN useradd -m -u 1002 appuser && chown -R appuser:appuser /app
76
+ USER appuser
77
+
78
+ # Command to run the application
79
+ CMD ["python", "subtask2_final_gradio.py"]
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AIxcellent Vibes at GermEval 2025 Shared Task on Candy Speech Detection
2
+
3
+ ## Results
4
+ | Subtask | Submission | Model | (strict) F1 Score |
5
+ |---------|------------|--------------------|------------------:|
6
+ | 1 | 1 | Qwen3-Embedding-8B | 0.875 |
7
+ | 1 | 2 | XLM-RoBERTa-Large | 0.891 |
8
+ | 2 | 1 | GBERT-Large | 0.623 |
9
+ | 2 | 2 | XLM-RoBERTa-Large | 0.631 |
10
+
11
+
12
+ ## Setup
13
+
14
+ ```bash
15
+ python_version="$(cat .python-version)"
16
+
17
+ # install the interpreter if it’s missing
18
+ pyenv install -s "${python_version}"
19
+
20
+ # select python version for current shell
21
+ pyenv shell "${python_version}"
22
+
23
+ # create venv if missing
24
+ if [[ ! -d venv ]]; then
25
+ python -m venv venv
26
+ fi
27
+
28
+ # activate venv & install packages
29
+ source venv/bin/activate
30
+
31
+ pip install -U pip setuptools wheel
32
+ pip install -r requirements.txt
33
+ ```
34
+
35
+
36
+
37
+ Dieses Repository enthält den Code, mit dem die Untersuchungen der Bachelorarbeit **Flauschdetektion (GermEval 2025)**
38
+ im Studiengang Angewandte Mathematik und Informatik (dual) B. Sc. an der Fachhochschule Aachen durchgeführt wurden.
39
+
40
+
41
+ ---
42
+
43
+
44
+ **Studiengang**
45
+
46
+ Angewandte Mathematik und Informatik B.Sc. ([AMI](https://www.fh-aachen.de/studium/angewandte-mathematik-und-informatik-bsc)) an der [FH Aachen](https://www.fh-aachen.de/), University of Applied Sciences.
47
+
48
+ **Ausbildung mit IHK Abschluss**
49
+
50
+ Mathematisch technische/-r Softwareentwickler/-in ([MaTSE](https://www.matse-ausbildung.de/startseite.html)) am Lehr- und Forschungsgebiet Ingenieurhydrologie ([LFI](https://lfi.rwth-aachen.de/)) der [RWTH Aachen](https://www.rwth-aachen.de/) University.
docker-compose.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ span-classifier:
3
+ container_name: span-classifier-app
4
+ build:
5
+ context: .
6
+ dockerfile: Dockerfile
7
+ network: host
8
+ image: 8e6b331d0418
9
+ ports:
10
+ - "7860:7860"
11
+ volumes:
12
+ # Mount model weights directory
13
+ - ./experiments:/app/experiments:ro
14
+ # Mount cache directory for Hugging Face models
15
+ - /home/cthelen/.cache/huggingface:/home/appuser/.cache/huggingface
16
+ # Mount logs directory
17
+ - ./logs:/app/logs
18
+ environment:
19
+ - PYTHONUNBUFFERED=1
20
+ - CUDA_DEVICE_ORDER=PCI_BUS_ID
21
+ - CUDA_VISIBLE_DEVICES=0
22
+ - GRADIO_SERVER_NAME=0.0.0.0
23
+ - GRADIO_SERVER_PORT=7860
24
+ - TRANSFORMERS_CACHE=/home/appuser/.cache/huggingface
25
+ - TORCH_HOME=/home/appuser/.cache/torch
26
+ runtime: nvidia
27
+ deploy:
28
+ resources:
29
+ reservations:
30
+ devices:
31
+ - driver: nvidia
32
+ count: 1
33
+ capabilities: [gpu]
34
+ restart: unless-stopped
35
+ labels:
36
+ - "traefik.enable=true"
37
+ - "traefik.http.routers.demo.rule=Host(`span-classifier.gpu2.lfi.rwth-aachen.de`)"
38
+ - "traefik.http.routers.demo.tls=true"
39
+ - "traefik.http.routers.demo.tls.certresolver=letsencrypt"
40
+ - "com.centurylinklabs.watchtower.enable=false"
41
+ networks:
42
+ - web
43
+
44
+ networks:
45
+ web:
46
+ external: true
requirements.txt ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.2.2
2
+ accelerate==1.7.0
3
+ aiofiles==24.1.0
4
+ annotated-types==0.7.0
5
+ anyio==4.9.0
6
+ argon2-cffi==23.1.0
7
+ argon2-cffi-bindings==21.2.0
8
+ arrow==1.3.0
9
+ asgiref==3.8.1
10
+ asttokens==3.0.0
11
+ astunparse==1.6.3
12
+ async-lru==2.0.5
13
+ attrs==25.3.0
14
+ azure-ai-inference==1.0.0b9
15
+ azure-ai-ml==1.27.0
16
+ azure-common==1.1.28
17
+ azure-core==1.34.0
18
+ azure-core-tracing-opentelemetry==1.0.0b12
19
+ azure-identity==1.22.0
20
+ azure-mgmt-core==1.5.0
21
+ azure-monitor-opentelemetry==1.6.8
22
+ azure-monitor-opentelemetry-exporter==1.0.0b36
23
+ azure-storage-blob==12.25.1
24
+ azure-storage-file-datalake==12.20.0
25
+ azure-storage-file-share==12.21.0
26
+ babel==2.17.0
27
+ beautifulsoup4==4.13.4
28
+ bleach==6.2.0
29
+ blis==1.2.1
30
+ catalogue==2.0.10
31
+ certifi==2025.4.26
32
+ cffi==1.17.1
33
+ charset-normalizer==3.4.2
34
+ click==8.1.8
35
+ cloudpathlib==0.21.0
36
+ colorama==0.4.6
37
+ comm==0.2.2
38
+ confection==0.1.5
39
+ contourpy==1.3.2
40
+ cryptography==44.0.3
41
+ cupy-cuda12x==12.3.0
42
+ cycler==0.12.1
43
+ cymem==2.0.11
44
+ de_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.8.0/de_core_news_sm-3.8.0-py3-none-any.whl#sha256=fec69fec52b1780f2d269d5af7582a5e28028738bd3190532459aeb473bfa3e7
45
+ debugpy==1.8.14
46
+ decorator==5.2.1
47
+ defusedxml==0.7.1
48
+ Deprecated==1.2.18
49
+ docker-pycreds==0.4.0
50
+ executing==2.2.0
51
+ fastapi==0.115.14
52
+ fastjsonschema==2.21.1
53
+ fastrlock==0.8.3
54
+ ffmpy==0.6.0
55
+ filelock==3.18.0
56
+ fixedint==0.1.6
57
+ flatbuffers==25.2.10
58
+ fonttools==4.57.0
59
+ fqdn==1.5.1
60
+ fsspec==2025.3.2
61
+ gast==0.6.0
62
+ gitdb==4.0.12
63
+ GitPython==3.1.44
64
+ google-pasta==0.2.0
65
+ gradio==5.35.0
66
+ gradio_client==1.10.4
67
+ groovy==0.1.2
68
+ grpcio==1.71.0
69
+ h11==0.16.0
70
+ h5py==3.13.0
71
+ hf-xet==1.1.0
72
+ httpcore==1.0.9
73
+ httpx==0.28.1
74
+ huggingface-hub==0.31.1
75
+ idna==3.10
76
+ imbalanced-learn==0.13.0
77
+ imblearn==0.0
78
+ importlib_metadata==8.6.1
79
+ ipykernel==6.29.5
80
+ ipython==8.36.0
81
+ ipython_pygments_lexers==1.1.1
82
+ isodate==0.7.2
83
+ isoduration==20.11.0
84
+ jedi==0.19.2
85
+ Jinja2==3.1.6
86
+ joblib==1.5.0
87
+ json5==0.12.0
88
+ jsonpointer==3.0.0
89
+ jsonschema==4.23.0
90
+ jsonschema-specifications==2025.4.1
91
+ jupyter-events==0.12.0
92
+ jupyter-lsp==2.2.5
93
+ jupyter_client==8.6.3
94
+ jupyter_core==5.7.2
95
+ jupyter_server==2.15.0
96
+ jupyter_server_terminals==0.5.3
97
+ jupyterlab==4.4.2
98
+ jupyterlab_pygments==0.3.0
99
+ jupyterlab_server==2.27.3
100
+ keras==3.9.2
101
+ kiwisolver==1.4.8
102
+ langcodes==3.5.0
103
+ language_data==1.3.0
104
+ libclang==18.1.1
105
+ marisa-trie==1.2.1
106
+ Markdown==3.8
107
+ markdown-it-py==3.0.0
108
+ MarkupSafe==3.0.2
109
+ marshmallow==3.26.1
110
+ matplotlib==3.10.1
111
+ matplotlib-inline==0.1.7
112
+ mdurl==0.1.2
113
+ mistune==3.1.3
114
+ ml_dtypes==0.5.1
115
+ mpmath==1.3.0
116
+ msal==1.32.3
117
+ msal-extensions==1.3.1
118
+ msrest==0.7.1
119
+ murmurhash==1.0.12
120
+ namex==0.0.9
121
+ nbclient==0.10.2
122
+ nbconvert==7.16.6
123
+ nbformat==5.10.4
124
+ nest-asyncio==1.6.0
125
+ networkx==3.4.2
126
+ notebook==7.4.2
127
+ notebook_shim==0.2.4
128
+ numpy==1.26.4
129
+ nvidia-cublas-cu12==12.6.4.1
130
+ nvidia-cuda-cupti-cu12==12.6.80
131
+ nvidia-cuda-nvrtc-cu12==12.6.77
132
+ nvidia-cuda-runtime-cu12==12.6.77
133
+ nvidia-cudnn-cu12==9.5.1.17
134
+ nvidia-cufft-cu12==11.3.0.4
135
+ nvidia-cufile-cu12==1.11.1.6
136
+ nvidia-curand-cu12==10.3.7.77
137
+ nvidia-cusolver-cu12==11.7.1.2
138
+ nvidia-cusparse-cu12==12.5.4.2
139
+ nvidia-cusparselt-cu12==0.6.3
140
+ nvidia-nccl-cu12==2.26.2
141
+ nvidia-nvjitlink-cu12==12.6.85
142
+ nvidia-nvtx-cu12==12.6.77
143
+ oauthlib==3.2.2
144
+ opentelemetry-api==1.31.1
145
+ opentelemetry-instrumentation==0.52b1
146
+ opentelemetry-instrumentation-asgi==0.52b1
147
+ opentelemetry-instrumentation-dbapi==0.52b1
148
+ opentelemetry-instrumentation-django==0.52b1
149
+ opentelemetry-instrumentation-fastapi==0.52b1
150
+ opentelemetry-instrumentation-flask==0.52b1
151
+ opentelemetry-instrumentation-psycopg2==0.52b1
152
+ opentelemetry-instrumentation-requests==0.52b1
153
+ opentelemetry-instrumentation-urllib==0.52b1
154
+ opentelemetry-instrumentation-urllib3==0.52b1
155
+ opentelemetry-instrumentation-wsgi==0.52b1
156
+ opentelemetry-resource-detector-azure==0.1.5
157
+ opentelemetry-sdk==1.31.1
158
+ opentelemetry-semantic-conventions==0.52b1
159
+ opentelemetry-util-http==0.52b1
160
+ opt_einsum==3.4.0
161
+ optree==0.15.0
162
+ orjson==3.10.18
163
+ overrides==7.7.0
164
+ packaging==25.0
165
+ pandas==2.2.3
166
+ pandocfilters==1.5.1
167
+ parso==0.8.4
168
+ pexpect==4.9.0
169
+ pillow==11.2.1
170
+ platformdirs==4.3.8
171
+ preshed==3.0.9
172
+ prometheus_client==0.21.1
173
+ prompt_toolkit==3.0.51
174
+ protobuf==5.29.4
175
+ psutil==6.1.1
176
+ ptyprocess==0.7.0
177
+ pure_eval==0.2.3
178
+ pycparser==2.22
179
+ pydantic==2.11.4
180
+ pydantic_core==2.33.2
181
+ pydash==8.0.5
182
+ pydub==0.25.1
183
+ Pygments==2.19.1
184
+ PyJWT==2.10.1
185
+ pyparsing==3.2.3
186
+ python-dateutil==2.9.0.post0
187
+ python-dotenv==1.1.0
188
+ python-json-logger==3.3.0
189
+ python-multipart==0.0.20
190
+ pytz==2025.2
191
+ PyYAML==6.0.2
192
+ pyzmq==26.4.0
193
+ referencing==0.36.2
194
+ regex==2024.11.6
195
+ requests==2.32.3
196
+ requests-oauthlib==2.0.0
197
+ rfc3339-validator==0.1.4
198
+ rfc3986-validator==0.1.1
199
+ rich==14.0.0
200
+ rpds-py==0.24.0
201
+ ruff==0.12.1
202
+ safehttpx==0.1.6
203
+ safetensors==0.5.3
204
+ scikit-learn==1.6.1
205
+ scipy==1.15.3
206
+ seaborn==0.13.2
207
+ semantic-version==2.10.0
208
+ Send2Trash==1.8.3
209
+ sentry-sdk==2.28.0
210
+ setproctitle==1.3.6
211
+ setuptools==80.3.1
212
+ shellingham==1.5.4
213
+ six==1.17.0
214
+ sklearn-compat==0.1.3
215
+ smart-open==7.1.0
216
+ smmap==5.0.2
217
+ sniffio==1.3.1
218
+ soupsieve==2.7
219
+ spacy==3.8.5
220
+ spacy-legacy==3.0.12
221
+ spacy-loggers==1.0.5
222
+ srsly==2.5.1
223
+ stack-data==0.6.3
224
+ starlette==0.46.2
225
+ strictyaml==1.7.3
226
+ sympy==1.14.0
227
+ tensorboard==2.19.0
228
+ tensorboard-data-server==0.7.2
229
+ tensorflow==2.19.0
230
+ termcolor==3.1.0
231
+ terminado==0.18.1
232
+ tf_keras==2.19.0
233
+ thinc==8.3.4
234
+ threadpoolctl==3.6.0
235
+ tinycss2==1.4.0
236
+ tokenizers==0.21.1
237
+ tomlkit==0.13.3
238
+ torch==2.7.0
239
+ tornado==6.4.2
240
+ tqdm==4.67.1
241
+ traitlets==5.14.3
242
+ transformers==4.51.3
243
+ triton==3.3.0
244
+ typer==0.15.3
245
+ types-python-dateutil==2.9.0.20241206
246
+ typing-inspection==0.4.0
247
+ typing_extensions==4.13.2
248
+ tzdata==2025.2
249
+ uri-template==1.3.0
250
+ urllib3==2.4.0
251
+ uvicorn==0.35.0
252
+ wandb==0.19.11
253
+ wasabi==1.1.3
254
+ wcwidth==0.2.13
255
+ weasel==0.4.1
256
+ webcolors==24.11.1
257
+ webencodings==0.5.1
258
+ websocket-client==1.8.0
259
+ websockets==15.0.1
260
+ Werkzeug==3.1.3
261
+ wheel==0.45.1
262
+ wrapt==1.17.2
263
+ zipp==3.21.0
setup_env.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -e
3
+
4
+ python_version="$(cat .python-version)"
5
+
6
+ # 1. Install the interpreter if it’s missing
7
+ pyenv install -s "${python_version}"
8
+
9
+ # select python version for current shell
10
+ pyenv shell "${python_version}"
11
+
12
+ # create venv if missing
13
+ if [[ ! -d venv ]]; then
14
+ python -m venv venv
15
+ fi
16
+
17
+ # 3. Activate venv & install packages
18
+ source venv/bin/activate
19
+
20
+ pip install -U pip setuptools wheel
21
+ pip install -r requirements.txt
subtask2_final_gradio.py ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.model_selection import train_test_split
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ BertForTokenClassification,
8
+ AutoModelForTokenClassification,
9
+ pipeline
10
+ )
11
+ import torch
12
+ import os
13
+ import seaborn as sns
14
+ from matplotlib.colors import to_hex
15
+ import html
16
+
17
+
18
# Pin CUDA device enumeration to PCI bus order and expose only GPU 0.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
20
+
21
class SpanClassifierWithStrictF1:
    """BIO token-classification span extractor with strict span-level F1 scoring.

    Wraps a Hugging Face token-classification model: builds BIO-labelled
    datasets from comment/span dataframes, decodes token-level predictions
    back into character spans, and evaluates them with strict (exact
    type+boundary match) F1.
    """

    def __init__(self, model_name="deepset/gbert-base"):
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # "O" plus a B-/I- tag pair for each of the 10 candy-speech span types.
        span_types = [
            "positive feedback", "compliment", "affection declaration",
            "encouragement", "gratitude", "agreement", "ambiguous",
            "implicit", "group membership", "sympathy",
        ]
        self.labels = (
            ["O"]
            + [f"B-{t}" for t in span_types]
            + [f"I-{t}" for t in span_types]
        )
        self.label2id = {label: i for i, label in enumerate(self.labels)}
        self.id2label = {i: label for i, label in enumerate(self.labels)}

    def create_dataset(self, comments_df, spans_df):
        """Build tokenized examples with BIO labels and keep eval metadata.

        Returns (examples, eval_data): `examples` holds input_ids,
        attention_mask and per-token labels; `eval_data` keeps the text,
        offset mapping and gold spans needed for strict-F1 computation.
        """
        examples = []
        eval_data = []  # retained for strict-F1 computation

        spans_grouped = spans_df.groupby(['document', 'comment_id'])

        for _, row in comments_df.iterrows():
            text = row['comment']
            document = row['document']
            comment_id = row['comment_id']
            key = (document, comment_id)

            # Fetch the annotated spans for this comment once (the original
            # performed this groupby lookup twice per row).
            if key in spans_grouped.groups:
                span_rows = spans_grouped.get_group(key)[['start', 'end', 'type']].values
                true_spans = [(type_label, int(start), int(end))
                              for start, end, type_label in span_rows]
            else:
                span_rows = []
                true_spans = []

            tokenized = self.tokenizer(text, truncation=True, max_length=512,
                                       return_offsets_mapping=True)

            # Per-token BIO label ids aligned to the offset mapping.
            labels = self._create_bio_labels(tokenized['offset_mapping'], span_rows)

            examples.append({
                'input_ids': tokenized['input_ids'],
                'attention_mask': tokenized['attention_mask'],
                'labels': labels
            })

            eval_data.append({
                'text': text,
                'offset_mapping': tokenized['offset_mapping'],
                'true_spans': true_spans,
                'document': document,
                'comment_id': comment_id
            })

        return examples, eval_data

    def _create_bio_labels(self, offset_mapping, spans):
        """Create per-token BIO label ids from character spans.

        `spans` is an iterable of (start, end, type_label) rows.
        """
        labels = [0] * len(offset_mapping)  # 0 == "O"

        for start, end, type_label in spans:
            for i, (token_start, token_end) in enumerate(offset_mapping):
                # Skip special tokens.
                # NOTE(review): fast tokenizers typically emit (0, 0) rather
                # than None offsets for special tokens — confirm this guard
                # actually fires for the tokenizer in use.
                if token_start is None:
                    continue

                # Token overlaps the annotated span.
                if token_start < end and token_end > start:
                    if token_start <= start:
                        labels[i] = self.label2id[f'B-{type_label}']
                    else:
                        labels[i] = self.label2id[f'I-{type_label}']

        return labels

    def compute_metrics(self, eval_pred):
        """Strict-F1 metrics callback for a Hugging Face Trainer.

        NOTE(review): reads `self.current_eval_data`, which is never assigned
        inside this class — presumably set by the training script before
        evaluation; verify against the caller.
        """
        predictions, labels = eval_pred
        predictions = np.argmax(predictions, axis=2)

        batch_pred_spans = []
        batch_true_spans = []

        for i, (pred_seq, label_seq) in enumerate(zip(predictions, labels)):
            if i < len(self.current_eval_data):
                eval_item = self.current_eval_data[i]
                text = eval_item['text']
                offset_mapping = eval_item['offset_mapping']
                true_spans = eval_item['true_spans']

                # Keep only real token positions: label -100 marks padding /
                # ignored tokens, and the offset mapping must cover the index.
                valid_predictions = []
                valid_offsets = []
                for j, (pred_label, true_label) in enumerate(zip(pred_seq, label_seq)):
                    if true_label != -100 and j < len(offset_mapping):
                        valid_predictions.append(pred_label)
                        valid_offsets.append(offset_mapping[j])

                # Decode token predictions into (type, start, end) tuples.
                pred_spans = self._predictions_to_spans(valid_predictions, valid_offsets, text)
                pred_spans_tuples = [(span['type'], span['start'], span['end'])
                                     for span in pred_spans]

                batch_pred_spans.append(pred_spans_tuples)
                batch_true_spans.append(true_spans)

        strict_f1, strict_precision, strict_recall, tp, fp, fn = self._calculate_strict_f1(
            batch_true_spans, batch_pred_spans
        )

        torch.cuda.empty_cache()

        return {
            "strict_f1": torch.tensor(strict_f1),
            "strict_precision": torch.tensor(strict_precision),
            "strict_recall": torch.tensor(strict_recall),
            "true_positives": torch.tensor(tp),
            "false_positives": torch.tensor(fp),
            "false_negatives": torch.tensor(fn)
        }

    def _calculate_strict_f1(self, true_spans_list, pred_spans_list):
        """Compute strict F1 over all comments.

        Returns (f1, precision, recall, tp, fp, fn).
        """
        tp, fp, fn = 0, 0, 0

        for true_spans, pred_spans in zip(true_spans_list, pred_spans_list):
            # A match requires identical type AND identical boundaries.
            matches = self._find_exact_matches(true_spans, pred_spans)

            tp += len(matches)
            fp += len(pred_spans) - len(matches)
            fn += len(true_spans) - len(matches)

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        return f1, precision, recall, tp, fp, fn

    def _find_exact_matches(self, true_spans, pred_spans):
        """Greedily pair each true span with at most one identical predicted span."""
        matches = []
        used_pred = set()

        for true_span in true_spans:
            for i, pred_span in enumerate(pred_spans):
                if i not in used_pred and true_span == pred_span:
                    matches.append((true_span, pred_span))
                    used_pred.add(i)
                    break

        return matches

    def _predictions_to_spans(self, predicted_labels, offset_mapping, text):
        """Convert per-token label ids into character-span dicts.

        Each span is {'type', 'start', 'end', 'text'}; B- opens a span,
        I- extends the currently open one, anything else closes it.
        """
        spans = []
        current_span = None

        for i, label_id in enumerate(predicted_labels):
            if i >= len(offset_mapping):
                break

            label = self.id2label[label_id]
            token_start, token_end = offset_mapping[i]

            # Special tokens carry no character offsets.
            if token_start is None:
                continue

            if label.startswith('B-'):
                # A new span begins; flush any open one first.
                if current_span:
                    spans.append(current_span)
                current_span = {
                    'type': label[2:],
                    'start': token_start,
                    'end': token_end,
                    'text': text[token_start:token_end]
                }
            elif label.startswith('I-') and current_span:
                # Extend the open span to cover this token.
                current_span['end'] = token_end
                current_span['text'] = text[current_span['start']:current_span['end']]
            else:
                # "O" (or a dangling I-) closes the open span.
                if current_span:
                    spans.append(current_span)
                current_span = None

        if current_span:
            spans.append(current_span)

        return spans

    def predict(self, texts):
        """Run span prediction for a list of raw texts.

        Raises ValueError if no model has been attached/trained yet.
        """
        if not hasattr(self, 'model'):
            raise ValueError("Modell muss erst trainiert werden!")

        predictions = []
        device = next(self.model.parameters()).device

        for text in texts:
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                    max_length=512, return_offsets_mapping=True)

            # The model forward pass does not accept offset_mapping.
            offset_mapping = inputs.pop('offset_mapping')
            inputs = {k: v.to(device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model(**inputs)

            predicted_labels = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()

            spans = self._predictions_to_spans(predicted_labels, offset_mapping[0], text)
            predictions.append({'text': text, 'spans': spans})

        return predictions

    def evaluate_strict_f1(self, comments_df, spans_df):
        """Evaluate strict F1 on held-out data and print a summary."""
        if not hasattr(self, 'model'):
            raise ValueError("Modell muss erst trainiert werden!")

        print("Evaluiere Strict F1...")

        # Predict spans for every comment.
        texts = comments_df['comment'].tolist()
        predictions = self.predict(texts)

        # Collect gold spans per (document, comment_id).
        spans_grouped = spans_df.groupby(['document', 'comment_id'])
        true_spans_dict = {}
        pred_spans_dict = {}

        for i, (_, row) in enumerate(comments_df.iterrows()):
            key = (row['document'], row['comment_id'])

            if key in spans_grouped.groups:
                true_spans = [(span_type, int(start), int(end))
                              for span_type, start, end in
                              spans_grouped.get_group(key)[['type', 'start', 'end']].values]
            else:
                true_spans = []

            pred_spans = [(span['type'], span['start'], span['end'])
                          for span in predictions[i]['spans']]

            true_spans_dict[key] = true_spans
            pred_spans_dict[key] = pred_spans

        all_true_spans = list(true_spans_dict.values())
        all_pred_spans = list(pred_spans_dict.values())

        f1, precision, recall, tp, fp, fn = self._calculate_strict_f1(all_true_spans, all_pred_spans)

        print(f"\nStrict F1 Ergebnisse:")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-Score: {f1:.4f}")
        print(f"True Positives: {tp}, False Positives: {fp}, False Negatives: {fn}")

        return {
            'strict_f1': f1,
            'strict_precision': precision,
            'strict_recall': recall,
            'true_positives': tp,
            'false_positives': fp,
            'false_negatives': fn
        }
300
+
301
def convert_spans(row):
    """Flatten a row's predicted spans into one flat record dict per span."""
    doc = row['document']
    cid = row['comment_id']
    records = []
    for span in row['predicted_spans']:
        records.append({
            'document': doc,
            'comment_id': cid,
            'type': span['type'],
            'start': span['start'],
            'end': span['end'],
        })
    return records
306
+
307
def pred_to_spans(row):
    """Decode one dataframe row's token predictions into a single-element span list.

    NOTE(review): relies on a module-level `classifier` instance that is
    defined elsewhere in the script.
    """
    label_seq = row['predicted_labels']
    offsets = row['offset_mapping']
    comment_text = row['comment']
    return [classifier._predictions_to_spans(label_seq, offsets, comment_text)]
310
+
311
+
312
def create_highlighted_html(text, spans):
    """Render *text* as HTML with every annotated span highlighted.

    Args:
        text: The raw comment text.
        spans: Span dicts with keys 'type', 'start', 'end' (character
            offsets into *text*).

    Returns:
        An HTML string. Plain text is escaped; each span is wrapped in a
        colored <span> whose tooltip shows the span type.
    """
    if not spans:
        return html.escape(text)

    # Pastel palette, one color per candy-speech type. (A second, unused
    # palette that immediately shadowed this dict was removed.)
    # Keep in sync with create_legend().
    colors = {
        'positive feedback': '#8dd3c7',
        'compliment': '#ffffb3',
        'affection declaration': '#bebada',
        'encouragement': '#fb8072',
        'gratitude': '#80b1d3',
        'agreement': '#fdb462',
        'ambiguous': '#d9d9d9',
        'implicit': '#fccde5',
        'group membership': '#b3de69',
        'sympathy': '#bc80bd'
    }

    # Walk the spans left-to-right. NOTE(review): overlapping spans are not
    # merged — a span starting before the previous one's end is emitted
    # back-to-back; assumed non-overlapping upstream, confirm with the model.
    sorted_spans = sorted(spans, key=lambda x: x['start'])

    html_parts = []
    last_end = 0

    for span in sorted_spans:
        # Escaped plain text between the previous span and this one.
        if span['start'] > last_end:
            html_parts.append(html.escape(text[last_end:span['start']]))

        # Highlighted span; unknown types fall back to neutral grey.
        color = colors.get(span['type'], '#EEEEEE')
        span_text = html.escape(text[span['start']:span['end']])
        html_parts.append(
            f'<span style="background-color: {color}; padding: 2px 4px; border-radius: 3px; margin: 1px; display: inline-block;" title="{span["type"]}">{span_text}</span>')

        last_end = span['end']

    # Trailing text after the final span.
    if last_end < len(text):
        html_parts.append(html.escape(text[last_end:]))

    return ''.join(html_parts)
368
+
369
+
370
def create_legend():
    """Build an HTML legend mapping each candy-speech type to its highlight color.

    Returns:
        A <div> string with one colored <span> per type.
    """
    # Must match the palette used in create_highlighted_html. (A stale
    # commented-out palette was removed.)
    colors = {
        'positive feedback': '#8dd3c7',
        'compliment': '#ffffb3',
        'affection declaration': '#bebada',
        'encouragement': '#fb8072',
        'gratitude': '#80b1d3',
        'agreement': '#fdb462',
        'ambiguous': '#d9d9d9',
        'implicit': '#fccde5',
        'group membership': '#b3de69',
        'sympathy': '#bc80bd'
    }
    legend_html = "<div style='margin: 10px 0;'><h4>Candy Speech Types:</h4>"
    for span_type, color in colors.items():
        legend_html += f'<span style="background-color: {color}; padding: 4px 8px; border-radius: 3px; margin: 2px; display: inline-block;">{span_type}</span>'
    legend_html += "</div>"

    return legend_html
403
+
404
+
405
def analyze_text(text):
    """Run the span classifier on *text* and return the three UI outputs:
    (highlighted HTML, markdown summary, markdown details).

    Blank input and classifier failures are reported as user-facing
    messages instead of raising.
    """
    if not text.strip():
        return "Bitte geben Sie einen Text ein.", "", ""

    try:
        spans = classifier.predict([text])[0]['spans']
        return (
            create_highlighted_html(text, spans),
            create_summary(spans),
            create_details(spans, text),
        )
    except Exception as e:
        # Surface the error in the UI rather than crashing the demo.
        return f"Fehler bei der Analyse: {str(e)}", "", ""
428
+
429
+
430
def create_summary(spans):
    """Build a markdown summary counting the detected spans per type.

    Args:
        spans: Span dicts, each with at least a 'type' key.

    Returns:
        A markdown string, or a placeholder message when no spans exist.
    """
    if not spans:
        return "Keine Spans gefunden."

    # NOTE(review): the original had a stray `return ""` here that made
    # everything below unreachable, so the summary pane was always empty.
    # Removed so the per-type counts actually render.
    span_counts = {}
    for span in spans:
        span_type = span['type']
        span_counts[span_type] = span_counts.get(span_type, 0) + 1

    summary_lines = [f"**Insgesamt {len(spans)} Spans gefunden:**"]
    for span_type, count in sorted(span_counts.items()):
        summary_lines.append(f"- {span_type}: {count}")

    return "\n".join(summary_lines)
447
+
448
+
449
def create_details(spans, text):
    """Build a numbered markdown listing of every span: its type, character
    offsets, and the quoted text it covers."""
    if not spans:
        return "Keine Details verfügbar."

    lines = ["**Span-Informationen:**"]
    lines += [
        f"{i}. **{span['type']}** ({span['start']}-{span['end']}): \"{text[span['start']:span['end']]}\""
        for i, span in enumerate(spans, 1)
    ]
    return "\n".join(lines)
460
+
461
+
462
def load_example_texts():
    """Return the demo's built-in example comments (in-distribution)."""
    return [
        "Ich stimme allen zu die denken das Roman und Heiko super sind !!!!",
        "da geb ich dir recht ich stehe dir bei die sind einfach nur geil !",
        "OMG, ihr seid einfach der absolute Hammer! 🤩 Eure Videos bringen mich jedes Mal zum Lachen und geben mir so viel Motivation – eure Stimmen klingen mega, eure Parodien sind lustiger als das Original und ihr seht dabei unfassbar toll aus! 😂👌 Bitte macht weiter so! ❤️🎉",
        "Das ist ein wirklich toller Beitrag! Vielen Dank für diese hilfreichen Informationen.",
        "Du bist so klug und hilfreich. Ich bin dir sehr dankbar für deine Unterstützung.",
        "Großartige Arbeit! Das motiviert mich wirklich weiterzumachen.",
        "Das tut mir leid zu hören. Ich hoffe, es wird bald besser für dich.",
    ]
474
+
475
+
476
# Build the Gradio interface
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for the span-classifier demo.

    Wires the input textbox, three example galleries, and the output panes
    (highlighted HTML, summary, details) to ``analyze_text``.

    Returns:
        The constructed ``gr.Blocks`` demo (not yet launched).
    """

    with gr.Blocks(title="Span Classifier Demo", theme=gr.themes.Soft()) as demo:
        # Page header
        gr.HTML("""
        <div style="text-align: center; margin: 20px 0;">
            <h1>🍭 Candy Speech Span Classifier</h1>
            <p>Analysieren Sie Texte und identifizieren Sie verschiedene Arten positiver Kommunikation.</p>
        </div>
        """)

        # Color legend for the span types
        gr.HTML(create_legend())

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Text eingeben",
                    placeholder="Geben Sie hier den Text ein, den Sie analysieren möchten...",
                    lines=5
                )

                # Buttons
                with gr.Row():
                    analyze_btn = gr.Button("Analysieren", variant="primary")
                    clear_btn = gr.Button("Löschen", variant="secondary")

                # In-distribution examples
                gr.Examples(
                    examples=load_example_texts(),
                    inputs=text_input,
                    label="Beispieltexte"
                )

                # Out-of-distribution examples (different creator/domain)
                gr.Examples(
                    examples=[ "Bin wegen dir vegan geworden DANKE🫶 Du bist einzigartig und mach bitte weiter 🤍 🧚‍♀️",
                        "Danke für deine tolle Arbeit, auch schön, dass du den Permazidbegriff so wunderbar verwendest <3 Das hast du wirklich alles exzellent gemacht!",
                        "Rafaella Raab ist eine Ikone! Wir sollten alle mehr Tierrechtsaktivismus machen. Höchster Respekt!",
                    ],
                    inputs=text_input,
                    label="Out-of-Distribution Examples (Rafaella Raab)",
                )

                # Adversarial examples: sarcastic "praise"
                gr.Examples(
                    examples=[
                        "Tolles Video! Hab es einfach stumm geschaltet und tatsächlich eine gute Zeit gehabt.",  # adversarial
                        "Auf lautlos ballert der Track noch geiler. 🙏🏻",
                    ],
                    inputs=text_input,
                    label="Adversarial Example (Sarcasm)"
                )

            with gr.Column(scale=2):
                # Outputs
                highlighted_output = gr.HTML(
                    label="Analysierter Text",
                    show_label=True
                )

                summary_output = gr.Markdown(
                    label="Zusammenfassung",
                    show_label=True
                )

                details_output = gr.Markdown(
                    label="Details",
                    show_label=True
                )

        # Collapsible model description
        with gr.Accordion("ℹ️ Informationen zum Modell", open=False):
            gr.Markdown("""
            ### Über dieses Modell

            Dieses Modell identifiziert verschiedene Arten positiver Kommunikation in Texten:

            - **Positive Feedback**: Allgemein positive Rückmeldungen
            - **Compliment**: Direkte Komplimente
            - **Affection Declaration**: Liebesbekundungen oder Zuneigung
            - **Encouragement**: Ermutigung und Motivation
            - **Gratitude**: Dankbarkeit und Wertschätzung
            - **Agreement**: Zustimmung und Einverständnis
            - **Ambiguous**: Mehrdeutige positive Aussagen
            - **Implicit**: Implizite positive Kommunikation
            - **Group Membership**: Zugehörigkeitsgefühl
            - **Sympathy**: Mitgefühl und Empathie

            ### Verwendung
            1. Geben Sie einen Text in das Eingabefeld ein
            2. Klicken Sie auf "Analysieren"
            3. Betrachten Sie die hervorgehobenen Spans im analysierten Text
            4. Überprüfen Sie die Zusammenfassung und Details
            """)

        # Event handlers
        analyze_btn.click(
            fn=analyze_text,
            inputs=text_input,
            outputs=[highlighted_output, summary_output, details_output]
        )

        # Clear resets the input plus all three output panes.
        clear_btn.click(
            fn=lambda: ("", "", "", ""),
            outputs=[text_input, highlighted_output, summary_output, details_output]
        )

        # Auto-analyze on input change. NOTE(review): this fires on every
        # keystroke, not only on example selection — each edit triggers a
        # full model forward pass.
        text_input.change(
            fn=analyze_text,
            inputs=text_input,
            outputs=[highlighted_output, summary_output, details_output]
        )

    return demo
592
+
593
+
594
+
595
if __name__ == "__main__":
    # Rebuild the classifier wrapper and restore the fine-tuned weights.
    classifier = SpanClassifierWithStrictF1('xlm-roberta-large')

    classifier.model = AutoModelForTokenClassification.from_pretrained(
        'xlm-roberta-large',
        num_labels=len(classifier.labels),
        id2label=classifier.id2label,
        label2id=classifier.label2id
    )
    # NOTE(review): torch.load without weights_only=True unpickles arbitrary
    # objects — fine for a trusted local checkpoint, unsafe for untrusted files.
    classifier.model.load_state_dict(torch.load('./experiments/exp027/exp027-2_retraining_final_model.pth'))
    classifier.model.eval()

    print("Modell geladen! Starte Gradio-Interface...")

    # Build the demo UI
    demo = create_gradio_interface()

    # Launch the demo
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces for external access
        server_port=7860,
        debug=True,
        show_error=True
    )
subtask_1/exp019-4.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ ### Experiment 019-4
5
+ # - Model: Qwen/Qwen3-Embedding-8B
6
+
7
+ import os
8
+ from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, make_scorer, classification_report
9
+ from sklearn.model_selection import StratifiedKFold, train_test_split, GridSearchCV
10
+ from sklearn.pipeline import Pipeline
11
+ from sklearn.preprocessing import StandardScaler
12
+ from sklearn.svm import SVC
13
+ import time
14
+ import pickle
15
+ import numpy as np
16
+ import pandas as pd
17
+ import torch
18
+ from torch import Tensor
19
+ from transformers import AutoModel, AutoTokenizer
20
+ from transformers.utils import is_flash_attn_2_available
21
+ import wandb
22
+ from wandb import AlertLevel
23
+
24
+
25
+
26
# Runtime configuration: silence TF logging, pin the GPU, set up W&B.
for _key, _value in {
    'TF_CPP_MIN_LOG_LEVEL': '2',
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": '1',
    "WANDB_PROJECT": "GermEval2025-Substask1",
    "WANDB_LOG_MODEL": "false",
}.items():
    os.environ[_key] = _value

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
if not torch.cuda.is_available():
    device = torch.device('cpu')
    print("CUDA not available, using CPU")
else:
    device = torch.device('cuda')

experiment_name = "exp019-4"

testing_mode = False
41
+
42
+
43
# Load the comment texts and the task-1 labels, joined on (document, comment_id).
# NOTE(review): merge() defaults to an inner join, so comments without a
# task-1 label are silently dropped — confirm that is intended.
comments = pd.read_csv("../../share-GermEval2025-data/Data/training data/comments.csv")
task1 = pd.read_csv("../../share-GermEval2025-data/Data/training data/task1.csv")
comments = comments.merge(task1, on=["document", "comment_id"])

# Remove duplicates. NOTE(review): deduplicating on (comment, flausch) means a
# comment text that occurs with BOTH labels survives twice, once per label —
# verify that contradictory labels are desired in the training set.
df = comments.drop_duplicates(subset=['comment', 'flausch'])
df.reset_index(drop=True, inplace=True)

# In testing mode, run W&B offline and use a small random subset as a smoke test.
if testing_mode:
    os.environ["WANDB_MODE"] = "offline"
    testing_mode_sample_size = 1000
    df = df.sample(n=testing_mode_sample_size, random_state=42).reset_index(drop=True)
    print(f"Testing mode: using only {testing_mode_sample_size} samples for quick testing.")
58
+
59
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence's hidden states down to the hidden state of its
    final *real* (non-padding) token.

    With left padding every sequence ends in a real token, so the last
    position is taken directly; with right padding the index of the last
    real token is recovered from the attention mask.
    """
    # All rows attend at the final position <=> the batch is left-padded.
    if bool(attention_mask[:, -1].sum() == attention_mask.shape[0]):
        return last_hidden_states[:, -1]
    # Right padding: per-row index of the last real token.
    last_token_idx = attention_mask.sum(dim=1) - 1
    rows = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[rows, last_token_idx]
67
+
68
class Qwen3Embedder:
    """Qwen3-Embedding wrapper producing instruction-prefixed, last-token-pooled
    sentence embeddings, computed on the GPU in fp16."""

    def __init__(self, model_name='Qwen/Qwen3-Embedding-8B', instruction=None, max_length=1024):
        if instruction is None:
            instruction = 'Classify a given comment as either flausch (a positive, supportive expression) or non-flausch.'
        self.instruction = instruction

        # Use FlashAttention 2 when the environment supports it.
        load_kwargs = dict(trust_remote_code=True, torch_dtype=torch.float16)
        if is_flash_attn_2_available():
            load_kwargs["attn_implementation"] = "flash_attention_2"
        self.model = AutoModel.from_pretrained(model_name, **load_kwargs).cuda()
        self.model.eval()

        # Left padding so the last position is always a real token
        # (see last_token_pool).
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side='left')
        self.max_length = max_length

    def get_detailed_instruct(self, query: str) -> str:
        """Prefix *query* with the task instruction in Qwen's expected format."""
        return f'Instruct: {self.instruction}\nQuery:{query}'

    def encode_batch(self, texts, batch_size=32):
        """Embed *texts* in chunks of *batch_size*; returns an (N, dim) numpy array."""
        chunks = []
        for start in range(0, len(texts), batch_size):
            prompts = [self.get_detailed_instruct(t) for t in texts[start:start + batch_size]]

            encoded = self.tokenizer(
                prompts,
                padding=True,
                truncation=True,
                max_length=self.max_length,
                return_tensors='pt'
            ).to(device)

            with torch.no_grad():
                outputs = self.model(**encoded)
                pooled = last_token_pool(outputs.last_hidden_state, encoded['attention_mask'])

            chunks.append(pooled.cpu().numpy())

        return np.vstack(chunks)
117
+
118
# Initialize embedder. NOTE(review): this instruction string lacks the trailing
# period of the class default — harmless, but it changes the prompt and thus
# the cached embeddings; keep it stable across runs.
print("Loading Qwen3 Embeddings v3...")
embedder = Qwen3Embedder(instruction='Classify a given comment as either flausch (a positive, supportive expression) or non-flausch')

# Features: raw comment text; labels: flausch yes/no mapped to 1/0.
X, y = df["comment"], df["flausch"].map(dict(yes=1, no=0))

# Reuse cached embeddings when present to avoid an expensive re-encode.
embeddings_file = f'{"testing_" if testing_mode else ""}Qwen3-Embedding-8B-{experiment_name}.npy'
if os.path.exists(embeddings_file):
    print(f"Loading existing embeddings from {embeddings_file}")
    X_embeddings = np.load(embeddings_file)
else:
    print("Embeddings not found, generating new embeddings...")
    # Encode texts in batches to avoid GPU memory issues.
    X_embeddings = embedder.encode_batch(X.tolist(), batch_size=64)
    print(f"Generated embeddings with shape: {X_embeddings.shape}")

    # Cache the embeddings so later runs skip recomputation.
    np.save(embeddings_file, X_embeddings)
137
+
138
# Track the run in Weights & Biases.
wandb.init(
    project=os.environ["WANDB_PROJECT"],
    dir='./wandb_logs',
    name=f"{experiment_name}",
)

# 5-fold stratified cross-validation
kf_splits = 5

# Standardize the embeddings, then fit an RBF SVM on top.
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC(random_state=42, cache_size=2000))
])

param_grid = [
    {
        # 6 C values x 8 gamma values = 48 candidates -> 240 fits at 5 folds.
        'svm__kernel': ['rbf'],
        'svm__C': [5, 6, 7, 8, 9, 10],
        'svm__gamma': [0.00008, 0.0001, 0.0002, 1/4096, 0.0003, 0.0004, 0.0005, 0.0006]
        # This gamma range is chosen because Qwen3-Embedding-8B has 4096
        # dimensions, so gamma='auto' would land at 1/4096 ~= 2.4e-4.
    },
    # Alternative poly-kernel grid, currently disabled:
    # {
    #     'kernel': ['poly'],
    #     'C': [0.1, 1, 10, 100],
    #     'degree': [2, 3, 4],
    #     'gamma': ['scale', 'auto', 0.001, 0.01],
    #     'coef0': [0.0, 0.1, 0.5, 1]
    # }
]


# Optimize F1 of the positive ("flausch") class rather than accuracy.
f1_pos_scorer = make_scorer(f1_score, pos_label=1, average='binary')

X_train = X_embeddings
y_train = y

# 5-fold stratified CV for the grid search.
cv_inner = StratifiedKFold(n_splits=kf_splits, shuffle=True, random_state=42)

grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=cv_inner,
    scoring=f1_pos_scorer,
    n_jobs=63,  # NOTE(review): hard-coded for a ~64-core host; adjust per machine.
    verbose=3,
    return_train_score=True
)

grid.fit(X_train, y_train)
190
+
191
# Report the grid-search results.
print("Best F1 (pos) auf CV:", grid.best_score_)
print("Beste Parameter:", grid.best_params_)
print("Best estimator:", grid.best_estimator_)


# Append a timestamped summary to the per-experiment score log.
with open(f'scores.{experiment_name}.txt', 'a') as f:
    f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] {kf_splits}Fold CV\n')
    f.write(f'[{experiment_name}] Best F1 (pos) auf CV: {grid.best_score_}\n')
    f.write(f'[{experiment_name}] Beste Parameter: {grid.best_params_}\n')
    f.write(f'[{experiment_name}] Best estimator: {grid.best_estimator_}\n')

# Persist the full CV table, sorted by rank.
results = pd.DataFrame(grid.cv_results_).sort_values("rank_test_score")
print("grid.cv_results_:")
print(results)
results.to_csv(f'grid_cv_results.{experiment_name}.csv', index=False)

# Pickle the fitted GridSearchCV object for later reuse.
with open(f"grid_cv.{experiment_name}.pkl", "wb") as f:
    pickle.dump(grid, f)

print(f"GridSearchCV results saved to grid_cv_results.{experiment_name}.csv")

print(f"Training completed with {len(X_train)} samples...")


print("Experiment completed!")

# Notify via W&B that the run is done.
wandb.alert(
    title=f'Experiment {experiment_name} finished!',
    text=f'Best F1 (pos): {grid.best_score_:.4f}\nBest Params: {grid.best_params_}',
    level=AlertLevel.INFO
)
wandb.finish()
print("Notification sent via Weights & Biases.")
subtask_1/grid_cv_results.exp019-2.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svm__C,param_svm__gamma,param_svm__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
2
+ 2353.793872785568,376.68300732502587,668.3643176555634,245.2577496652275,10,0.0001,rbf,"{'svm__C': 10, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.905587668593449,0.8904684975767366,0.8992898644286637,0.8973607038123167,0.8943348185868873,0.8974083105996108,0.00506066727524964,1,0.9858907931446792,0.9876582530456246,0.9873357228195938,0.9868829000715478,0.985828025477707,0.9867191389118304,0.0007442000606650537
3
+ 2135.3830691814424,220.07048372247345,630.6276105880737,168.76826649669474,100,0.0001,rbf,"{'svm__C': 100, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8957055214723927,0.8829236739974127,0.8949742268041238,0.8936866208701341,0.8826170622193714,0.889981421072687,0.0059239769597075765,2,0.9961636828644501,0.9964048893504833,0.9962481040951545,0.9965663179749261,0.9964031652146111,0.996357231899925,0.0001396196230179917
4
+ 2107.9709944725037,201.0339803887574,590.9469698905945,148.89389411692406,100,1e-05,rbf,"{'svm__C': 100, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8903143040410519,0.8850981654328934,0.8919093851132686,0.8930323846908734,0.8893141945773525,0.8899336867710879,0.002735551843186545,3,0.9536455818445195,0.9543955602026863,0.9526727404660162,0.9510804080649048,0.9523425530199178,0.9528273687196089,0.001134859016259991
5
+ 2023.2640014648437,363.8169219637371,634.6866162300109,296.19671434247226,1,0.0001,rbf,"{'svm__C': 1, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8889618922470434,0.8828711256117455,0.8905785123966942,0.8863031914893617,0.8907672301690507,0.887896390382779,0.002978669586634141,4,0.9137704918032787,0.9158359209021082,0.9118032786885246,0.9141129361771676,0.9165093177900008,0.914406389072216,0.0016572588242453185
6
+ 2190.105693435669,279.15396660484754,409.17747988700864,88.07139447254528,10,1e-05,rbf,"{'svm__C': 10, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8841483426320972,0.8777777777777778,0.8815572418343781,0.8781624500665779,0.8868660598179454,0.8817023744257553,0.0034814075912781846,5,0.8997370151216305,0.9031255113729341,0.9008447469859756,0.9009422367882015,0.9013087496913326,0.9011916519920149,0.0011001857505116694
7
+ 1812.7185018062592,487.2391337311374,417.01636271476747,166.52173260741165,100,1e-06,rbf,"{'svm__C': 100, 'svm__gamma': 1e-06, 'svm__kernel': 'rbf'}",0.8814449917898194,0.8751633986928105,0.8784676354029062,0.8762920973657886,0.8861418347430059,0.8795019915988661,0.003951188905961493,6,0.896869093598488,0.8999426370564615,0.8977850697292863,0.8984509466437177,0.8973240016467682,0.8980743497349442,0.0010706699096367878
8
+ 1993.3692329406738,456.7097004392084,522.3544264793396,304.12100473275353,100,1e-07,rbf,"{'svm__C': 100, 'svm__gamma': 1e-07, 'svm__kernel': 'rbf'}",0.8652246256239601,0.8579842931937173,0.8614257161892072,0.8590559089387345,0.8706190632165084,0.8628619214324255,0.004606414952929956,7,0.866143034311699,0.8684057971014493,0.8666226477385276,0.8668761369274021,0.8654660137770769,0.866702725971231,0.0009777164172269123
9
+ 2247.661126232147,581.4474017109956,708.4357552051545,236.8812027154758,10,1e-06,rbf,"{'svm__C': 10, 'svm__gamma': 1e-06, 'svm__kernel': 'rbf'}",0.8649367930805056,0.8577036310107949,0.8610463178940353,0.8590559089387345,0.8702490170380078,0.8625983335924156,0.004536892956190344,8,0.8661925239827986,0.8684057971014493,0.8667161838738962,0.8669698222405953,0.8654660137770769,0.8667500681951633,0.0009747337581279379
10
+ 2407.543846988678,345.9172929856225,600.8789489269257,135.20490614298708,1,1e-05,rbf,"{'svm__C': 1, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8651348651348651,0.8569558101472995,0.8611388611388612,0.8588669125041904,0.8698787282858079,0.8623950354422047,0.004628674161723668,9,0.8663853727144867,0.8689746562862349,0.8666721703954429,0.8674997930977406,0.8655427290092185,0.8670149443006248,0.0011624819670864098
11
+ 3116.9194386959075,482.1913324221538,808.8905973434448,337.4328413836179,10,1e-07,rbf,"{'svm__C': 10, 'svm__gamma': 1e-07, 'svm__kernel': 'rbf'}",0.839123102866779,0.8371010638297872,0.835820895522388,0.8388851121685927,0.8458445040214477,0.8393549356817989,0.0034628992476326325,10,0.8390572390572391,0.8410299704516674,0.8408499202149996,0.839344262295082,0.8390630266262218,0.839868883729042,0.0008824914001438402
12
+ 3009.1526700019836,742.3166871679517,681.0144076347351,150.1665691333959,100,1e-08,rbf,"{'svm__C': 100, 'svm__gamma': 1e-08, 'svm__kernel': 'rbf'}",0.839123102866779,0.8371010638297872,0.835820895522388,0.8388851121685927,0.8458445040214477,0.8393549356817989,0.0034628992476326325,10,0.8390572390572391,0.8409321175278622,0.8409472623446422,0.839344262295082,0.8390630266262218,0.8398687815702095,0.0008805415253987308
13
+ 2988.9541951179503,539.1745917351178,573.9239889621734,154.38397714010605,1,1e-06,rbf,"{'svm__C': 1, 'svm__gamma': 1e-06, 'svm__kernel': 'rbf'}",0.839123102866779,0.8367143332224809,0.836104513064133,0.8391703502210133,0.8454575930271538,0.839313978480312,0.003312397500333861,12,0.8390572390572391,0.8408783783783784,0.8407696832198975,0.8393172454384933,0.8388401888064734,0.8397725469800964,0.0008723990422233897
14
+ 4479.428878545761,394.52193518745116,530.3013439178467,238.64268098954136,10,1e-08,rbf,"{'svm__C': 10, 'svm__gamma': 1e-08, 'svm__kernel': 'rbf'}",0.7659099367324154,0.7609828741623231,0.7684839432412248,0.7571860816944024,0.7789240972733972,0.7662973866207526,0.007424619711579136,13,0.7651345291479821,0.7695741119583411,0.7656876456876457,0.7679003161614283,0.7625374251497006,0.7661668056210196,0.002411737416643407
15
+ 4651.474856758117,292.18073753504444,416.76064705848694,180.45304034635336,1,1e-07,rbf,"{'svm__C': 1, 'svm__gamma': 1e-07, 'svm__kernel': 'rbf'}",0.7659099367324154,0.7609828741623231,0.7675635276532138,0.7571860816944024,0.7789240972733972,0.7661133035031504,0.007379397530916294,14,0.7651345291479821,0.7694167984373547,0.7653422868867749,0.7678571428571429,0.7626531948732341,0.7660807904404977,0.002344081336836005
16
+ 4201.746336603164,491.5078042590254,626.0860621452332,239.76149256091043,1,1e-08,rbf,"{'svm__C': 1, 'svm__gamma': 1e-08, 'svm__kernel': 'rbf'}",0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,0.0,0.0,0.0,0.0,0.0,0.0,0.0
subtask_1/grid_cv_results.exp019-3.csv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svm__C,param_svm__gamma,param_svm__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
2
+ 5455.408059644699,398.6684175930872,1819.24121799469,702.8565515706526,5,0.0001,rbf,"{'svm__C': 5, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8922446890197082,0.9103590803409971,0.9039256198347108,0.8975026014568158,0.8993743482794577,0.900681267786338,0.0061183935076840005,1,0.9708551706506455,0.9694563616571684,0.9677872938770292,0.9689401216778738,0.9681381957773513,0.9690354287280136,0.0010826445588126672
3
+ 6815.947726678848,669.4113202679961,1307.7390199661254,718.2998951073738,5,0.000244140625,rbf,"{'svm__C': 5, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8961272121056681,0.9077242094349404,0.9043793728945323,0.8957629321549259,0.8962091503267974,0.9000405753833727,0.005023063960942,2,0.9937460114869177,0.9923479148067849,0.9926672192820252,0.9922114402451481,0.9921511071405782,0.9926247385922908,0.0005883589375966587
4
+ 5478.7035518169405,651.6782882254886,1624.6494281768798,581.1884804375871,10,0.0001,rbf,"{'svm__C': 10, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.892802450229709,0.9092311648238621,0.9037915914366779,0.8953367875647669,0.8985959438377535,0.8999515875785538,0.005917778255965958,3,0.9869931140015302,0.9857397504456328,0.9857560727457714,0.9850822389391815,0.9851469369541659,0.9857436226172563,0.0006862741243773296
5
+ 5552.503013324737,436.61624342735956,1298.7537933349608,304.41444617181907,10,5e-05,rbf,"{'svm__C': 10, 'svm__gamma': 5e-05, 'svm__kernel': 'rbf'}",0.8918435182817693,0.9078233927188226,0.9030428055698814,0.8969286829776159,0.896551724137931,0.899238024737204,0.005575175271795643,4,0.9582905544147844,0.9566334725345326,0.9559974342527261,0.9574892276030613,0.9567956795679567,0.9570412736746123,0.0007845487956189628
6
+ 6738.8976334095005,581.7043147683858,2363.1046784877776,1093.7550833856315,10,0.000244140625,rbf,"{'svm__C': 10, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8940754039497307,0.9058854031630801,0.9022556390977443,0.8963367108339828,0.895631702851164,0.8988369719791404,0.004484757587607084,5,0.9951474907419231,0.9944476354585488,0.9947664028593312,0.9948272558911808,0.99457111834962,0.9947519806601207,0.00023984985392036138
7
+ 5479.927654600144,524.2887796010415,1723.1177164077758,872.0661863515872,1,0.000244140625,rbf,"{'svm__C': 1, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.892395240558717,0.9070010449320794,0.9032594524119948,0.8916929547844374,0.8938753959873285,0.8976448177349114,0.006265052008456617,6,0.9525471942073959,0.9506923773780251,0.9504745915929489,0.9507538989193037,0.9485536788973015,0.9506043481989949,0.0012670316908732473
8
+ 7194.264643144607,434.09262123997644,2276.0355013370513,1194.8076020168799,50,0.000244140625,rbf,"{'svm__C': 50, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8937371663244353,0.9031420410283043,0.8979698073919833,0.8946135831381733,0.89501312335958,0.8968951442484953,0.003432170407915575,7,0.9961039790509038,0.9951456310679612,0.9960398569238631,0.9957185762668541,0.995463548655038,0.9956943183929241,0.0003583628407093519
9
+ 4697.035187005997,526.5802792297949,989.723811674118,148.799116364752,5,5e-05,rbf,"{'svm__C': 5, 'svm__gamma': 5e-05, 'svm__kernel': 'rbf'}",0.890495867768595,0.9052686218531015,0.8997668997668997,0.892203035060178,0.8958825072121689,0.8967233863321885,0.005333041625787206,8,0.9384954033406707,0.9362170562714498,0.9359148112294289,0.9368972882014109,0.935051479634786,0.9365152077355493,0.0011540390227375986
10
+ 7055.811586093902,962.5495699591098,2110.023814868927,705.5011720606061,100,0.000244140625,rbf,"{'svm__C': 100, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8933436134669751,0.9034267912772586,0.8952033368091762,0.89421573736321,0.8953030700603516,0.8962985097953944,0.003635163114263367,9,0.9962953500255494,0.9954005366040629,0.9964230965763924,0.9957827476038339,0.9955913360168679,0.9958986133653414,0.0003970912539776255
11
+ 8686.581301164628,767.646570958569,2040.4449747562408,609.7467567837456,5,0.0005,rbf,"{'svm__C': 5, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8894668400520156,0.9034120734908136,0.9006831318970048,0.893740136770121,0.8925531914893617,0.8959710747398633,0.005225629609478866,10,0.9954676029364826,0.9948285769009768,0.9954664453100057,0.9950820719167146,0.9949543335249409,0.9951598061178242,0.0002633404117486148
12
+ 9230.11362876892,405.54152137987444,1368.0186800956726,507.5534238465096,10,0.0005,rbf,"{'svm__C': 10, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8886576482830385,0.9028871391076115,0.8997104501184522,0.8933929981574098,0.8917265230114392,0.8952749517355901,0.005244823031429965,11,0.995977782034093,0.9951462511176395,0.9960398569238631,0.9955907725733274,0.9954641282821185,0.9956437581862083,0.0003319800601900671
13
+ 8433.545366239548,764.8027335950326,2101.967122411728,348.04558910621176,1,0.0005,rbf,"{'svm__C': 1, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8870925684485007,0.9047993705743509,0.9014752370916754,0.8885941644562334,0.8912579957356077,0.8946438672612735,0.007139846007850025,12,0.9774985575998462,0.9765354532632389,0.9757940573770492,0.9755628247065615,0.9760348583877996,0.976285150266899,0.0006871071840619478
14
+ 5641.2619892120365,301.3642532545948,1245.2856984615325,258.0022220586702,100,1e-05,rbf,"{'svm__C': 100, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8904214559386974,0.9010819165378671,0.8979907264296755,0.8900414937759336,0.8935837245696401,0.8946238634503627,0.004307724355640959,13,0.9508512688724703,0.9488647327458674,0.9488413890493613,0.949270237253263,0.9493206259256874,0.94942965076933,0.0007380354112406489
15
+ 8291.015897274017,703.4766614629165,1496.3943771839142,462.8280066865296,50,0.0005,rbf,"{'svm__C': 50, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8880228630813198,0.90154896298241,0.8979161171194935,0.893516078017923,0.8913738019169329,0.8944755646236159,0.0047759662781075955,14,0.9963594558344511,0.9954005366040629,0.9964226395809378,0.9958463799603808,0.9956555072834142,0.9959369038526494,0.00039738087640584543
16
+ 6325.810354614257,330.9400455647892,1642.774357700348,688.5875334404985,50,0.0001,rbf,"{'svm__C': 50, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.889171974522293,0.9000258331180574,0.8982655966865131,0.8940874035989718,0.8903966597077244,0.8943894935267119,0.0042437951054770245,15,0.9954028859660324,0.9945745835194996,0.9951493489915751,0.9949543335249409,0.9946995338144198,0.9949561371632936,0.0002994484991425243
17
+ 5036.9365752696995,632.7213571409109,1517.7294404506683,426.3296032210096,50,1e-05,rbf,"{'svm__C': 50, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8891170431211499,0.9033428349313294,0.8969258589511754,0.8912591050988553,0.8910994764397906,0.89434886370846,0.005196335158243723,16,0.9354442649434572,0.9340737392651901,0.9350515463917526,0.9344272885845338,0.9342130797593635,0.9346419837888595,0.0005223698139574696
18
+ 8931.353061866761,1234.4122853976255,2149.3760446071624,1033.6010444948738,100,0.0005,rbf,"{'svm__C': 100, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8868071818891491,0.9014972419227738,0.8973885518332894,0.8928759894459103,0.8914316125598722,0.8940001155301989,0.00504554389640692,17,0.9964862965565706,0.9954647077610987,0.9964226395809378,0.9958463799603808,0.9956555072834142,0.9959751062284804,0.0004100785745162828
19
+ 5712.9752474784855,422.97193138513984,1671.5140540599823,728.145449370879,50,5e-05,rbf,"{'svm__C': 50, 'svm__gamma': 5e-05, 'svm__kernel': 'rbf'}",0.8886069525501142,0.9009240246406571,0.8988937483920761,0.8875160875160876,0.8914285714285715,0.8934738769055013,0.005445093016473903,18,0.9919041244342449,0.9908888180949347,0.9909565660425423,0.9908151549942594,0.990498054970984,0.991012543707393,0.00047265604462407793
20
+ 4886.372617149353,497.6637188714639,1340.4910155296325,512.4187982291547,1,0.0001,rbf,"{'svm__C': 1, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8866943866943867,0.9011277209546289,0.8970934799685781,0.8871903004744334,0.8881248346998148,0.8920461445583683,0.005925332557641618,19,0.9178055319427189,0.9155316919853327,0.9151811949069539,0.9175931981687377,0.9152409559879848,0.9162705145983455,0.0011745839476710812
21
+ 5400.760613489151,277.7619437511136,1815.8284684658051,495.1351854762197,100,0.0001,rbf,"{'svm__C': 100, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8901910828025478,0.8979064357715172,0.890272373540856,0.8911196911196911,0.8878431372549019,0.8914665440979028,0.0033992479680527554,20,0.9960403627538639,0.9950820719167146,0.9959767545820295,0.9955282994761723,0.9954647077610987,0.9956184392979758,0.0003537959259237909
22
+ 5831.041041278839,500.2366199022952,1163.3661043167115,651.9972938828764,100,5e-05,rbf,"{'svm__C': 100, 'svm__gamma': 5e-05, 'svm__kernel': 'rbf'}",0.8865194211728865,0.8980957282552754,0.8943298969072165,0.885523613963039,0.88518614944025,0.8899309619477336,0.00528376021564831,21,0.9947650663942799,0.9939378469784953,0.9945118059987237,0.9943170934167678,0.9939982122334312,0.9943060050043396,0.00031096336881715784
23
+ 5106.945345258713,501.77199871724264,1510.7054524421692,525.6496772631232,10,1e-05,rbf,"{'svm__C': 10, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8815584415584415,0.895577074064381,0.8924928066963118,0.8827404479578392,0.8807193864057128,0.8866176313365373,0.006167763694053975,22,0.9048678356451191,0.9032511798636602,0.9037820889672742,0.9035087719298246,0.9037405179178656,0.9038300788647486,0.0005523813046370195
24
+ 5257.808340215683,524.2623404859893,1091.6346819400787,557.959192374461,1,5e-05,rbf,"{'svm__C': 1, 'svm__gamma': 5e-05, 'svm__kernel': 'rbf'}",0.8762402088772846,0.89304531085353,0.8867825171142707,0.8729693741677763,0.8740978348035284,0.880627049163278,0.007907186646214762,23,0.8958237118163225,0.8928783578641674,0.893843725335438,0.8947437755236465,0.8950138139718458,0.894460676902284,0.0010127519339155767
25
+ 4648.525324726104,577.1027936552093,1347.1555349826813,729.69553292323,5,1e-05,rbf,"{'svm__C': 5, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8768909754825248,0.8896497234658941,0.8837453971593898,0.8710191082802548,0.8731642189586115,0.878893884669335,0.006900406643147761,24,0.8914851485148515,0.8886542142432843,0.8901597318083219,0.8914504017915953,0.8917691953417988,0.8907037383399704,0.001166173136675557
26
+ 5639.493494796753,385.0617052361121,1839.803660440445,683.8481740973009,1,1e-05,rbf,"{'svm__C': 1, 'svm__gamma': 1e-05, 'svm__kernel': 'rbf'}",0.8623949579831933,0.87595874107379,0.8709677419354839,0.8563815614175326,0.8553763440860215,0.8642158692992042,0.008078516269848037,25,0.8684158087014281,0.863926576217079,0.8676792465344565,0.8695018547959724,0.8706286771997092,0.8680304326897289,0.0022816415968571016
subtask_1/grid_cv_results.exp019-4.csv ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svm__C,param_svm__gamma,param_svm__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
2
+ 2962.254550600052,556.4586160586341,952.6687445163727,88.22149099477036,5,0.0002,rbf,"{'svm__C': 5, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.8956142600666838,0.9105480868665977,0.9049095607235143,0.8974292391586601,0.8989292243405589,0.901486074231203,0.005501162177555585,1,0.9913298482723447,0.9899337410805301,0.9902541563156889,0.9903680551125853,0.9896012759170654,0.9902974153396429,0.0005813171498605362
3
+ 2907.5319063186644,622.9412053502689,893.9788520812988,156.92165447691596,6,0.0001,rbf,"{'svm__C': 6, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8931492842535788,0.9111053852099974,0.903359173126615,0.89893478825669,0.9000520562207184,0.90132013741352,0.005898635485442219,2,0.9759805800434393,0.9748242811501597,0.9740516416958878,0.973975318114969,0.9735463258785942,0.97447562937661,0.0008579179998278246
4
+ 2294.846938943863,497.95159046395213,750.493603515625,166.74923962097026,5,0.0001,rbf,"{'svm__C': 5, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8922446890197082,0.9103590803409971,0.9039256198347108,0.8975026014568158,0.8993743482794577,0.900681267786338,0.0061183935076840005,3,0.9708551706506455,0.9694563616571684,0.9677872938770292,0.9689401216778738,0.9681381957773513,0.9690354287280136,0.0010826445588126672
5
+ 3643.660668420792,527.2542232689798,969.6882706165313,72.65267607425072,6,0.0002,rbf,"{'svm__C': 6, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.8967989756722151,0.9092788834324115,0.9033092037228542,0.8959792477302205,0.8967859942513718,0.9004304609618146,0.005154897910679173,4,0.9929829038019903,0.9916491362274494,0.9919683834778175,0.9918927545483562,0.991261083115392,0.991950852234201,0.0005719487931925857
6
+ 2822.824296474457,146.78322313502832,943.8164008617401,103.05704858326389,7,0.0001,rbf,"{'svm__C': 7, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8936061381074168,0.9099330931549151,0.9022440030951767,0.8980544747081712,0.8982035928143712,0.9004082603760102,0.005491122365808926,5,0.9795709908069459,0.9783732057416268,0.9782331975560081,0.9780332056194125,0.9779054916985952,0.9784232182845176,0.0005960103340822639
7
+ 3752.820015335083,675.8436439014653,1054.1311081886292,142.1847999664584,5,0.000244140625,rbf,"{'svm__C': 5, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8961272121056681,0.9077242094349404,0.9043793728945323,0.8957629321549259,0.8962091503267974,0.9000405753833727,0.005023063960942,6,0.9937460114869177,0.9923479148067849,0.9926672192820252,0.9922114402451481,0.9921511071405782,0.9926247385922908,0.0005883589375966587
8
+ 2866.750690841675,62.310380211696746,1010.1708914756775,72.51314774382548,8,0.0001,rbf,"{'svm__C': 8, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8921819110884006,0.9099794238683128,0.9029927760577915,0.8963730569948186,0.8984903695991671,0.9000035075216981,0.006084435249086387,7,0.9825948358304112,0.9816466989548814,0.9812881873727087,0.9812428225086129,0.9811055789608069,0.9815756247254843,0.0005400302566906774
9
+ 2726.576060628891,1076.4445437635895,767.500729751587,324.01429729941805,10,0.0001,rbf,"{'svm__C': 10, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.892802450229709,0.9092311648238621,0.9037915914366779,0.8953367875647669,0.8985959438377535,0.8999515875785538,0.005917778255965958,8,0.9869931140015302,0.9857397504456328,0.9857560727457714,0.9850822389391815,0.9851469369541659,0.9857436226172563,0.0006862741243773296
10
+ 2161.447349357605,573.8507236892065,765.411279296875,242.26387746003647,6,8e-05,rbf,"{'svm__C': 6, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.8908207619534646,0.9096074380165289,0.9025270758122743,0.8983315954118873,0.8982785602503912,0.8999130862889093,0.0061425816597368895,9,0.9661136378194862,0.9648627853295717,0.9627400768245838,0.9644253573488879,0.9637039887136079,0.9643691692072274,0.0011301674037890955
11
+ 2511.2280893802645,195.0702765837904,851.77890791893,159.33818171498493,8,8e-05,rbf,"{'svm__C': 8, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.8913876820853565,0.9091377091377091,0.9012122775341759,0.8978955572876072,0.8995837669094693,0.8998433985908635,0.005720466311006615,10,0.973877498882289,0.9732217038409919,0.9724454649827784,0.97250990921877,0.9717940518068436,0.9727697257463346,0.000714987388108625
12
+ 3198.1750497817993,566.8190693243176,1058.364576435089,61.431693130672315,6,0.000244140625,rbf,"{'svm__C': 6, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8962506420133539,0.9072058061171592,0.9038611039129308,0.8949557982319293,0.8966780015694481,0.8997902703689643,0.004840437232223766,11,0.9940012763241863,0.9933014354066986,0.9936842105263158,0.9934239928493903,0.9932311621966794,0.9935284154606542,0.00028236608448091314
13
+ 3066.5988575458528,147.7978752019348,831.5851910114288,63.516287946945454,9,0.0001,rbf,"{'svm__C': 9, 'svm__gamma': 0.0001, 'svm__kernel': 'rbf'}",0.8921819110884006,0.9088568486096807,0.903975219411461,0.8948186528497409,0.8988822459059007,0.8997429755730367,0.006052009180756879,12,0.9850803366488141,0.9839490445859873,0.9834626637832337,0.9832259710440717,0.9836086485107468,0.9838653329145707,0.0006511612138484461
14
+ 2721.2118691921232,273.9275976646404,1010.9925980567932,33.626638488332404,7,8e-05,rbf,"{'svm__C': 7, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.891332140117617,0.9085287297088379,0.9025270758122743,0.8983623602807382,0.897342365815529,0.8996185343469992,0.005714445566010723,13,0.9709135076391996,0.968944099378882,0.9676553311173612,0.968633977723723,0.9677336747759283,0.9687761181270188,0.0011798056774958321
15
+ 3768.9346249580385,784.9161006658384,1195.0321260929109,90.27548545731533,5,0.0003,rbf,"{'svm__C': 5, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8932338564445588,0.9079563182527302,0.9044155844155845,0.8965876530346445,0.8954128440366973,0.8995212512368429,0.0056589962755815805,14,0.9941923543302061,0.9936200076559908,0.9939378469784953,0.9938705146213765,0.9935492112154308,0.9938339869602999,0.0002312715774820335
16
+ 2439.5963623046873,788.1385495478739,745.0145160675049,163.93709476186407,5,8e-05,rbf,"{'svm__C': 5, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.8918503331624807,0.9092319627618308,0.9031758326878389,0.895903991651448,0.897288842544317,0.899490192561583,0.006075528952760006,15,0.9595427690726945,0.9579475308641975,0.9568119104151961,0.9581778406897439,0.9577120822622108,0.9580384266608087,0.0008833557807307179
17
+ 2662.4520683288574,110.2790439021587,789.4306183815003,35.48304290980892,9,8e-05,rbf,"{'svm__C': 9, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.8912155260469867,0.9084362139917695,0.9014447884416925,0.8966770508826584,0.8995314940135346,0.8994610146753285,0.00566097280654277,16,0.9770709586766303,0.9758121130895399,0.975796178343949,0.9754569858110699,0.9752826211917992,0.9758837714225976,0.0006270165226622817
18
+ 3648.960864543915,158.20137698659786,1278.5936987876892,172.71756799035126,8,0.0003,rbf,"{'svm__C': 8, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8933436134669751,0.9070554543087738,0.9040312093628089,0.8966414996094767,0.8953030700603516,0.8992749693616773,0.005311306707498723,17,0.9951474907419231,0.9944476354585488,0.9949569103096074,0.9948272558911808,0.99457111834962,0.9947900821501762,0.00025383825787875344
19
+ 3668.4558837890627,330.5950233154227,1209.3678759098052,193.34476642369032,6,0.0003,rbf,"{'svm__C': 6, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8930041152263375,0.9070554543087738,0.9038961038961039,0.8970358814352574,0.8953030700603516,0.8992589249853647,0.005329214832900393,18,0.9946380697050938,0.9940654712526322,0.9943185445260134,0.9943805874840358,0.9941229078829692,0.9943051161701488,0.00020364863996692793
20
+ 3648.1113197803497,250.88385803011812,1053.2344131469727,110.24198198721432,7,0.0003,rbf,"{'svm__C': 7, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8932887631781949,0.9067708333333333,0.9041309431021044,0.8969286829776159,0.8950682056663168,0.8992374856515131,0.00526873832264323,19,0.9950833280122597,0.9943207198009061,0.9947029165868914,0.9947002107145138,0.9944433799578464,0.9946501110144835,0.000262378229639201
21
+ 3336.6828705310822,182.4338639019245,984.4061690330506,65.03067948107055,7,0.0002,rbf,"{'svm__C': 7, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.8957212400717397,0.9084798345398138,0.9010362694300518,0.8948871009602907,0.8958660387231816,0.8991980967450155,0.0051245558432841155,20,0.993746809596733,0.992601097078709,0.9931113662456946,0.9925311203319502,0.9926597306440289,0.992930024779423,0.00045615770854314354
22
+ 2673.180758523941,1032.395217646984,545.5460843086242,130.47572525732375,10,8e-05,rbf,"{'svm__C': 10, 'svm__gamma': 8e-05, 'svm__kernel': 'rbf'}",0.8913265306122449,0.9084362139917695,0.9013649240278135,0.8966053381705105,0.8982565703877179,0.8991979154380111,0.005649926952866353,21,0.9796361315033514,0.9784328739152629,0.9783065080475857,0.9780332056194125,0.977840219681972,0.978449787753517,0.0006282261620036984
23
+ 3687.9748188495637,226.27635714477285,1235.9286698818207,96.43253951529712,7,0.000244140625,rbf,"{'svm__C': 7, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8953309389430477,0.9064039408866995,0.9020725388601036,0.894723160904601,0.8967320261437909,0.8990525211476486,0.004497601815320273,22,0.9943827396910507,0.9938118022328548,0.9941289087428207,0.9939339761190218,0.9937412185464299,0.9939997290664356,0.0002323153723652084
24
+ 3475.950292634964,250.68307679317394,1108.8572846889497,173.86681006560963,8,0.000244140625,rbf,"{'svm__C': 8, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8942505133470225,0.9061203319502075,0.9024896265560166,0.8958712022851207,0.8959205020920502,0.8989304352460834,0.004575647064168655,23,0.9947650663942799,0.9941289087428207,0.9943185445260134,0.9943805874840358,0.994187160651549,0.9943560535597398,0.00022334722839619742
25
+ 4691.708357810974,715.7987333362521,856.614222574234,169.3726110893645,9,0.0003,rbf,"{'svm__C': 9, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8923156001028013,0.9057291666666667,0.9040312093628089,0.8972691807542262,0.8950682056663168,0.8988826725105641,0.005170335906126429,24,0.9954670241971525,0.9946380697050938,0.9952116452786822,0.9949543335249409,0.9947630604164006,0.9950068266244539,0.00030062988617190713
26
+ 3234.2388828754424,1459.5692858954721,924.4178524971009,524.3748061613071,10,0.000244140625,rbf,"{'svm__C': 10, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8940754039497307,0.9058854031630801,0.9022556390977443,0.8963367108339828,0.895631702851164,0.8988369719791404,0.004484757587607084,25,0.9951474907419231,0.9944476354585488,0.9947664028593312,0.9948272558911808,0.99457111834962,0.9947519806601207,0.00023984985392036138
27
+ 3163.7975586414336,1065.2188857087929,820.229798078537,589.5385645528229,10,0.0003,rbf,"{'svm__C': 10, 'svm__gamma': 0.0003, 'svm__kernel': 'rbf'}",0.8925449871465295,0.9052083333333333,0.9034608378870674,0.8975559022360895,0.8953579858379229,0.8988256092881886,0.004802234756394428,26,0.9954676029364826,0.9948285769009768,0.9954664453100057,0.995017884517118,0.9948265951331673,0.99512142095955,0.00029061433860554813
28
+ 3278.97476811409,170.465214883947,1106.7234085559844,51.65991928293311,8,0.0002,rbf,"{'svm__C': 8, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.8955453149001537,0.9073498964803313,0.9007514900233221,0.8937938197870683,0.8958660387231816,0.8986613119828114,0.00492089047840589,27,0.9939378469784953,0.9933014354066986,0.9935574408368948,0.9934239928493903,0.9932311621966794,0.9934903756536316,0.00024980994185843
29
+ 3841.6124336719513,530.3719300313371,717.6561746120453,451.3315178843931,9,0.000244140625,rbf,"{'svm__C': 9, 'svm__gamma': 0.000244140625, 'svm__kernel': 'rbf'}",0.8935078265332307,0.9063553826199741,0.9022049286640726,0.8952974798648999,0.8953427524856097,0.8985416740335573,0.004910620976859314,28,0.994956266360212,0.9942572741194488,0.99457527602272,0.9946988567413936,0.9943791517629024,0.9945733650013354,0.0002449479565334618
30
+ 3785.3250827789307,157.957270731452,1000.0055857658386,413.19894731125567,9,0.0002,rbf,"{'svm__C': 9, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.8950332821300563,0.90641158221303,0.9001297016861219,0.8930980799169694,0.8964976476738108,0.8982340587239976,0.004692626665297457,29,0.9943827396910507,0.9936834045811268,0.9940020418580908,0.9938705146213765,0.9937412185464299,0.993935983859615,0.0002489861331004921
31
+ 1625.4079483509063,415.4024195664831,842.2647498607636,533.5642210948797,10,0.0002,rbf,"{'svm__C': 10, 'svm__gamma': 0.0002, 'svm__kernel': 'rbf'}",0.894413121476166,0.9059431524547804,0.9004665629860031,0.893959035519834,0.8962633916906193,0.8982090528254805,0.004498221612137673,30,0.994573890839451,0.9939386205576469,0.9941916129444054,0.9941893876508524,0.9939951450108598,0.9941777314006431,0.0002226129606986996
32
+ 4175.494106626511,240.66795475761913,1410.2359414100647,135.26701239594428,6,0.0004,rbf,"{'svm__C': 6, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8919896640826873,0.9042386185243328,0.9044752682543836,0.89527291721076,0.8942917547568711,0.8980536445658069,0.005256372751673358,31,0.9953393347379174,0.9947650663942799,0.9951481103166496,0.9948907906501469,0.9947630604164006,0.9949812725030789,0.00022741679006577428
33
+ 4326.916931676865,602.9984138032598,1227.9458584785461,197.47256597026075,5,0.0004,rbf,"{'svm__C': 5, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8917592353397055,0.9055715406748627,0.9037153322867608,0.89527291721076,0.8935270805812418,0.8979692212186661,0.005592488656824322,32,0.9950839558194471,0.9943199948943774,0.9948940515700792,0.9947637292464878,0.9946346448645886,0.9947392752789961,0.0002569443271676517
34
+ 4182.006548070907,118.61878371306456,1325.9245444774629,128.4962088893567,7,0.0004,rbf,"{'svm__C': 7, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8915289256198347,0.9040020925974366,0.9027522935779817,0.8955067920585162,0.8942917547568711,0.8976163717221279,0.004893169295618382,33,0.9955305835780871,0.9948285769009768,0.9955300127713921,0.9951456310679612,0.9948907906501469,0.9951851189937129,0.0003012003422766297
35
+ 4359.134440898895,349.4379644221477,1247.1464223861694,48.51511167190543,8,0.0004,rbf,"{'svm__C': 8, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8907259106174116,0.9040020925974366,0.9022280471821756,0.8952181865691142,0.8940554821664465,0.8972459438265169,0.005045167553380767,34,0.9956582811901418,0.9948285769009768,0.9955300127713921,0.9952733776188043,0.9952091983391887,0.9952998893641007,0.000287215826893503
36
+ 3258.862170982361,983.5605167556715,587.3299376487732,331.17491359144447,10,0.0004,rbf,"{'svm__C': 10, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8907259106174116,0.9040020925974366,0.9021767637031209,0.894929430214323,0.8934707903780069,0.8970609975020599,0.0051364648753549295,35,0.9958500925748579,0.9950833280122597,0.9957849022863712,0.9954647077610987,0.995399948888321,0.9955165959045816,0.0002782906092371906
37
+ 4844.620493507386,626.0919379166494,911.782252407074,323.6546317691595,9,0.0004,rbf,"{'svm__C': 9, 'svm__gamma': 0.0004, 'svm__kernel': 'rbf'}",0.8909560723514212,0.9037656903765691,0.9018887722980063,0.8946955840083617,0.8934707903780069,0.896955381882473,0.0049791449468967654,36,0.9957224031156228,0.9950197931298684,0.9956577266922094,0.9952733776188043,0.9952091983391887,0.9953764997791387,0.00027003750858763207
38
+ 4893.450245141983,756.5570053534453,1404.103228712082,151.04221061712673,5,0.0005,rbf,"{'svm__C': 5, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8894668400520156,0.9034120734908136,0.9006831318970048,0.893740136770121,0.8925531914893617,0.8959710747398633,0.005225629609478866,37,0.9954676029364826,0.9948285769009768,0.9954664453100057,0.9950820719167146,0.9949543335249409,0.9951598061178242,0.0002633404117486148
39
+ 4990.464423799514,170.82569174595943,1480.2943919181823,186.67598755544077,6,0.0005,rbf,"{'svm__C': 6, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8890044190278139,0.9034120734908136,0.9009198423127464,0.8936842105263157,0.8920212765957447,0.8958083643906868,0.005460610492136122,38,0.9957224031156228,0.994956266360212,0.9955300127713921,0.9953369530501437,0.9952091983391887,0.9953509667273119,0.00026309903658882153
40
+ 5007.180242681503,231.11542984640803,1395.9259331703186,204.12036576004334,8,0.0005,rbf,"{'svm__C': 8, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8889466840052016,0.9031241795746915,0.9004739336492891,0.8944459068175836,0.8914893617021277,0.8956960131497788,0.005344498234287286,39,0.995977782034093,0.9950833280122597,0.9958485022673564,0.9955277280858676,0.9954641282821185,0.995580293736339,0.0003140589926224988
41
+ 4690.542550802231,156.2481940371033,1510.894291639328,161.45358965306164,7,0.0005,rbf,"{'svm__C': 7, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8890044190278139,0.9031241795746915,0.900815574848724,0.8936842105263157,0.8917265230114392,0.8956709813977968,0.005403074479030133,40,0.9957229492499202,0.9950833280122597,0.9956577266922094,0.9954647077610987,0.9953363572478119,0.99545301379266,0.00023029789270078538
42
+ 3863.1712747097017,1272.979608552642,731.4842251300812,347.05442064609605,9,0.0005,rbf,"{'svm__C': 9, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8889466840052016,0.9028871391076115,0.9,0.8944459068175836,0.8917265230114392,0.895601250588367,0.005158774221364523,41,0.995977782034093,0.9950833280122597,0.9959126325201175,0.9955271565495207,0.9954641282821185,0.9955930054796219,0.0003258622335451535
43
+ 2941.8006259441377,963.9592947713663,792.06405377388,413.3915357813076,10,0.0005,rbf,"{'svm__C': 10, 'svm__gamma': 0.0005, 'svm__kernel': 'rbf'}",0.8886576482830385,0.9028871391076115,0.8997104501184522,0.8933929981574098,0.8917265230114392,0.8952749517355901,0.005244823031429965,42,0.995977782034093,0.9951462511176395,0.9960398569238631,0.9955907725733274,0.9954641282821185,0.9956437581862083,0.0003319800601900671
44
+ 5125.378367471695,385.2361899541604,1681.7096691131592,317.80513982062877,5,0.0006,rbf,"{'svm__C': 5, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8873165618448637,0.899736147757256,0.8978835978835978,0.8901273885350318,0.8895442359249329,0.8929215863891364,0.0049333587475242895,43,0.9957224031156228,0.9950197931298684,0.9955300127713921,0.9953357612932081,0.9952721696907744,0.9953760280001731,0.00023790681760390187
45
+ 5443.34237332344,248.4197468645117,1615.5747085094451,101.54156492988628,6,0.0006,rbf,"{'svm__C': 6, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8867330886208705,0.9000263782643102,0.8973001588141875,0.8903054448871182,0.8892464467685707,0.8927223034710113,0.005061615312935921,44,0.995977782034093,0.9950833280122597,0.9957849022863712,0.9955265848670757,0.9953993610223643,0.9955543916444327,0.0003095491025041155
46
+ 4886.9807908058165,1271.2093796793506,935.9216484069824,248.52890337673645,9,0.0006,rbf,"{'svm__C': 9, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8862683438155137,0.9000263782643102,0.8967161016949152,0.8906001062134891,0.8897827835880934,0.8926787427152643,0.00498173742220226,45,0.9960408684546616,0.9951462511176395,0.9960398569238631,0.995590208985748,0.995463548655038,0.99565614682739,0.000345450331712534
47
+ 5317.718171501159,69.30253901488373,1633.7499773979187,123.257398427158,7,0.0006,rbf,"{'svm__C': 7, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8865006553079947,0.9000263782643102,0.8964786867884564,0.8906001062134891,0.8895442359249329,0.8926300124998366,0.004913859511392688,46,0.9960413740263057,0.99514687100894,0.9959126325201175,0.9955265848670757,0.995463548655038,0.9956182022154954,0.00032256104982186627
48
+ 5630.75842347145,173.39260110695608,1854.6558534622193,343.7979980330878,8,0.0006,rbf,"{'svm__C': 8, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8862683438155137,0.9000263782643102,0.8964786867884564,0.8903054448871182,0.8895442359249329,0.8925246179360663,0.004997188016010959,47,0.9959772683736671,0.9950827000447027,0.9960398569238631,0.995590208985748,0.995463548655038,0.9956307165966038,0.00035139780020874577
49
+ 3351.3005242347717,605.7478038761916,610.0130533695221,370.216124260849,10,0.0006,rbf,"{'svm__C': 10, 'svm__gamma': 0.0006, 'svm__kernel': 'rbf'}",0.8862683438155137,0.9000263782643102,0.8964238410596026,0.8897156524049961,0.8897827835880934,0.892443399826503,0.005019885972826287,48,0.9960408684546616,0.9951462511176395,0.9960398569238631,0.9957180290151467,0.995463548655038,0.9956817108332698,0.00034435263106865976
subtask_1/submission_subtask1-2.ipynb ADDED
@@ -0,0 +1,608 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "d10bfa50537af75f",
6
+ "metadata": {},
7
+ "source": [
8
+ "## Experiment exp027-2\n",
9
+ "xlm-roberta-large, Batch Size: 32, Learning Rate: 2e-5, Warmup Steps: 500"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 51,
15
+ "id": "9748a35a024779ae",
16
+ "metadata": {
17
+ "ExecuteTime": {
18
+ "end_time": "2025-06-27T22:06:52.194727Z",
19
+ "start_time": "2025-06-27T22:06:52.191088Z"
20
+ }
21
+ },
22
+ "outputs": [],
23
+ "source": [
24
+ "import pandas as pd\n",
25
+ "import numpy as np\n",
26
+ "from sklearn.model_selection import train_test_split\n",
27
+ "from transformers import (\n",
28
+ " AutoTokenizer,\n",
29
+ " BertForTokenClassification,\n",
30
+ " AutoModelForTokenClassification\n",
31
+ ")\n",
32
+ "import torch\n",
33
+ "import os\n",
34
+ "\n",
35
+ "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n",
36
+ "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '1'"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 56,
42
+ "id": "4ae3d9e4c556a288",
43
+ "metadata": {
44
+ "ExecuteTime": {
45
+ "end_time": "2025-06-27T22:07:26.334867Z",
46
+ "start_time": "2025-06-27T22:07:26.325629Z"
47
+ }
48
+ },
49
+ "outputs": [],
50
+ "source": [
51
+ "test_comments_spans = pd.read_csv(\"./submissions/task2-predicted.csv\")"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 57,
57
+ "id": "156c9b1c48a954b4",
58
+ "metadata": {
59
+ "ExecuteTime": {
60
+ "end_time": "2025-06-27T22:07:30.302897Z",
61
+ "start_time": "2025-06-27T22:07:30.290021Z"
62
+ }
63
+ },
64
+ "outputs": [
65
+ {
66
+ "data": {
67
+ "text/html": [
68
+ "<div>\n",
69
+ "<style scoped>\n",
70
+ " .dataframe tbody tr th:only-of-type {\n",
71
+ " vertical-align: middle;\n",
72
+ " }\n",
73
+ "\n",
74
+ " .dataframe tbody tr th {\n",
75
+ " vertical-align: top;\n",
76
+ " }\n",
77
+ "\n",
78
+ " .dataframe thead th {\n",
79
+ " text-align: right;\n",
80
+ " }\n",
81
+ "</style>\n",
82
+ "<table border=\"1\" class=\"dataframe\">\n",
83
+ " <thead>\n",
84
+ " <tr style=\"text-align: right;\">\n",
85
+ " <th></th>\n",
86
+ " <th>document</th>\n",
87
+ " <th>comment_id</th>\n",
88
+ " <th>type</th>\n",
89
+ " <th>start</th>\n",
90
+ " <th>end</th>\n",
91
+ " </tr>\n",
92
+ " </thead>\n",
93
+ " <tbody>\n",
94
+ " <tr>\n",
95
+ " <th>0</th>\n",
96
+ " <td>NDY-004</td>\n",
97
+ " <td>2</td>\n",
98
+ " <td>compliment</td>\n",
99
+ " <td>0</td>\n",
100
+ " <td>21</td>\n",
101
+ " </tr>\n",
102
+ " <tr>\n",
103
+ " <th>1</th>\n",
104
+ " <td>NDY-004</td>\n",
105
+ " <td>4</td>\n",
106
+ " <td>affection declaration</td>\n",
107
+ " <td>0</td>\n",
108
+ " <td>19</td>\n",
109
+ " </tr>\n",
110
+ " <tr>\n",
111
+ " <th>2</th>\n",
112
+ " <td>NDY-004</td>\n",
113
+ " <td>5</td>\n",
114
+ " <td>affection declaration</td>\n",
115
+ " <td>0</td>\n",
116
+ " <td>25</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>3</th>\n",
120
+ " <td>NDY-004</td>\n",
121
+ " <td>5</td>\n",
122
+ " <td>affection declaration</td>\n",
123
+ " <td>26</td>\n",
124
+ " <td>56</td>\n",
125
+ " </tr>\n",
126
+ " <tr>\n",
127
+ " <th>4</th>\n",
128
+ " <td>NDY-004</td>\n",
129
+ " <td>5</td>\n",
130
+ " <td>positive feedback</td>\n",
131
+ " <td>57</td>\n",
132
+ " <td>71</td>\n",
133
+ " </tr>\n",
134
+ " <tr>\n",
135
+ " <th>...</th>\n",
136
+ " <td>...</td>\n",
137
+ " <td>...</td>\n",
138
+ " <td>...</td>\n",
139
+ " <td>...</td>\n",
140
+ " <td>...</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>5498</th>\n",
144
+ " <td>NDY-203</td>\n",
145
+ " <td>526</td>\n",
146
+ " <td>affection declaration</td>\n",
147
+ " <td>0</td>\n",
148
+ " <td>17</td>\n",
149
+ " </tr>\n",
150
+ " <tr>\n",
151
+ " <th>5499</th>\n",
152
+ " <td>NDY-203</td>\n",
153
+ " <td>526</td>\n",
154
+ " <td>positive feedback</td>\n",
155
+ " <td>30</td>\n",
156
+ " <td>59</td>\n",
157
+ " </tr>\n",
158
+ " <tr>\n",
159
+ " <th>5500</th>\n",
160
+ " <td>NDY-203</td>\n",
161
+ " <td>526</td>\n",
162
+ " <td>positive feedback</td>\n",
163
+ " <td>64</td>\n",
164
+ " <td>104</td>\n",
165
+ " </tr>\n",
166
+ " <tr>\n",
167
+ " <th>5501</th>\n",
168
+ " <td>NDY-203</td>\n",
169
+ " <td>526</td>\n",
170
+ " <td>affection declaration</td>\n",
171
+ " <td>105</td>\n",
172
+ " <td>106</td>\n",
173
+ " </tr>\n",
174
+ " <tr>\n",
175
+ " <th>5502</th>\n",
176
+ " <td>NDY-203</td>\n",
177
+ " <td>526</td>\n",
178
+ " <td>affection declaration</td>\n",
179
+ " <td>105</td>\n",
180
+ " <td>114</td>\n",
181
+ " </tr>\n",
182
+ " </tbody>\n",
183
+ "</table>\n",
184
+ "<p>5503 rows × 5 columns</p>\n",
185
+ "</div>"
186
+ ],
187
+ "text/plain": [
188
+ " document comment_id type start end\n",
189
+ "0 NDY-004 2 compliment 0 21\n",
190
+ "1 NDY-004 4 affection declaration 0 19\n",
191
+ "2 NDY-004 5 affection declaration 0 25\n",
192
+ "3 NDY-004 5 affection declaration 26 56\n",
193
+ "4 NDY-004 5 positive feedback 57 71\n",
194
+ "... ... ... ... ... ...\n",
195
+ "5498 NDY-203 526 affection declaration 0 17\n",
196
+ "5499 NDY-203 526 positive feedback 30 59\n",
197
+ "5500 NDY-203 526 positive feedback 64 104\n",
198
+ "5501 NDY-203 526 affection declaration 105 106\n",
199
+ "5502 NDY-203 526 affection declaration 105 114\n",
200
+ "\n",
201
+ "[5503 rows x 5 columns]"
202
+ ]
203
+ },
204
+ "execution_count": 57,
205
+ "metadata": {},
206
+ "output_type": "execute_result"
207
+ }
208
+ ],
209
+ "source": [
210
+ "test_comments_spans"
211
+ ]
212
+ },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": 58,
216
+ "id": "2b63b3b12b9648f6",
217
+ "metadata": {
218
+ "ExecuteTime": {
219
+ "end_time": "2025-06-27T22:07:50.819958Z",
220
+ "start_time": "2025-06-27T22:07:50.699928Z"
221
+ }
222
+ },
223
+ "outputs": [
224
+ {
225
+ "data": {
226
+ "text/html": [
227
+ "<div>\n",
228
+ "<style scoped>\n",
229
+ " .dataframe tbody tr th:only-of-type {\n",
230
+ " vertical-align: middle;\n",
231
+ " }\n",
232
+ "\n",
233
+ " .dataframe tbody tr th {\n",
234
+ " vertical-align: top;\n",
235
+ " }\n",
236
+ "\n",
237
+ " .dataframe thead th {\n",
238
+ " text-align: right;\n",
239
+ " }\n",
240
+ "</style>\n",
241
+ "<table border=\"1\" class=\"dataframe\">\n",
242
+ " <thead>\n",
243
+ " <tr style=\"text-align: right;\">\n",
244
+ " <th></th>\n",
245
+ " <th>document</th>\n",
246
+ " <th>comment_id</th>\n",
247
+ " <th>comment</th>\n",
248
+ " <th>predicted_labels</th>\n",
249
+ " <th>predicted_probs</th>\n",
250
+ " <th>offset_mapping</th>\n",
251
+ " <th>text_tokens</th>\n",
252
+ " <th>predicted_spans</th>\n",
253
+ " </tr>\n",
254
+ " </thead>\n",
255
+ " <tbody>\n",
256
+ " <tr>\n",
257
+ " <th>0</th>\n",
258
+ " <td>NDY-004</td>\n",
259
+ " <td>1</td>\n",
260
+ " <td>Lol i love lochis</td>\n",
261
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
262
+ " <td>[[0.99999654, 1.7456429e-07, 1.6115715e-07, 1....</td>\n",
263
+ " <td>[[0, 0], [0, 1], [1, 3], [4, 5], [6, 10], [11,...</td>\n",
264
+ " <td>[▁L, ol, ▁i, ▁love, ▁loc, his]</td>\n",
265
+ " <td>[]</td>\n",
266
+ " </tr>\n",
267
+ " <tr>\n",
268
+ " <th>1</th>\n",
269
+ " <td>NDY-004</td>\n",
270
+ " <td>2</td>\n",
271
+ " <td>ihr singt voll gut :)</td>\n",
272
+ " <td>[0, 2, 12, 12, 12, 12, 12, 0]</td>\n",
273
+ " <td>[[0.9999976, 1.1218729e-07, 1.239344e-07, 1.50...</td>\n",
274
+ " <td>[[0, 0], [0, 3], [4, 8], [8, 9], [10, 14], [15...</td>\n",
275
+ " <td>[▁ihr, ▁sing, t, ▁voll, ▁gut, ▁:)]</td>\n",
276
+ " <td>[{'type': 'compliment', 'start': 0, 'end': 21,...</td>\n",
277
+ " </tr>\n",
278
+ " <tr>\n",
279
+ " <th>2</th>\n",
280
+ " <td>NDY-004</td>\n",
281
+ " <td>3</td>\n",
282
+ " <td>Junge fick dich</td>\n",
283
+ " <td>[0, 0, 0, 0, 0, 0]</td>\n",
284
+ " <td>[[0.9999981, 5.8623616e-08, 1.05891374e-07, 1....</td>\n",
285
+ " <td>[[0, 0], [0, 4], [4, 5], [6, 10], [11, 15], [0...</td>\n",
286
+ " <td>[▁Jung, e, ▁fick, ▁dich]</td>\n",
287
+ " <td>[]</td>\n",
288
+ " </tr>\n",
289
+ " <tr>\n",
290
+ " <th>3</th>\n",
291
+ " <td>NDY-004</td>\n",
292
+ " <td>4</td>\n",
293
+ " <td>Ihr seit die besten</td>\n",
294
+ " <td>[0, 3, 13, 13, 13, 0]</td>\n",
295
+ " <td>[[0.99999774, 1.6417343e-07, 1.384722e-07, 1.1...</td>\n",
296
+ " <td>[[0, 0], [0, 3], [4, 8], [9, 12], [13, 19], [0...</td>\n",
297
+ " <td>[▁Ihr, ▁seit, ▁die, ▁besten]</td>\n",
298
+ " <td>[{'type': 'affection declaration', 'start': 0,...</td>\n",
299
+ " </tr>\n",
300
+ " <tr>\n",
301
+ " <th>4</th>\n",
302
+ " <td>NDY-004</td>\n",
303
+ " <td>5</td>\n",
304
+ " <td>ihr seit die ALLER besten ich finde euch soooo...</td>\n",
305
+ " <td>[0, 3, 13, 13, 13, 13, 13, 3, 13, 13, 13, 13, ...</td>\n",
306
+ " <td>[[0.99999785, 1.2960982e-07, 1.4320104e-07, 1....</td>\n",
307
+ " <td>[[0, 0], [0, 3], [4, 8], [9, 12], [13, 17], [1...</td>\n",
308
+ " <td>[▁ihr, ▁seit, ▁die, ▁ALLE, R, ▁besten, ▁ich, ▁...</td>\n",
309
+ " <td>[{'type': 'affection declaration', 'start': 0,...</td>\n",
310
+ " </tr>\n",
311
+ " <tr>\n",
312
+ " <th>...</th>\n",
313
+ " <td>...</td>\n",
314
+ " <td>...</td>\n",
315
+ " <td>...</td>\n",
316
+ " <td>...</td>\n",
317
+ " <td>...</td>\n",
318
+ " <td>...</td>\n",
319
+ " <td>...</td>\n",
320
+ " <td>...</td>\n",
321
+ " </tr>\n",
322
+ " <tr>\n",
323
+ " <th>9224</th>\n",
324
+ " <td>NDY-203</td>\n",
325
+ " <td>522</td>\n",
326
+ " <td>hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes...</td>\n",
327
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 11, 0, 11, 11, ...</td>\n",
328
+ " <td>[[0.99999774, 1.8107521e-07, 1.0220851e-07, 9....</td>\n",
329
+ " <td>[[0, 0], [0, 4], [5, 11], [12, 14], [15, 19], ...</td>\n",
330
+ " <td>[▁hihi, ▁kannst, ▁du, ▁mich, ▁gr, üß, en, ▁, 💕...</td>\n",
331
+ " <td>[{'type': 'positive feedback', 'start': 27, 'e...</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <th>9225</th>\n",
335
+ " <td>NDY-203</td>\n",
336
+ " <td>523</td>\n",
337
+ " <td>#Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎...</td>\n",
338
+ " <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 11, 11, 11, 11,...</td>\n",
339
+ " <td>[[0.9999976, 1.1908668e-07, 8.492378e-08, 6.60...</td>\n",
340
+ " <td>[[0, 0], [0, 1], [1, 2], [2, 6], [6, 7], [8, 1...</td>\n",
341
+ " <td>[▁#, G, lock, e, ▁aktiv, iert, ▁, 👑, ▁Ich, ▁li...</td>\n",
342
+ " <td>[{'type': 'positive feedback', 'start': 20, 'e...</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>9226</th>\n",
346
+ " <td>NDY-203</td>\n",
347
+ " <td>524</td>\n",
348
+ " <td>Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍</td>\n",
349
+ " <td>[0, 3, 13, 13, 13, 13, 0, 0, 0, 1, 1, 11, 11, ...</td>\n",
350
+ " <td>[[0.9999974, 2.1362885e-07, 1.2580301e-07, 9.5...</td>\n",
351
+ " <td>[[0, 0], [0, 3], [3, 4], [5, 8], [9, 14], [15,...</td>\n",
352
+ " <td>[▁Bis, t, ▁die, ▁beste, ▁❤, ▁Bitte, ▁Grüße, ▁m...</td>\n",
353
+ " <td>[{'type': 'affection declaration', 'start': 0,...</td>\n",
354
+ " </tr>\n",
355
+ " <tr>\n",
356
+ " <th>9227</th>\n",
357
+ " <td>NDY-203</td>\n",
358
+ " <td>525</td>\n",
359
+ " <td>Hi Bonny ❤️ War letztens auf'm Flughafen , und...</td>\n",
360
+ " <td>[0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
361
+ " <td>[[0.99999523, 6.63842e-07, 2.0147786e-07, 1.16...</td>\n",
362
+ " <td>[[0, 0], [0, 2], [3, 6], [6, 8], [9, 10], [10,...</td>\n",
363
+ " <td>[▁Hi, ▁Bon, ny, ▁❤, ️, ▁War, ▁letzten, s, ▁auf...</td>\n",
364
+ " <td>[{'type': 'positive feedback', 'start': 9, 'en...</td>\n",
365
+ " </tr>\n",
366
+ " <tr>\n",
367
+ " <th>9228</th>\n",
368
+ " <td>NDY-203</td>\n",
369
+ " <td>526</td>\n",
370
+ " <td>du bist die beste ich bin neu ich hab dich sof...</td>\n",
371
+ " <td>[0, 3, 13, 13, 13, 0, 0, 0, 1, 11, 11, 11, 11,...</td>\n",
372
+ " <td>[[0.999997, 3.4811254e-07, 7.750037e-08, 7.272...</td>\n",
373
+ " <td>[[0, 0], [0, 2], [3, 7], [8, 11], [12, 17], [1...</td>\n",
374
+ " <td>[▁du, ▁bist, ▁die, ▁beste, ▁ich, ▁bin, ▁neu, ▁...</td>\n",
375
+ " <td>[{'type': 'affection declaration', 'start': 0,...</td>\n",
376
+ " </tr>\n",
377
+ " </tbody>\n",
378
+ "</table>\n",
379
+ "<p>9229 rows × 8 columns</p>\n",
380
+ "</div>"
381
+ ],
382
+ "text/plain": [
383
+ " document comment_id comment \\\n",
384
+ "0 NDY-004 1 Lol i love lochis \n",
385
+ "1 NDY-004 2 ihr singt voll gut :) \n",
386
+ "2 NDY-004 3 Junge fick dich \n",
387
+ "3 NDY-004 4 Ihr seit die besten \n",
388
+ "4 NDY-004 5 ihr seit die ALLER besten ich finde euch soooo... \n",
389
+ "... ... ... ... \n",
390
+ "9224 NDY-203 522 hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes... \n",
391
+ "9225 NDY-203 523 #Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎... \n",
392
+ "9226 NDY-203 524 Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍 \n",
393
+ "9227 NDY-203 525 Hi Bonny ❤️ War letztens auf'm Flughafen , und... \n",
394
+ "9228 NDY-203 526 du bist die beste ich bin neu ich hab dich sof... \n",
395
+ "\n",
396
+ " predicted_labels \\\n",
397
+ "0 [0, 0, 0, 0, 0, 0, 0, 0] \n",
398
+ "1 [0, 2, 12, 12, 12, 12, 12, 0] \n",
399
+ "2 [0, 0, 0, 0, 0, 0] \n",
400
+ "3 [0, 3, 13, 13, 13, 0] \n",
401
+ "4 [0, 3, 13, 13, 13, 13, 13, 3, 13, 13, 13, 13, ... \n",
402
+ "... ... \n",
403
+ "9224 [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 11, 0, 11, 11, ... \n",
404
+ "9225 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 11, 11, 11, 11,... \n",
405
+ "9226 [0, 3, 13, 13, 13, 13, 0, 0, 0, 1, 1, 11, 11, ... \n",
406
+ "9227 [0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
407
+ "9228 [0, 3, 13, 13, 13, 0, 0, 0, 1, 11, 11, 11, 11,... \n",
408
+ "\n",
409
+ " predicted_probs \\\n",
410
+ "0 [[0.99999654, 1.7456429e-07, 1.6115715e-07, 1.... \n",
411
+ "1 [[0.9999976, 1.1218729e-07, 1.239344e-07, 1.50... \n",
412
+ "2 [[0.9999981, 5.8623616e-08, 1.05891374e-07, 1.... \n",
413
+ "3 [[0.99999774, 1.6417343e-07, 1.384722e-07, 1.1... \n",
414
+ "4 [[0.99999785, 1.2960982e-07, 1.4320104e-07, 1.... \n",
415
+ "... ... \n",
416
+ "9224 [[0.99999774, 1.8107521e-07, 1.0220851e-07, 9.... \n",
417
+ "9225 [[0.9999976, 1.1908668e-07, 8.492378e-08, 6.60... \n",
418
+ "9226 [[0.9999974, 2.1362885e-07, 1.2580301e-07, 9.5... \n",
419
+ "9227 [[0.99999523, 6.63842e-07, 2.0147786e-07, 1.16... \n",
420
+ "9228 [[0.999997, 3.4811254e-07, 7.750037e-08, 7.272... \n",
421
+ "\n",
422
+ " offset_mapping \\\n",
423
+ "0 [[0, 0], [0, 1], [1, 3], [4, 5], [6, 10], [11,... \n",
424
+ "1 [[0, 0], [0, 3], [4, 8], [8, 9], [10, 14], [15... \n",
425
+ "2 [[0, 0], [0, 4], [4, 5], [6, 10], [11, 15], [0... \n",
426
+ "3 [[0, 0], [0, 3], [4, 8], [9, 12], [13, 19], [0... \n",
427
+ "4 [[0, 0], [0, 3], [4, 8], [9, 12], [13, 17], [1... \n",
428
+ "... ... \n",
429
+ "9224 [[0, 0], [0, 4], [5, 11], [12, 14], [15, 19], ... \n",
430
+ "9225 [[0, 0], [0, 1], [1, 2], [2, 6], [6, 7], [8, 1... \n",
431
+ "9226 [[0, 0], [0, 3], [3, 4], [5, 8], [9, 14], [15,... \n",
432
+ "9227 [[0, 0], [0, 2], [3, 6], [6, 8], [9, 10], [10,... \n",
433
+ "9228 [[0, 0], [0, 2], [3, 7], [8, 11], [12, 17], [1... \n",
434
+ "\n",
435
+ " text_tokens \\\n",
436
+ "0 [▁L, ol, ▁i, ▁love, ▁loc, his] \n",
437
+ "1 [▁ihr, ▁sing, t, ▁voll, ▁gut, ▁:)] \n",
438
+ "2 [▁Jung, e, ▁fick, ▁dich] \n",
439
+ "3 [▁Ihr, ▁seit, ▁die, ▁besten] \n",
440
+ "4 [▁ihr, ▁seit, ▁die, ▁ALLE, R, ▁besten, ▁ich, ▁... \n",
441
+ "... ... \n",
442
+ "9224 [▁hihi, ▁kannst, ▁du, ▁mich, ▁gr, üß, en, ▁, 💕... \n",
443
+ "9225 [▁#, G, lock, e, ▁aktiv, iert, ▁, 👑, ▁Ich, ▁li... \n",
444
+ "9226 [▁Bis, t, ▁die, ▁beste, ▁❤, ▁Bitte, ▁Grüße, ▁m... \n",
445
+ "9227 [▁Hi, ▁Bon, ny, ▁❤, ️, ▁War, ▁letzten, s, ▁auf... \n",
446
+ "9228 [▁du, ▁bist, ▁die, ▁beste, ▁ich, ▁bin, ▁neu, ▁... \n",
447
+ "\n",
448
+ " predicted_spans \n",
449
+ "0 [] \n",
450
+ "1 [{'type': 'compliment', 'start': 0, 'end': 21,... \n",
451
+ "2 [] \n",
452
+ "3 [{'type': 'affection declaration', 'start': 0,... \n",
453
+ "4 [{'type': 'affection declaration', 'start': 0,... \n",
454
+ "... ... \n",
455
+ "9224 [{'type': 'positive feedback', 'start': 27, 'e... \n",
456
+ "9225 [{'type': 'positive feedback', 'start': 20, 'e... \n",
457
+ "9226 [{'type': 'affection declaration', 'start': 0,... \n",
458
+ "9227 [{'type': 'positive feedback', 'start': 9, 'en... \n",
459
+ "9228 [{'type': 'affection declaration', 'start': 0,... \n",
460
+ "\n",
461
+ "[9229 rows x 8 columns]"
462
+ ]
463
+ },
464
+ "execution_count": 58,
465
+ "metadata": {},
466
+ "output_type": "execute_result"
467
+ }
468
+ ],
469
+ "source": [
470
+ "test_comments"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": 60,
476
+ "id": "263a51fec4f4672",
477
+ "metadata": {
478
+ "ExecuteTime": {
479
+ "end_time": "2025-06-27T22:09:58.052637Z",
480
+ "start_time": "2025-06-27T22:09:57.997729Z"
481
+ }
482
+ },
483
+ "outputs": [],
484
+ "source": [
485
+ "test_comments['has_spans'] = test_comments.apply(lambda x: len(x['predicted_spans']) > 0, axis=1)"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": 63,
491
+ "id": "5fa67bbeb303ca3a",
492
+ "metadata": {
493
+ "ExecuteTime": {
494
+ "end_time": "2025-06-27T22:10:35.264094Z",
495
+ "start_time": "2025-06-27T22:10:35.260301Z"
496
+ }
497
+ },
498
+ "outputs": [],
499
+ "source": [
500
+ "test_comments['flausch'] = test_comments['has_spans'].map({True: 'yes', False: 'no'})"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "code",
505
+ "execution_count": 66,
506
+ "id": "fd7679e665286b70",
507
+ "metadata": {
508
+ "ExecuteTime": {
509
+ "end_time": "2025-06-27T22:11:57.164479Z",
510
+ "start_time": "2025-06-27T22:11:57.150708Z"
511
+ }
512
+ },
513
+ "outputs": [],
514
+ "source": [
515
+ "test_comments[[\"document\",\"comment_id\",\"flausch\"]].to_csv(f'./submissions/task1-predicted.csv', index=False)"
516
+ ]
517
+ },
518
+ {
519
+ "cell_type": "code",
520
+ "execution_count": 68,
521
+ "id": "bd9d8b153b8d27ed",
522
+ "metadata": {
523
+ "ExecuteTime": {
524
+ "end_time": "2025-06-27T22:12:25.303426Z",
525
+ "start_time": "2025-06-27T22:12:24.850361Z"
526
+ }
527
+ },
528
+ "outputs": [
529
+ {
530
+ "name": "stderr",
531
+ "output_type": "stream",
532
+ "text": [
533
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
534
+ "To disable this warning, you can either:\n",
535
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
536
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
537
+ ]
538
+ }
539
+ ],
540
+ "source": [
541
+ "!cp './submissions/task1-predicted.csv' './submissions/subtask1_submission2.csv'"
542
+ ]
543
+ },
544
+ {
545
+ "cell_type": "code",
546
+ "execution_count": 70,
547
+ "id": "5a2738b19dcd4292",
548
+ "metadata": {
549
+ "ExecuteTime": {
550
+ "end_time": "2025-06-27T22:12:43.388207Z",
551
+ "start_time": "2025-06-27T22:12:42.945847Z"
552
+ }
553
+ },
554
+ "outputs": [
555
+ {
556
+ "name": "stdout",
557
+ "output_type": "stream",
558
+ "text": [
559
+ "document,comment_id,flausch\r\n",
560
+ "NDY-004,1,no\r\n",
561
+ "NDY-004,2,yes\r\n",
562
+ "NDY-004,3,no\r\n",
563
+ "NDY-004,4,yes\r\n",
564
+ "NDY-004,5,yes\r\n",
565
+ "NDY-004,6,yes\r\n",
566
+ "NDY-004,7,no\r\n",
567
+ "NDY-004,8,yes\r\n",
568
+ "NDY-004,9,no\r\n"
569
+ ]
570
+ },
571
+ {
572
+ "name": "stderr",
573
+ "output_type": "stream",
574
+ "text": [
575
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
576
+ "To disable this warning, you can either:\n",
577
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
578
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
579
+ ]
580
+ }
581
+ ],
582
+ "source": [
583
+ "!head -n 10 './submissions/task1-predicted.csv'"
584
+ ]
585
+ }
586
+ ],
587
+ "metadata": {
588
+ "kernelspec": {
589
+ "display_name": "Python 3",
590
+ "language": "python",
591
+ "name": "python3"
592
+ },
593
+ "language_info": {
594
+ "codemirror_mode": {
595
+ "name": "ipython",
596
+ "version": 2
597
+ },
598
+ "file_extension": ".py",
599
+ "mimetype": "text/x-python",
600
+ "name": "python",
601
+ "nbconvert_exporter": "python",
602
+ "pygments_lexer": "ipython2",
603
+ "version": "2.7.6"
604
+ }
605
+ },
606
+ "nbformat": 4,
607
+ "nbformat_minor": 5
608
+ }
subtask_1/submission_subtask1.ipynb ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "metadata": {},
5
+ "cell_type": "markdown",
6
+ "source": [
7
+ "## Experiment 019-4\n",
8
+ "\n",
9
+ "SVM mit RBF Kernel, C=5 und Gamma=0.0002"
10
+ ],
11
+ "id": "8d9679176b5367c7"
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "id": "initial_id",
16
+ "metadata": {
17
+ "collapsed": true,
18
+ "ExecuteTime": {
19
+ "end_time": "2025-06-23T18:30:56.081332Z",
20
+ "start_time": "2025-06-23T18:30:55.935044Z"
21
+ }
22
+ },
23
+ "source": [
24
+ "import os\n",
25
+ "from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, make_scorer, classification_report\n",
26
+ "from sklearn.model_selection import StratifiedKFold, train_test_split, GridSearchCV\n",
27
+ "from sklearn.pipeline import Pipeline\n",
28
+ "from sklearn.preprocessing import StandardScaler\n",
29
+ "from sklearn.svm import SVC\n",
30
+ "import time\n",
31
+ "import pickle\n",
32
+ "import numpy as np\n",
33
+ "import pandas as pd\n",
34
+ "import torch\n",
35
+ "from torch import Tensor\n",
36
+ "from transformers import AutoModel, AutoTokenizer\n",
37
+ "from transformers.utils import is_flash_attn_2_available\n",
38
+ "import wandb\n",
39
+ "from wandb import AlertLevel\n",
40
+ "\n",
41
+ "\n",
42
+ "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n",
43
+ "os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n",
44
+ "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '1'\n",
45
+ "os.environ[\"WANDB_PROJECT\"] = \"GermEval2025-Substask1\"\n",
46
+ "os.environ[\"WANDB_LOG_MODEL\"] = \"false\"\n",
47
+ "\n",
48
+ "if torch.cuda.is_available():\n",
49
+ " device = torch.device('cuda')\n",
50
+ "else:\n",
51
+ " device = torch.device('cpu')\n",
52
+ " print(\"CUDA not available, using CPU\")\n",
53
+ "\n",
54
+ "experiment_name = \"exp019-4\"\n",
55
+ "\n",
56
+ "testing_mode = False\n",
57
+ "\n",
58
+ "# Load data\n",
59
+ "comments = pd.read_csv(\"./share-GermEval2025-data/Data/training data/comments.csv\")\n",
60
+ "task1 = pd.read_csv(\"./share-GermEval2025-data/Data/training data/task1.csv\")\n",
61
+ "comments = comments.merge(task1, on=[\"document\", \"comment_id\"])\n",
62
+ "\n",
63
+ "# Remove duplicates\n",
64
+ "df = comments.drop_duplicates(subset=['comment', 'flausch'])\n",
65
+ "df.reset_index(drop=True, inplace=True)"
66
+ ],
67
+ "outputs": [],
68
+ "execution_count": 2
69
+ },
70
+ {
71
+ "metadata": {},
72
+ "cell_type": "code",
73
+ "source": [
74
+ "def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:\n",
75
+ " left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n",
76
+ " if left_padding:\n",
77
+ " return last_hidden_states[:, -1]\n",
78
+ " else:\n",
79
+ " sequence_lengths = attention_mask.sum(dim=1) - 1\n",
80
+ " batch_size = last_hidden_states.shape[0]\n",
81
+ " return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n",
82
+ "\n",
83
+ "class Qwen3Embedder:\n",
84
+ " def __init__(self, model_name='Qwen/Qwen3-Embedding-8B', instruction=None, max_length=1024):\n",
85
+ " if instruction is None:\n",
86
+ " instruction = 'Classify a given comment as either flausch (a positive, supportive expression) or non-flausch.'\n",
87
+ " self.instruction = instruction\n",
88
+ "\n",
89
+ " if is_flash_attn_2_available():\n",
90
+ " self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16)\n",
91
+ " else:\n",
92
+ " self.model = AutoModel.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16)\n",
93
+ "\n",
94
+ " self.model = self.model.cuda()\n",
95
+ " self.model.eval()\n",
96
+ "\n",
97
+ " self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side='left')\n",
98
+ " self.max_length = max_length\n",
99
+ "\n",
100
+ " def get_detailed_instruct(self, query: str) -> str:\n",
101
+ " return f'Instruct: {self.instruction}\\nQuery:{query}'\n",
102
+ "\n",
103
+ " def encode_batch(self, texts, batch_size=32):\n",
104
+ " \"\"\"Encode texts in batches to handle memory efficiently\"\"\"\n",
105
+ " all_embeddings = []\n",
106
+ "\n",
107
+ " for i in range(0, len(texts), batch_size):\n",
108
+ " batch_texts = [self.get_detailed_instruct(comment) for comment in texts[i:i + batch_size]]\n",
109
+ "\n",
110
+ " # Tokenize batch\n",
111
+ " inputs = self.tokenizer(\n",
112
+ " batch_texts,\n",
113
+ " padding=True,\n",
114
+ " truncation=True,\n",
115
+ " max_length=self.max_length,\n",
116
+ " return_tensors='pt'\n",
117
+ " ).to(device)\n",
118
+ "\n",
119
+ " # Get embeddings\n",
120
+ " with torch.no_grad():\n",
121
+ " outputs = self.model(**inputs)\n",
122
+ " # Mean pooling\n",
123
+ " embeddings = last_token_pool(outputs.last_hidden_state, inputs['attention_mask'])\n",
124
+ " #embeddings = embeddings.float()\n",
125
+ "\n",
126
+ " all_embeddings.append(embeddings.cpu().numpy())\n",
127
+ "\n",
128
+ " # Normalize embeddings (sollte ich?)\n",
129
+ " #import torch.nn.functional as F\n",
130
+ " #output = F.normalize(all_embeddings, p=2, dim=1)\n",
131
+ " return np.vstack(all_embeddings)\n",
132
+ "\n",
133
+ "# Initialize embedder\n",
134
+ "print(\"Loading Qwen3 Embeddings v3...\")\n",
135
+ "embedder = Qwen3Embedder(instruction='Classify a given comment as either flausch (a positive, supportive expression) or non-flausch')\n",
136
+ "\n",
137
+ "X, y = df[\"comment\"], df[\"flausch\"].map(dict(yes=1, no=0))\n",
138
+ "\n",
139
+ "# load embeddings if they exist\n",
140
+ "embeddings_file = f'Qwen3-Embedding-8B-{experiment_name}.npy'\n",
141
+ "if os.path.exists(embeddings_file):\n",
142
+ " print(f\"Loading existing embeddings from {embeddings_file}\")\n",
143
+ " X_embeddings = np.load(embeddings_file)\n",
144
+ "else:\n",
145
+ " print(\"Embeddings not found, generating new embeddings...\")\n",
146
+ " # Encode texts in batches to avoid memory issues\n",
147
+ " X_embeddings = embedder.encode_batch(X.tolist(), batch_size=64)\n",
148
+ " print(f\"Generated embeddings with shape: {X_embeddings.shape}\")\n",
149
+ "\n",
150
+ " # save embeddings to avoid recomputation\n",
151
+ " np.save(embeddings_file, X_embeddings)\n",
152
+ "\n",
153
+ "pipe = Pipeline([\n",
154
+ " (\"scaler\", StandardScaler()),\n",
155
+ " (\"svm\", SVC(random_state=42, C=5, gamma=0.0002, cache_size=2000))\n",
156
+ "])\n",
157
+ "\n",
158
+ "f1_pos_scorer = make_scorer(f1_score, pos_label=1, average='binary')\n",
159
+ "\n",
160
+ "X_train = X_embeddings\n",
161
+ "y_train = y\n",
162
+ "\n",
163
+ "pipe.fit(X_train, y_train)"
164
+ ],
165
+ "id": "59ef5a54cb69530f",
166
+ "outputs": [],
167
+ "execution_count": null
168
+ },
169
+ {
170
+ "metadata": {
171
+ "ExecuteTime": {
172
+ "end_time": "2025-06-23T18:30:59.602524Z",
173
+ "start_time": "2025-06-23T18:30:59.570290Z"
174
+ }
175
+ },
176
+ "cell_type": "code",
177
+ "source": [
178
+ "test_data: pd.DataFrame = pd.read_csv(\"./share-GermEval2025-data/Data/test data/comments.csv\")\n",
179
+ "test_data"
180
+ ],
181
+ "id": "a842bfa29d59c84b",
182
+ "outputs": [
183
+ {
184
+ "data": {
185
+ "text/plain": [
186
+ " document comment_id comment\n",
187
+ "0 NDY-004 1 Lol i love lochis\n",
188
+ "1 NDY-004 2 ihr singt voll gut :)\n",
189
+ "2 NDY-004 3 Junge fick dich\n",
190
+ "3 NDY-004 4 Ihr seit die besten\n",
191
+ "4 NDY-004 5 ihr seit die ALLER besten ich finde euch soooo...\n",
192
+ "... ... ... ...\n",
193
+ "9224 NDY-203 522 hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes...\n",
194
+ "9225 NDY-203 523 #Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎...\n",
195
+ "9226 NDY-203 524 Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍\n",
196
+ "9227 NDY-203 525 Hi Bonny ❤️ War letztens auf'm Flughafen , und...\n",
197
+ "9228 NDY-203 526 du bist die beste ich bin neu ich hab dich sof...\n",
198
+ "\n",
199
+ "[9229 rows x 3 columns]"
200
+ ],
201
+ "text/html": [
202
+ "<div>\n",
203
+ "<style scoped>\n",
204
+ " .dataframe tbody tr th:only-of-type {\n",
205
+ " vertical-align: middle;\n",
206
+ " }\n",
207
+ "\n",
208
+ " .dataframe tbody tr th {\n",
209
+ " vertical-align: top;\n",
210
+ " }\n",
211
+ "\n",
212
+ " .dataframe thead th {\n",
213
+ " text-align: right;\n",
214
+ " }\n",
215
+ "</style>\n",
216
+ "<table border=\"1\" class=\"dataframe\">\n",
217
+ " <thead>\n",
218
+ " <tr style=\"text-align: right;\">\n",
219
+ " <th></th>\n",
220
+ " <th>document</th>\n",
221
+ " <th>comment_id</th>\n",
222
+ " <th>comment</th>\n",
223
+ " </tr>\n",
224
+ " </thead>\n",
225
+ " <tbody>\n",
226
+ " <tr>\n",
227
+ " <th>0</th>\n",
228
+ " <td>NDY-004</td>\n",
229
+ " <td>1</td>\n",
230
+ " <td>Lol i love lochis</td>\n",
231
+ " </tr>\n",
232
+ " <tr>\n",
233
+ " <th>1</th>\n",
234
+ " <td>NDY-004</td>\n",
235
+ " <td>2</td>\n",
236
+ " <td>ihr singt voll gut :)</td>\n",
237
+ " </tr>\n",
238
+ " <tr>\n",
239
+ " <th>2</th>\n",
240
+ " <td>NDY-004</td>\n",
241
+ " <td>3</td>\n",
242
+ " <td>Junge fick dich</td>\n",
243
+ " </tr>\n",
244
+ " <tr>\n",
245
+ " <th>3</th>\n",
246
+ " <td>NDY-004</td>\n",
247
+ " <td>4</td>\n",
248
+ " <td>Ihr seit die besten</td>\n",
249
+ " </tr>\n",
250
+ " <tr>\n",
251
+ " <th>4</th>\n",
252
+ " <td>NDY-004</td>\n",
253
+ " <td>5</td>\n",
254
+ " <td>ihr seit die ALLER besten ich finde euch soooo...</td>\n",
255
+ " </tr>\n",
256
+ " <tr>\n",
257
+ " <th>...</th>\n",
258
+ " <td>...</td>\n",
259
+ " <td>...</td>\n",
260
+ " <td>...</td>\n",
261
+ " </tr>\n",
262
+ " <tr>\n",
263
+ " <th>9224</th>\n",
264
+ " <td>NDY-203</td>\n",
265
+ " <td>522</td>\n",
266
+ " <td>hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes...</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>9225</th>\n",
270
+ " <td>NDY-203</td>\n",
271
+ " <td>523</td>\n",
272
+ " <td>#Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎...</td>\n",
273
+ " </tr>\n",
274
+ " <tr>\n",
275
+ " <th>9226</th>\n",
276
+ " <td>NDY-203</td>\n",
277
+ " <td>524</td>\n",
278
+ " <td>Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍</td>\n",
279
+ " </tr>\n",
280
+ " <tr>\n",
281
+ " <th>9227</th>\n",
282
+ " <td>NDY-203</td>\n",
283
+ " <td>525</td>\n",
284
+ " <td>Hi Bonny ❤️ War letztens auf'm Flughafen , und...</td>\n",
285
+ " </tr>\n",
286
+ " <tr>\n",
287
+ " <th>9228</th>\n",
288
+ " <td>NDY-203</td>\n",
289
+ " <td>526</td>\n",
290
+ " <td>du bist die beste ich bin neu ich hab dich sof...</td>\n",
291
+ " </tr>\n",
292
+ " </tbody>\n",
293
+ "</table>\n",
294
+ "<p>9229 rows × 3 columns</p>\n",
295
+ "</div>"
296
+ ]
297
+ },
298
+ "execution_count": 3,
299
+ "metadata": {},
300
+ "output_type": "execute_result"
301
+ }
302
+ ],
303
+ "execution_count": 3
304
+ },
305
+ {
306
+ "metadata": {
307
+ "ExecuteTime": {
308
+ "end_time": "2025-06-23T19:22:07.211246Z",
309
+ "start_time": "2025-06-23T19:17:34.390901Z"
310
+ }
311
+ },
312
+ "cell_type": "code",
313
+ "source": "X_test_data = embedder.encode_batch(test_data['comment'].tolist(), batch_size=64)",
314
+ "id": "b2f18769fe09b609",
315
+ "outputs": [],
316
+ "execution_count": 6
317
+ },
318
+ {
319
+ "metadata": {
320
+ "ExecuteTime": {
321
+ "end_time": "2025-06-23T19:25:42.858436Z",
322
+ "start_time": "2025-06-23T19:22:07.287233Z"
323
+ }
324
+ },
325
+ "cell_type": "code",
326
+ "source": "y_prediction = pipe.predict(X_test_data)",
327
+ "id": "3a7abacf1694b415",
328
+ "outputs": [],
329
+ "execution_count": 7
330
+ },
331
+ {
332
+ "metadata": {
333
+ "ExecuteTime": {
334
+ "end_time": "2025-06-23T19:31:30.676051Z",
335
+ "start_time": "2025-06-23T19:31:30.667660Z"
336
+ }
337
+ },
338
+ "cell_type": "code",
339
+ "source": [
340
+ "test_data['flausch'] = y_prediction\n",
341
+ "test_data['flausch'] = test_data['flausch'].map({1: 'yes', 0: 'no'})\n",
342
+ "test_data"
343
+ ],
344
+ "id": "d342aed9b9070ad4",
345
+ "outputs": [
346
+ {
347
+ "data": {
348
+ "text/plain": [
349
+ " document comment_id comment \\\n",
350
+ "0 NDY-004 1 Lol i love lochis \n",
351
+ "1 NDY-004 2 ihr singt voll gut :) \n",
352
+ "2 NDY-004 3 Junge fick dich \n",
353
+ "3 NDY-004 4 Ihr seit die besten \n",
354
+ "4 NDY-004 5 ihr seit die ALLER besten ich finde euch soooo... \n",
355
+ "... ... ... ... \n",
356
+ "9224 NDY-203 522 hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes... \n",
357
+ "9225 NDY-203 523 #Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎... \n",
358
+ "9226 NDY-203 524 Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍 \n",
359
+ "9227 NDY-203 525 Hi Bonny ❤️ War letztens auf'm Flughafen , und... \n",
360
+ "9228 NDY-203 526 du bist die beste ich bin neu ich hab dich sof... \n",
361
+ "\n",
362
+ " flausch \n",
363
+ "0 no \n",
364
+ "1 yes \n",
365
+ "2 no \n",
366
+ "3 yes \n",
367
+ "4 yes \n",
368
+ "... ... \n",
369
+ "9224 no \n",
370
+ "9225 yes \n",
371
+ "9226 yes \n",
372
+ "9227 yes \n",
373
+ "9228 yes \n",
374
+ "\n",
375
+ "[9229 rows x 4 columns]"
376
+ ],
377
+ "text/html": [
378
+ "<div>\n",
379
+ "<style scoped>\n",
380
+ " .dataframe tbody tr th:only-of-type {\n",
381
+ " vertical-align: middle;\n",
382
+ " }\n",
383
+ "\n",
384
+ " .dataframe tbody tr th {\n",
385
+ " vertical-align: top;\n",
386
+ " }\n",
387
+ "\n",
388
+ " .dataframe thead th {\n",
389
+ " text-align: right;\n",
390
+ " }\n",
391
+ "</style>\n",
392
+ "<table border=\"1\" class=\"dataframe\">\n",
393
+ " <thead>\n",
394
+ " <tr style=\"text-align: right;\">\n",
395
+ " <th></th>\n",
396
+ " <th>document</th>\n",
397
+ " <th>comment_id</th>\n",
398
+ " <th>comment</th>\n",
399
+ " <th>flausch</th>\n",
400
+ " </tr>\n",
401
+ " </thead>\n",
402
+ " <tbody>\n",
403
+ " <tr>\n",
404
+ " <th>0</th>\n",
405
+ " <td>NDY-004</td>\n",
406
+ " <td>1</td>\n",
407
+ " <td>Lol i love lochis</td>\n",
408
+ " <td>no</td>\n",
409
+ " </tr>\n",
410
+ " <tr>\n",
411
+ " <th>1</th>\n",
412
+ " <td>NDY-004</td>\n",
413
+ " <td>2</td>\n",
414
+ " <td>ihr singt voll gut :)</td>\n",
415
+ " <td>yes</td>\n",
416
+ " </tr>\n",
417
+ " <tr>\n",
418
+ " <th>2</th>\n",
419
+ " <td>NDY-004</td>\n",
420
+ " <td>3</td>\n",
421
+ " <td>Junge fick dich</td>\n",
422
+ " <td>no</td>\n",
423
+ " </tr>\n",
424
+ " <tr>\n",
425
+ " <th>3</th>\n",
426
+ " <td>NDY-004</td>\n",
427
+ " <td>4</td>\n",
428
+ " <td>Ihr seit die besten</td>\n",
429
+ " <td>yes</td>\n",
430
+ " </tr>\n",
431
+ " <tr>\n",
432
+ " <th>4</th>\n",
433
+ " <td>NDY-004</td>\n",
434
+ " <td>5</td>\n",
435
+ " <td>ihr seit die ALLER besten ich finde euch soooo...</td>\n",
436
+ " <td>yes</td>\n",
437
+ " </tr>\n",
438
+ " <tr>\n",
439
+ " <th>...</th>\n",
440
+ " <td>...</td>\n",
441
+ " <td>...</td>\n",
442
+ " <td>...</td>\n",
443
+ " <td>...</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <th>9224</th>\n",
447
+ " <td>NDY-203</td>\n",
448
+ " <td>522</td>\n",
449
+ " <td>hihi kannst du mich grüßen 💕 👋 😍 Achso wusstes...</td>\n",
450
+ " <td>no</td>\n",
451
+ " </tr>\n",
452
+ " <tr>\n",
453
+ " <th>9225</th>\n",
454
+ " <td>NDY-203</td>\n",
455
+ " <td>523</td>\n",
456
+ " <td>#Glocke aktiviert 👑 Ich liebe deine Videos 💍 💎...</td>\n",
457
+ " <td>yes</td>\n",
458
+ " </tr>\n",
459
+ " <tr>\n",
460
+ " <th>9226</th>\n",
461
+ " <td>NDY-203</td>\n",
462
+ " <td>524</td>\n",
463
+ " <td>Bist die beste ❤ Bitte Grüße mich 💕 ❤ 😘 😍</td>\n",
464
+ " <td>yes</td>\n",
465
+ " </tr>\n",
466
+ " <tr>\n",
467
+ " <th>9227</th>\n",
468
+ " <td>NDY-203</td>\n",
469
+ " <td>525</td>\n",
470
+ " <td>Hi Bonny ❤️ War letztens auf'm Flughafen , und...</td>\n",
471
+ " <td>yes</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <th>9228</th>\n",
475
+ " <td>NDY-203</td>\n",
476
+ " <td>526</td>\n",
477
+ " <td>du bist die beste ich bin neu ich hab dich sof...</td>\n",
478
+ " <td>yes</td>\n",
479
+ " </tr>\n",
480
+ " </tbody>\n",
481
+ "</table>\n",
482
+ "<p>9229 rows × 4 columns</p>\n",
483
+ "</div>"
484
+ ]
485
+ },
486
+ "execution_count": 11,
487
+ "metadata": {},
488
+ "output_type": "execute_result"
489
+ }
490
+ ],
491
+ "execution_count": 11
492
+ },
493
+ {
494
+ "metadata": {
495
+ "ExecuteTime": {
496
+ "end_time": "2025-06-23T19:33:51.519362Z",
497
+ "start_time": "2025-06-23T19:33:51.512704Z"
498
+ }
499
+ },
500
+ "cell_type": "code",
501
+ "source": "test_data[['document', 'comment_id', 'flausch']]",
502
+ "id": "ac4077f355d0a379",
503
+ "outputs": [
504
+ {
505
+ "data": {
506
+ "text/plain": [
507
+ " document comment_id flausch\n",
508
+ "0 NDY-004 1 no\n",
509
+ "1 NDY-004 2 yes\n",
510
+ "2 NDY-004 3 no\n",
511
+ "3 NDY-004 4 yes\n",
512
+ "4 NDY-004 5 yes\n",
513
+ "... ... ... ...\n",
514
+ "9224 NDY-203 522 no\n",
515
+ "9225 NDY-203 523 yes\n",
516
+ "9226 NDY-203 524 yes\n",
517
+ "9227 NDY-203 525 yes\n",
518
+ "9228 NDY-203 526 yes\n",
519
+ "\n",
520
+ "[9229 rows x 3 columns]"
521
+ ],
522
+ "text/html": [
523
+ "<div>\n",
524
+ "<style scoped>\n",
525
+ " .dataframe tbody tr th:only-of-type {\n",
526
+ " vertical-align: middle;\n",
527
+ " }\n",
528
+ "\n",
529
+ " .dataframe tbody tr th {\n",
530
+ " vertical-align: top;\n",
531
+ " }\n",
532
+ "\n",
533
+ " .dataframe thead th {\n",
534
+ " text-align: right;\n",
535
+ " }\n",
536
+ "</style>\n",
537
+ "<table border=\"1\" class=\"dataframe\">\n",
538
+ " <thead>\n",
539
+ " <tr style=\"text-align: right;\">\n",
540
+ " <th></th>\n",
541
+ " <th>document</th>\n",
542
+ " <th>comment_id</th>\n",
543
+ " <th>flausch</th>\n",
544
+ " </tr>\n",
545
+ " </thead>\n",
546
+ " <tbody>\n",
547
+ " <tr>\n",
548
+ " <th>0</th>\n",
549
+ " <td>NDY-004</td>\n",
550
+ " <td>1</td>\n",
551
+ " <td>no</td>\n",
552
+ " </tr>\n",
553
+ " <tr>\n",
554
+ " <th>1</th>\n",
555
+ " <td>NDY-004</td>\n",
556
+ " <td>2</td>\n",
557
+ " <td>yes</td>\n",
558
+ " </tr>\n",
559
+ " <tr>\n",
560
+ " <th>2</th>\n",
561
+ " <td>NDY-004</td>\n",
562
+ " <td>3</td>\n",
563
+ " <td>no</td>\n",
564
+ " </tr>\n",
565
+ " <tr>\n",
566
+ " <th>3</th>\n",
567
+ " <td>NDY-004</td>\n",
568
+ " <td>4</td>\n",
569
+ " <td>yes</td>\n",
570
+ " </tr>\n",
571
+ " <tr>\n",
572
+ " <th>4</th>\n",
573
+ " <td>NDY-004</td>\n",
574
+ " <td>5</td>\n",
575
+ " <td>yes</td>\n",
576
+ " </tr>\n",
577
+ " <tr>\n",
578
+ " <th>...</th>\n",
579
+ " <td>...</td>\n",
580
+ " <td>...</td>\n",
581
+ " <td>...</td>\n",
582
+ " </tr>\n",
583
+ " <tr>\n",
584
+ " <th>9224</th>\n",
585
+ " <td>NDY-203</td>\n",
586
+ " <td>522</td>\n",
587
+ " <td>no</td>\n",
588
+ " </tr>\n",
589
+ " <tr>\n",
590
+ " <th>9225</th>\n",
591
+ " <td>NDY-203</td>\n",
592
+ " <td>523</td>\n",
593
+ " <td>yes</td>\n",
594
+ " </tr>\n",
595
+ " <tr>\n",
596
+ " <th>9226</th>\n",
597
+ " <td>NDY-203</td>\n",
598
+ " <td>524</td>\n",
599
+ " <td>yes</td>\n",
600
+ " </tr>\n",
601
+ " <tr>\n",
602
+ " <th>9227</th>\n",
603
+ " <td>NDY-203</td>\n",
604
+ " <td>525</td>\n",
605
+ " <td>yes</td>\n",
606
+ " </tr>\n",
607
+ " <tr>\n",
608
+ " <th>9228</th>\n",
609
+ " <td>NDY-203</td>\n",
610
+ " <td>526</td>\n",
611
+ " <td>yes</td>\n",
612
+ " </tr>\n",
613
+ " </tbody>\n",
614
+ "</table>\n",
615
+ "<p>9229 rows × 3 columns</p>\n",
616
+ "</div>"
617
+ ]
618
+ },
619
+ "execution_count": 12,
620
+ "metadata": {},
621
+ "output_type": "execute_result"
622
+ }
623
+ ],
624
+ "execution_count": 12
625
+ },
626
+ {
627
+ "metadata": {
628
+ "ExecuteTime": {
629
+ "end_time": "2025-06-23T19:34:57.446239Z",
630
+ "start_time": "2025-06-23T19:34:57.431741Z"
631
+ }
632
+ },
633
+ "cell_type": "code",
634
+ "source": "test_data[['document', 'comment_id', 'flausch']].to_csv(f'./submissions/subtask1_submission1.csv', index=False)",
635
+ "id": "ce927f8936231813",
636
+ "outputs": [],
637
+ "execution_count": 16
638
+ },
639
+ {
640
+ "metadata": {
641
+ "ExecuteTime": {
642
+ "end_time": "2025-06-23T19:37:22.875657Z",
643
+ "start_time": "2025-06-23T19:37:22.653931Z"
644
+ }
645
+ },
646
+ "cell_type": "code",
647
+ "source": "!head -n 10 './submissions/subtask1_submission1.csv'",
648
+ "id": "e358ae2660d91769",
649
+ "outputs": [
650
+ {
651
+ "name": "stdout",
652
+ "output_type": "stream",
653
+ "text": [
654
+ "document,comment_id,flausch\r\n",
655
+ "NDY-004,1,no\r\n",
656
+ "NDY-004,2,yes\r\n",
657
+ "NDY-004,3,no\r\n",
658
+ "NDY-004,4,yes\r\n",
659
+ "NDY-004,5,yes\r\n",
660
+ "NDY-004,6,yes\r\n",
661
+ "NDY-004,7,no\r\n",
662
+ "NDY-004,8,no\r\n",
663
+ "NDY-004,9,no\r\n"
664
+ ]
665
+ },
666
+ {
667
+ "name": "stderr",
668
+ "output_type": "stream",
669
+ "text": [
670
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
671
+ "To disable this warning, you can either:\n",
672
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
673
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
674
+ ]
675
+ }
676
+ ],
677
+ "execution_count": 19
678
+ },
679
+ {
680
+ "metadata": {},
681
+ "cell_type": "code",
682
+ "source": "!cp './submissions/subtask1_submission1.csv' './submissions/task1-predicted.csv'",
683
+ "id": "e820c01a833df1db",
684
+ "outputs": [],
685
+ "execution_count": null
686
+ },
687
+ {
688
+ "metadata": {},
689
+ "cell_type": "markdown",
690
+ "source": [
691
+ " Score für Subtask 1:\n",
692
+ "\n",
693
+ " → 0.88"
694
+ ],
695
+ "id": "c441568bcdde6462"
696
+ }
697
+ ],
698
+ "metadata": {
699
+ "kernelspec": {
700
+ "display_name": "Python 3",
701
+ "language": "python",
702
+ "name": "python3"
703
+ },
704
+ "language_info": {
705
+ "codemirror_mode": {
706
+ "name": "ipython",
707
+ "version": 2
708
+ },
709
+ "file_extension": ".py",
710
+ "mimetype": "text/x-python",
711
+ "name": "python",
712
+ "nbconvert_exporter": "python",
713
+ "pygments_lexer": "ipython2",
714
+ "version": "2.7.6"
715
+ }
716
+ },
717
+ "nbformat": 4,
718
+ "nbformat_minor": 5
719
+ }
subtask_2/exp027-1.py ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import sys
4
+ import time
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ import wandb
10
+ from datasets import Dataset
11
+ from multiset import *
12
+ from sklearn.model_selection import train_test_split, StratifiedKFold
13
+ from transformers import (
14
+ AutoTokenizer,
15
+ AutoModelForTokenClassification,
16
+ TrainingArguments,
17
+ Trainer,
18
+ DataCollatorForTokenClassification,
19
+ EarlyStoppingCallback
20
+ )
21
+
22
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
23
+ os.environ["CUDA_VISIBLE_DEVICES"] = '0'
24
+
25
+ os.environ["WANDB_PROJECT"]="GermEval2025-Substask2"
26
+ os.environ["WANDB_LOG_MODEL"]="false"
27
+
28
+ experiment_name = 'exp027-1'
29
+
30
ALL_LABELS = ["affection declaration","agreement","ambiguous",
              "compliment","encouragement","gratitude","group membership",
              "implicit","positive feedback","sympathy"]


def _prf(n_gold, n_pred, n_match):
    """Return {'prec', 'rec', 'f1'} for the given gold/pred/match counts.

    Uses the `x and y / x` idiom so that a zero denominator yields 0.0
    instead of raising ZeroDivisionError (mirrors the shared-task scorer).
    """
    prec = float(n_pred) and float(n_match) / n_pred
    rec = float(n_gold) and float(n_match) / n_gold
    f1 = (prec + rec) and 2 * prec * rec / (prec + rec)
    return {'prec': prec, 'rec': rec, 'f1': f1}


def fine_grained_flausch_by_label(gold, predicted):
    """Compute the GermEval "flausch" span metrics, overall and per label.

    Parameters
    ----------
    gold, predicted : pd.DataFrame
        Span tables with columns ``document``, ``comment_id``, ``type``,
        ``start``, ``end``. The inputs are NOT modified (previous versions
        mutated the callers' DataFrames by adding a ``cid`` column in place).

    Returns
    -------
    dict
        ``{'TOTAL': {'STRICT': {...}, 'SPANS': {...}, 'TYPES': {...}},
        <label>: {'STRICT': {...}, 'TYPES': {...}}, ...}`` where each leaf
        dict holds ``prec``/``rec``/``f1``.

    Notes
    -----
    * TOTAL scores are multiset-based (duplicate spans count with their
      multiplicity; intersection takes the minimum count per element).
    * Per-label scores are set-based (duplicates collapse), matching the
      original scorer which converted the filtered multisets to sets.
    """
    from collections import Counter

    # Build a per-comment id without mutating the caller's DataFrames.
    gold = gold.assign(cid=gold['document'] + "_" + gold['comment_id'].apply(str))
    predicted = predicted.assign(cid=predicted['document'] + "_" + predicted['comment_id'].apply(str))

    # Multiset annotation counts (Counter & Counter == min-count intersection,
    # equivalent to multiset.Multiset.intersection).
    pred_spans = Counter()        # (cid, type, start, end)
    pred_spans_loose = Counter()  # (cid, start, end) — type ignored
    pred_types = Counter()        # (cid, type) — position ignored
    gold_spans = Counter()
    gold_spans_loose = Counter()
    gold_types = Counter()

    for row in predicted.itertuples(index=False):
        pred_spans[(row.cid, row.type, row.start, row.end)] += 1
        pred_spans_loose[(row.cid, row.start, row.end)] += 1
        pred_types[(row.cid, row.type)] += 1
    for row in gold.itertuples(index=False):
        gold_spans[(row.cid, row.type, row.start, row.end)] += 1
        gold_spans_loose[(row.cid, row.start, row.end)] += 1
        gold_types[(row.cid, row.type)] += 1

    results = {'TOTAL': {'STRICT': {}, 'SPANS': {}, 'TYPES': {}}}

    # Label-wise evaluation (only STRICT and TYPES modes), set-based.
    for label in ALL_LABELS:
        gold_spans_x = {t for t in gold_spans if t[1] == label}
        pred_spans_x = {t for t in pred_spans if t[1] == label}
        gold_types_x = {t for t in gold_types if t[1] == label}
        pred_types_x = {t for t in pred_types if t[1] == label}

        results[label] = {
            # strict: span boundaries AND type must match
            'STRICT': _prf(len(gold_spans_x), len(pred_spans_x),
                           len(gold_spans_x & pred_spans_x)),
            # detection mode: only the type must match (per post)
            'TYPES': _prf(len(gold_types_x), len(pred_types_x),
                          len(gold_types_x & pred_types_x)),
        }

    def _multiset_prf(gold_c, pred_c):
        # Multiplicity-aware sizes: sum of counts, intersection = min counts.
        return _prf(sum(gold_c.values()), sum(pred_c.values()),
                    sum((gold_c & pred_c).values()))

    # Overall evaluation, multiset-based.
    results['TOTAL']['STRICT'] = _multiset_prf(gold_spans, pred_spans)   # spans + type
    results['TOTAL']['SPANS'] = _multiset_prf(gold_spans_loose, pred_spans_loose)  # spans only
    results['TOTAL']['TYPES'] = _multiset_prf(gold_types, pred_types)    # types only

    return results
114
+
115
class SpanClassifierWithStrictF1:
    """BIO token-classification span tagger for GermEval "flausch" spans.

    Wraps a HuggingFace ``AutoModelForTokenClassification`` with dataset
    construction, a strict-F1 metric hook for the Trainer, single-run
    training (``train``) and stratified k-fold cross-validation
    (``cross_validate``).

    NOTE(review): ``compute_metrics`` relies on ``self.current_eval_data``
    being aligned index-for-index with the Trainer's eval dataset — callers
    must set it before evaluation (``train``/``cross_validate`` do).
    """

    def __init__(self, model_name="deepset/gbert-base"):
        # Model is loaded lazily in train()/cross_validate(); here we only
        # keep the name and the tokenizer.
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # BIO label inventory: "O" plus B-/I- variants of the 10 span types.
        self.labels =[
            "O",
            "B-positive feedback", "B-compliment", "B-affection declaration", "B-encouragement", "B-gratitude", "B-agreement", "B-ambiguous", "B-implicit", "B-group membership", "B-sympathy",
            "I-positive feedback", "I-compliment", "I-affection declaration", "I-encouragement", "I-gratitude", "I-agreement", "I-ambiguous", "I-implicit", "I-group membership", "I-sympathy"
        ]
        self.label2id = {label: i for i, label in enumerate(self.labels)}
        self.id2label = {i: label for i, label in enumerate(self.labels)}

    def create_dataset(self, comments_df, spans_df):
        """Build tokenized examples with BIO labels plus parallel evaluation data.

        Returns a pair ``(examples, eval_data)``: ``examples`` feed the
        Trainer (input_ids / attention_mask / labels), ``eval_data`` keeps
        text, offset mapping and gold character spans for strict-F1 scoring.
        """
        examples = []
        eval_data = []  # kept for strict-F1 computation in compute_metrics()

        spans_grouped = spans_df.groupby(['document', 'comment_id'])

        for _, row in comments_df.iterrows():
            text = row['comment']
            document = row['document']
            comment_id = row['comment_id']
            key = (document, comment_id)

            # Gold spans for this comment (character offsets), empty if none.
            if key in spans_grouped.groups:
                true_spans = [(span_type, int(start), int(end))
                              for span_type, start, end in
                              spans_grouped.get_group(key)[['type', 'start', 'end']].values]
            else:
                true_spans = []

            # Tokenization; offsets are needed to map tokens back to characters.
            tokenized = self.tokenizer(text, truncation=True, max_length=512,
                                       return_offsets_mapping=True)

            # Create BIO labels from the (start, end, type) rows.
            labels = self._create_bio_labels(tokenized['offset_mapping'],
                                             spans_grouped.get_group(key)[['start', 'end', 'type']].values
                                             if key in spans_grouped.groups else [])

            examples.append({
                'input_ids': tokenized['input_ids'],
                'attention_mask': tokenized['attention_mask'],
                'labels': labels
            })

            # Store evaluation data aligned 1:1 with `examples`.
            eval_data.append({
                'text': text,
                'offset_mapping': tokenized['offset_mapping'],
                'true_spans': true_spans,
                'document': document,
                'comment_id': comment_id
            })

        return examples, eval_data

    def _create_bio_labels(self, offset_mapping, spans):
        """Create per-token BIO label ids from character-offset spans."""
        labels = [0] * len(offset_mapping)  # 0 = "O"

        for start, end, type_label in spans:
            for i, (token_start, token_end) in enumerate(offset_mapping):
                # Skip special tokens.
                # NOTE(review): fast tokenizers usually emit (0, 0) rather than
                # None for special tokens — confirm this guard actually fires.
                if token_start is None:
                    continue

                # Token overlaps the span.
                if token_start < end and token_end > start:
                    if token_start <= start:
                        if labels[i] != 0:
                            # don't overwrite labels if spans are overlapping; just skip the span
                            break
                        labels[i] = self.label2id[f'B-{type_label}']  # e.g. B-compliment
                    else:
                        labels[i] = self.label2id[f'I-{type_label}']  # e.g. I-compliment

        return labels

    def _predictions_to_dataframe(self, predictions_list, comments_df_subset):
        """Convert predictions to a span DataFrame for the flausch metric.

        ``predictions_list`` and ``comments_df_subset`` are assumed to be
        positionally aligned.
        """
        pred_data = []

        for i, pred in enumerate(predictions_list):
            if i < len(comments_df_subset):
                row = comments_df_subset.iloc[i]
                document = row['document']
                comment_id = row['comment_id']

                for span in pred['spans']:
                    pred_data.append({
                        'document': document,
                        'comment_id': comment_id,
                        'type': span['type'],
                        'start': span['start'],
                        'end': span['end']
                    })

        return pd.DataFrame(pred_data)

    # --- helper that builds a DataFrame of spans from eval data + predictions ---
    def _build_span_dfs(self, eval_data, batch_pred_spans):
        """
        eval_data: list of dicts with keys document, comment_id, true_spans
        batch_pred_spans: list of lists of (type, start, end)
        returns (gold_df, pred_df) suitable for fine_grained_flausch_by_label
        """
        rows_gold = []
        rows_pred = []
        for item, pred_spans in zip(eval_data, batch_pred_spans):
            doc = item['document']
            cid = item['comment_id']
            # gold spans
            for t, s, e in item['true_spans']:
                rows_gold.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': t,
                    'start': s,
                    'end': e
                })
            # predicted spans
            for t, s, e in pred_spans:
                rows_pred.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': t,
                    'start': s,
                    'end': e
                })
        gold_df = pd.DataFrame(rows_gold, columns=['document','comment_id','type','start','end'])
        pred_df = pd.DataFrame(rows_pred, columns=['document','comment_id','type','start','end'])
        return gold_df, pred_df


    def compute_metrics(self, eval_pred):
        """
        Called by the HF-Trainer at each evaluation step.
        We collect batch predictions, reconstruct gold/pred spans,
        call fine_grained_flausch_by_label and return the TOTAL/STRICT metrics.

        NOTE(review): indexes self.current_eval_data positionally — assumes the
        eval dataloader preserves example order (no shuffling); verify.
        """
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=2)

        # Reconstruct character spans per example in this evaluation pass.
        batch_pred_spans = []
        for i, (p_seq, lab_seq) in enumerate(zip(preds, labels)):
            # Skip padding positions (label == -100).
            valid_preds = []
            valid_offsets = []
            offsets = self.current_eval_data[i]['offset_mapping']
            for j,(p,l) in enumerate(zip(p_seq, lab_seq)):
                if l != -100:
                    valid_preds.append(int(p))
                    valid_offsets.append(offsets[j])
            # Convert token predictions to character spans.
            pred_spans = self._predictions_to_spans(valid_preds, valid_offsets,
                                                    self.current_eval_data[i]['text'])
            # Keep as (type, start, end) tuples.
            batch_pred_spans.append([(sp['type'], sp['start'], sp['end'])
                                     for sp in pred_spans])

        # Build the gold/pred DataFrames.
        gold_df, pred_df = self._build_span_dfs(self.current_eval_data,
                                                batch_pred_spans)

        # Score with the fine-grained flausch metric.
        results = fine_grained_flausch_by_label(gold_df, pred_df)

        # Extract the TOTAL/STRICT metrics (as tensors for the Trainer).
        total = results['TOTAL']['STRICT']
        return {
            'strict_prec': torch.tensor(total['prec'], dtype=torch.float32),
            'strict_rec': torch.tensor(total['rec'], dtype=torch.float32),
            'strict_f1': torch.tensor(total['f1'], dtype=torch.float32),
        }


    def evaluate_by_label(self, comments_df, spans_df):
        """
        Replace evaluate_strict_f1. Runs a full pass over all comments,
        uses self.predict() to get spans, then calls fine_grained_flausch_by_label
        and prints & returns the TOTAL metrics.
        """
        # 1) run predictions
        texts = comments_df['comment'].tolist()
        docs = comments_df['document'].tolist()
        cids = comments_df['comment_id'].tolist()
        preds = self.predict(texts)

        # 2) build gold and pred lists
        gold_rows = []
        for (_, row) in comments_df.iterrows():
            key = (row['document'], row['comment_id'])
            # Collect all gold spans for this comment.
            group = spans_df[
                (spans_df.document==row['document']) &
                (spans_df.comment_id==row['comment_id'])
            ]
            for _, sp in group.iterrows():
                gold_rows.append({
                    'document': row['document'],
                    'comment_id': row['comment_id'],
                    'type': sp['type'],
                    'start': sp['start'],
                    'end': sp['end']
                })

        pred_rows = []
        for doc, cid, p in zip(docs, cids, preds):
            for sp in p['spans']:
                pred_rows.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': sp['type'],
                    'start': sp['start'],
                    'end': sp['end']
                })

        gold_df = pd.DataFrame(gold_rows, columns=['document','comment_id','type','start','end'])
        pred_df = pd.DataFrame(pred_rows, columns=['document','comment_id','type','start','end'])

        # 3) score with the fine-grained metric
        results = fine_grained_flausch_by_label(gold_df, pred_df)

        # 4) extract and print the overall numbers
        total = results['TOTAL']
        print("\n=== EVALUATION BY FLAUSCH METRICS ===")
        for mode in ['STRICT','SPANS','TYPES']:
            m = total[mode]
            print(f"{mode:6} P={m['prec']:.4f} R={m['rec']:.4f} F1={m['f1']:.4f}")

        return results

    def _predictions_to_spans(self, predicted_labels, offset_mapping, text):
        """Convert per-token label ids into character spans via BIO decoding."""
        spans = []
        current_span = None

        for i, label_id in enumerate(predicted_labels):
            if i >= len(offset_mapping):
                break

            label = self.id2label[label_id]
            token_start, token_end = offset_mapping[i]

            if token_start is None:
                continue

            if label.startswith('B-'):
                # A new span starts; flush any open one.
                if current_span:
                    spans.append(current_span)
                current_span = {
                    'type': label[2:],
                    'start': token_start,
                    'end': token_end,
                    'text': text[token_start:token_end]
                }
            elif label.startswith('I-') and current_span:
                # Extend the open span (type mismatches are ignored here).
                current_span['end'] = token_end
                current_span['text'] = text[current_span['start']:current_span['end']]
            else:
                # "O" (or dangling "I-" with no open span) closes the span.
                if current_span:
                    spans.append(current_span)
                current_span = None

        if current_span:
            spans.append(current_span)

        return spans

    def predict(self, texts):
        """Predict spans for a list of raw texts using the trained model.

        Raises ValueError if train()/cross_validate() has not set self.model.
        Returns a list of {'text': ..., 'spans': [...]} dicts.
        """
        if not hasattr(self, 'model'):
            raise ValueError("Modell muss erst trainiert werden!")

        predictions = []
        device = next(self.model.parameters()).device

        for text in texts:
            # Tokenize a single text (batch size 1).
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                    max_length=512, return_offsets_mapping=True)

            # Offsets stay on CPU; the model does not accept them as input.
            offset_mapping = inputs.pop('offset_mapping')
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Forward pass without gradients.
            with torch.no_grad():
                outputs = self.model(**inputs)

            predicted_labels = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()

            # Decode token labels into character spans.
            spans = self._predictions_to_spans(predicted_labels, offset_mapping[0], text)
            predictions.append({'text': text, 'spans': spans})

        return predictions

    def train(self, comments_df, spans_df, experiment_name):
        """Train a single model on a 90/10 split and log metrics to wandb.

        Side effects: wandb run, checkpoints under ./{experiment_name},
        a {experiment_name}_model.pth state dict, and self.model is set.
        Returns the HF Trainer.
        """
        wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}",
                   group=experiment_name)


        # Build the dataset and split 90/10 into train/validation.
        examples, eval_data = self.create_dataset(comments_df, spans_df)
        train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

        # Split the evaluation data the same way (same seed => same split).
        train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
        self.current_eval_data = [eval_data[i] for i in val_indices]

        test_comments = comments_df.iloc[val_indices].reset_index(drop=True)

        train_dataset = Dataset.from_list(train_examples)
        val_dataset = Dataset.from_list(val_examples)

        # Fresh model initialization.
        model = AutoModelForTokenClassification.from_pretrained(
            self.model_name,
            num_labels=len(self.labels),
            id2label=self.id2label,
            label2id=self.label2id
        )

        # Training arguments: select best checkpoint by strict_f1.
        fold_output_dir = f"{experiment_name}"
        training_args = TrainingArguments(
            output_dir=fold_output_dir,
            learning_rate=2e-5,
            warmup_steps=400,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=16,
            num_train_epochs=20,
            eval_strategy="steps",
            eval_steps=40,
            save_strategy="steps",
            save_steps=40,
            load_best_model_at_end=True,
            metric_for_best_model="strict_f1",
            greater_is_better=True,
            logging_steps=10,
            logging_strategy="steps",
            report_to="all",
            disable_tqdm=False,
            seed=42,
            save_total_limit=3,
        )

        # Trainer with early stopping.
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=DataCollatorForTokenClassification(self.tokenizer),
            compute_metrics=self.compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=87)]
            # 87 steps = 3.0 epochs with 29 steps per epoch
        )

        # Run training.
        print(f"Training auf {len(train_dataset)} Beispielen")
        print(f"Validation auf {len(val_dataset)} Beispielen")
        trainer.train()

        # Keep the (best) model for predict()/evaluate_by_label().
        self.model = model

        # Evaluate on the held-out validation comments.
        print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
        metrics = self.evaluate_by_label(test_comments, spans_df)
        wandb.log({
            'strict_f1': metrics['TOTAL']['STRICT']['f1'],
            'strict_precision': metrics['TOTAL']['STRICT']['prec'],
            'strict_recall': metrics['TOTAL']['STRICT']['rec'],
            'spans_f1': metrics['TOTAL']['SPANS']['f1'],
            'types_f1': metrics['TOTAL']['TYPES']['f1']
        })


        # Persist the model weights.
        torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

        torch.cuda.memory.empty_cache()
        wandb.finish()

        return trainer


    def cross_validate(self, comments_df, spans_df, n_splits=5, output_dir_prefix="span-classifier-cv"):
        """Run n-fold cross-validation with StratifiedKFold.

        Stratification uses the first span type of each comment (or 0 when it
        has no spans). A ``--fold K`` CLI argument (read from sys.argv)
        restricts the run to fold K. Side effects per fold: wandb run,
        checkpoints, a *_model.pth file, a pickled test-results file and an
        appended scores.{experiment_name}.txt line (uses the module-level
        ``experiment_name``). Returns per-fold, mean and std metrics.
        """

        # Stratification labels: first span type of each comment, else 0.
        strat_labels = []
        spans_grouped = spans_df.groupby(['document', 'comment_id'])
        for _, row in comments_df.iterrows():
            key = (row['document'], row['comment_id'])
            # first type if the comment has spans, otherwise 0
            has_spans = spans_grouped.get_group(key).iloc[0]['type'] if key in spans_grouped.groups and len(spans_grouped.get_group(key)) > 0 else 0
            strat_labels.append(has_spans)

        # Deterministic stratified folds.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        # Collected metrics, one entry per executed fold.
        fold_metrics = []

        # Iterate over the folds.
        for fold, (train_idx, test_idx) in enumerate(skf.split(range(len(comments_df)), strat_labels)):
            if '--fold' in sys.argv:
                # Optional CLI filter: only run the requested (1-based) fold.
                fold_arg = int(sys.argv[sys.argv.index('--fold') + 1])
                if fold + 1 != fold_arg:
                    continue

            wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}-fold-{fold+1}",
                       group=experiment_name)

            print(f"\n{'='*50}")
            print(f"Fold {fold+1}/{n_splits}")
            print(f"{'='*50}")

            # Comments for this fold.
            train_comments = comments_df.iloc[train_idx].reset_index(drop=True)
            test_comments = comments_df.iloc[test_idx].reset_index(drop=True)

            # Rebuild the dataset for this fold and split off 10% validation.
            examples, eval_data = self.create_dataset(train_comments, spans_df)
            train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

            # Split the evaluation data the same way (same seed => same split).
            train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
            self.current_eval_data = [eval_data[i] for i in val_indices]

            train_dataset = Dataset.from_list(train_examples)
            val_dataset = Dataset.from_list(val_examples)

            # Fresh model per fold.
            model = AutoModelForTokenClassification.from_pretrained(
                self.model_name,
                num_labels=len(self.labels),
                id2label=self.id2label,
                label2id=self.label2id
            )

            # Training arguments (same recipe as train(), 15 epochs).
            fold_output_dir = f"{output_dir_prefix}-fold-{fold+1}"
            training_args = TrainingArguments(
                output_dir=fold_output_dir,
                learning_rate=2e-5,
                warmup_steps=400,
                per_device_train_batch_size=32,
                per_device_eval_batch_size=16,
                num_train_epochs=15,
                eval_strategy="steps",
                eval_steps=40,
                save_strategy="steps",
                save_steps=40,
                load_best_model_at_end=True,
                metric_for_best_model="strict_f1",
                greater_is_better=True,
                logging_steps=10,
                logging_strategy="steps",
                report_to="all",
                disable_tqdm=False,
                seed=42,
                save_total_limit=3,
            )

            # Trainer with early stopping.
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                data_collator=DataCollatorForTokenClassification(self.tokenizer),
                compute_metrics=self.compute_metrics,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=87)] # 87 steps = 3.0 epochs with 29 steps per epoch
            )

            # Run training.
            print(f"Training auf {len(train_dataset)} Beispielen")
            print(f"Validation auf {len(val_dataset)} Beispielen")
            trainer.train()

            # Keep the current model for predict()/evaluate_by_label().
            self.model = model

            # Evaluate on the held-out fold.
            print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
            flausch_results = self.evaluate_by_label(test_comments, spans_df)

            # Extract headline metrics for fold_metrics.
            metrics = {
                'strict_f1': flausch_results['TOTAL']['STRICT']['f1'],
                'strict_precision': flausch_results['TOTAL']['STRICT']['prec'],
                'strict_recall': flausch_results['TOTAL']['STRICT']['rec'],
                'spans_f1': flausch_results['TOTAL']['SPANS']['f1'],
                'spans_precision': flausch_results['TOTAL']['SPANS']['prec'],
                'spans_recall': flausch_results['TOTAL']['SPANS']['rec'],
                'types_f1': flausch_results['TOTAL']['TYPES']['f1'],
                'types_precision': flausch_results['TOTAL']['TYPES']['prec'],
                'types_recall': flausch_results['TOTAL']['TYPES']['rec'],
                'full_results': flausch_results
            }

            fold_metrics.append(metrics)
            wandb.log(metrics, step=fold + 1)

            # Persist the fold's model weights.
            torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

            test_predictions = self.predict(test_comments['comment'].tolist())

            # Persist fold artifacts for offline analysis.
            with open(f"test_results.{experiment_name}.fold-{fold+1}.pkl", "wb") as p:
                pickle.dump((train_comments, test_comments, test_predictions, train_examples, val_examples), p)

            with open(f"scores.{experiment_name}.txt", 'a') as f:
                f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] Fold {fold+1} Ergebnisse:\n')
                f.write(f"[{experiment_name} fold-{fold+1} {metrics}\n")

            torch.cuda.memory.empty_cache()
            wandb.finish()

        # Print the summary.
        print("\n" + "="*50)
        print("Kreuzvalidierung abgeschlossen")
        print("="*50)

        # Mean metrics over the executed folds.
        avg_f1 = np.mean([m['strict_f1'] for m in fold_metrics])
        avg_precision = np.mean([m['strict_precision'] for m in fold_metrics])
        avg_recall = np.mean([m['strict_recall'] for m in fold_metrics])

        print(f"\nDurchschnittliche Metriken über {n_splits} Folds:")
        print(f"Precision: {avg_precision:.10f}")
        print(f"Recall: {avg_recall:.10f}")
        print(f"F1-Score: {avg_f1:.10f}")

        # Standard deviation over the executed folds.
        std_f1 = np.std([m['strict_f1'] for m in fold_metrics])
        std_precision = np.std([m['strict_precision'] for m in fold_metrics])
        std_recall = np.std([m['strict_recall'] for m in fold_metrics])

        print(f"\nStandardabweichung über {n_splits} Folds:")
        print(f"Precision: {std_precision:.10f}")
        print(f"Recall: {std_recall:.10f}")
        print(f"F1-Score: {std_f1:.10f}")

        # Per-fold results.
        for fold, metrics in enumerate(fold_metrics):
            print(f"\nFold {fold+1} Ergebnisse:")
            print(f"Precision: {metrics['strict_precision']:.4f}")
            print(f"Recall: {metrics['strict_recall']:.4f}")
            print(f"F1-Score: {metrics['strict_f1']:.4f}")

        return {
            'fold_metrics': fold_metrics,
            'avg_metrics': {
                'strict_f1': avg_f1,
                'strict_precision': avg_precision,
                'strict_recall': avg_recall
            },
            'std_metrics': {
                'strict_f1': std_f1,
                'strict_precision': std_precision,
                'strict_recall': std_recall
            }
        }
687
+
688
+
689
+
690
# Load the data (training comments, task1 labels, task2 spans).
comments: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/comments.csv")
task1: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task1.csv")
task2: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task2.csv")
comments = comments.merge(task1, on=["document", "comment_id"])

test_data: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/test data/comments.csv")

# Select the data subset for this experiment (here: all comments).
experiment_data = comments

# Classifier with strict-F1 model selection.
classifier = SpanClassifierWithStrictF1('deepset/gbert-large')

# Run 5-fold cross-validation.
cv_results = classifier.cross_validate(
    experiment_data,
    task2,
    n_splits=5,
    output_dir_prefix=experiment_name
)

# write results to text file
with open(f"scores.{experiment_name}.txt", 'a') as f:
    f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] KFold cross validation of {experiment_name}\n')
    f.write(f'{cv_results}\n')

# Optional: train a final model on all the data.
trainer = classifier.train(experiment_data, task2, f'{experiment_name}-final')
torch.save(classifier.model.state_dict(), f'{experiment_name}_final_model.pth')

# Sanity-check predictions with the final model on hand-written examples.
test_texts = ["Das ist ein toller Kommentar!", "Schlechter Text hier.",
              "Sehr gutes Video. Danke! Ich finde Dich echt toll!", "Du bist doof!", "Das Licht ist echt gut.",
              "Team Einhorn", "Macht unbedingt weiter so!", "Das sehe ich ganz genauso.", "Stimmt, Du hast vollkommen Recht!",
              "Ich bin so dankbar ein #Lochinator zu sein"]

predictions = classifier.predict(test_texts)

for pred in predictions:
    print(f"\nText: {pred['text']}")
    for span in pred['spans']:
        print(f"  Span: '{span['text']}' ({span['start']}-{span['end']}) - {span['type']}")
733
+
734
+
735
+
736
+
subtask_2/exp027-2.py ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import sys
4
+ import time
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ import wandb
10
+ from datasets import Dataset
11
+ from multiset import *
12
+ from sklearn.model_selection import train_test_split, StratifiedKFold
13
+ from transformers import (
14
+ AutoTokenizer,
15
+ AutoModelForTokenClassification,
16
+ TrainingArguments,
17
+ Trainer,
18
+ DataCollatorForTokenClassification,
19
+ EarlyStoppingCallback
20
+ )
21
+
22
# Pin GPU enumeration to PCI bus order and restrict training to a single device.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

# Weights & Biases configuration: log to this project, do not upload model artifacts.
os.environ["WANDB_PROJECT"]="GermEval2025-Substask2"
os.environ["WANDB_LOG_MODEL"]="false"

# Identifier used for run names, checkpoint directories and score files.
experiment_name = 'exp027-2'

# The ten span types evaluated label-wise by the flausch metric.
ALL_LABELS = ["affection declaration","agreement","ambiguous",
              "compliment","encouragement","gratitude","group membership",
              "implicit","positive feedback","sympathy"]
33
+
34
+
35
def fine_grained_flausch_by_label(gold, predicted, labels=None):
    """Score predicted candy-speech spans against gold annotations.

    Three evaluation modes are reported:
      * STRICT -- (comment, type, start, end) must all match,
      * SPANS  -- (comment, start, end) must match, type ignored (TOTAL only),
      * TYPES  -- (comment, type) must match, i.e. detection per post.

    Parameters
    ----------
    gold, predicted : pd.DataFrame
        Columns 'document', 'comment_id', 'type', 'start', 'end'.
        The frames are NOT modified (the previous version added a 'cid'
        column in place, mutating the caller's data).
    labels : iterable of str, optional
        Span types to evaluate individually; defaults to ALL_LABELS.

    Returns
    -------
    dict
        {'TOTAL': {'STRICT': {...}, 'SPANS': {...}, 'TYPES': {...}},
         <label>: {'STRICT': {...}, 'TYPES': {...}}, ...}
        with 'prec', 'rec' and 'f1' at each leaf.
    """
    # stdlib Counter replaces the third-party `multiset` dependency;
    # `&` on Counters is min-count intersection, exactly the multiset semantics.
    from collections import Counter

    if labels is None:
        labels = ALL_LABELS

    def collect(frame):
        """Gather (strict, loose, type) annotation multisets for one frame."""
        spans, spans_loose, types = Counter(), Counter(), Counter()
        for row in frame.itertuples(index=False):
            cid = row.document + "_" + str(row.comment_id)
            spans[(cid, row.type, row.start, row.end)] += 1
            spans_loose[(cid, row.start, row.end)] += 1
            types[(cid, row.type)] += 1
        return spans, spans_loose, types

    def prf(tp, n_pred, n_gold):
        """Precision/recall/F1 with zero-safe division (`x and y / x`)."""
        p = float(n_pred) and float(tp) / n_pred
        r = float(n_gold) and float(tp) / n_gold
        f = (p + r) and 2 * p * r / (p + r)
        return {'prec': p, 'rec': r, 'f1': f}

    def ms_prf(gold_ms, pred_ms):
        """PRF over full multisets (duplicates counted with multiplicity)."""
        tp = sum((gold_ms & pred_ms).values())
        return prf(tp, sum(pred_ms.values()), sum(gold_ms.values()))

    gold_spans, gold_spans_loose, gold_types = collect(gold)
    pred_spans, pred_spans_loose, pred_types = collect(predicted)

    results = {'TOTAL': {'STRICT': {}, 'SPANS': {}, 'TYPES': {}}}

    # Label-wise evaluation (STRICT and TYPES only). As in the original
    # implementation, annotations are deduplicated per label (set semantics).
    for label in labels:
        gold_spans_x = {k for k in gold_spans if k[1] == label}
        pred_spans_x = {k for k in pred_spans if k[1] == label}
        gold_types_x = {k for k in gold_types if k[1] == label}
        pred_types_x = {k for k in pred_types if k[1] == label}
        results[label] = {
            'STRICT': prf(len(gold_spans_x & pred_spans_x), len(pred_spans_x), len(gold_spans_x)),
            'TYPES': prf(len(gold_types_x & pred_types_x), len(pred_types_x), len(gold_types_x)),
        }

    # Overall evaluation over the full multisets (multiplicity counted).
    results['TOTAL']['STRICT'] = ms_prf(gold_spans, pred_spans)
    results['TOTAL']['SPANS'] = ms_prf(gold_spans_loose, pred_spans_loose)
    results['TOTAL']['TYPES'] = ms_prf(gold_types, pred_types)

    return results
114
+
115
class SpanClassifierWithStrictF1:
    """Token-classification (BIO) span tagger evaluated with the strict span F1."""

    def __init__(self, model_name="deepset/gbert-base"):
        """Load the tokenizer for *model_name* and build the BIO label inventory."""
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

        span_types = [
            "positive feedback", "compliment", "affection declaration",
            "encouragement", "gratitude", "agreement", "ambiguous",
            "implicit", "group membership", "sympathy",
        ]
        # "O" comes first so that label id 0 means "outside any span".
        self.labels = (["O"]
                       + [f"B-{t}" for t in span_types]
                       + [f"I-{t}" for t in span_types])
        self.label2id = {name: idx for idx, name in enumerate(self.labels)}
        self.id2label = dict(enumerate(self.labels))
127
+
128
    def create_dataset(self, comments_df, spans_df):
        """Build token-classification examples plus aligned evaluation metadata.

        Parameters
        ----------
        comments_df : pd.DataFrame with 'document', 'comment_id', 'comment'
        spans_df : pd.DataFrame with 'document', 'comment_id', 'type', 'start', 'end'

        Returns
        -------
        (examples, eval_data)
            examples: dicts with 'input_ids', 'attention_mask', 'labels' (BIO ids)
            eval_data: per-comment text, offset mapping and gold spans, kept
            1:1 aligned with `examples` for strict-F1 computation.
        """
        examples = []
        eval_data = []  # per-comment metadata for strict F1 computation

        spans_grouped = spans_df.groupby(['document', 'comment_id'])

        for _, row in comments_df.iterrows():
            text = row['comment']
            document = row['document']
            comment_id = row['comment_id']
            key = (document, comment_id)

            # Gold spans for this comment (empty list when it has none).
            if key in spans_grouped.groups:
                true_spans = [(span_type, int(start), int(end))
                              for span_type, start, end in
                              spans_grouped.get_group(key)[['type', 'start', 'end']].values]
            else:
                true_spans = []

            # Tokenize; offsets are needed to project character spans onto tokens.
            tokenized = self.tokenizer(text, truncation=True, max_length=512,
                                       return_offsets_mapping=True)

            # BIO labels per token (second groupby lookup mirrors the one above).
            labels = self._create_bio_labels(tokenized['offset_mapping'],
                                             spans_grouped.get_group(key)[['start', 'end', 'type']].values
                                             if key in spans_grouped.groups else [])

            examples.append({
                'input_ids': tokenized['input_ids'],
                'attention_mask': tokenized['attention_mask'],
                'labels': labels
            })

            # Keep evaluation metadata aligned element-for-element with `examples`.
            eval_data.append({
                'text': text,
                'offset_mapping': tokenized['offset_mapping'],
                'true_spans': true_spans,
                'document': document,
                'comment_id': comment_id
            })

        return examples, eval_data
174
+
175
+ def _create_bio_labels(self, offset_mapping, spans):
176
+ """Erstelle BIO-Labels für Tokens"""
177
+ labels = [0] * len(offset_mapping) # 0 = "O"
178
+
179
+ for start, end, type_label in spans:
180
+ for i, (token_start, token_end) in enumerate(offset_mapping):
181
+ if token_start is None: # Spezielle Tokens
182
+ continue
183
+
184
+ # Token überlappt mit Span
185
+ if token_start < end and token_end > start:
186
+ if token_start <= start:
187
+ if labels[i] != 0:
188
+ # dont overwrite labels if spans are overlapping; just skip the span
189
+ break
190
+ labels[i] = self.label2id[f'B-{type_label}'] # B-compliment
191
+ else:
192
+ labels[i] = self.label2id[f'I-{type_label}'] # I-compliment
193
+
194
+ return labels
195
+
196
+ def _predictions_to_dataframe(self, predictions_list, comments_df_subset):
197
+ """Konvertiere Vorhersagen zu DataFrame für Flausch-Metrik"""
198
+ pred_data = []
199
+
200
+ for i, pred in enumerate(predictions_list):
201
+ if i < len(comments_df_subset):
202
+ row = comments_df_subset.iloc[i]
203
+ document = row['document']
204
+ comment_id = row['comment_id']
205
+
206
+ for span in pred['spans']:
207
+ pred_data.append({
208
+ 'document': document,
209
+ 'comment_id': comment_id,
210
+ 'type': span['type'],
211
+ 'start': span['start'],
212
+ 'end': span['end']
213
+ })
214
+
215
+ return pd.DataFrame(pred_data)
216
+
217
+ # --- helper that builds a DataFrame of spans from eval data + predictions ---
218
+ def _build_span_dfs(self, eval_data, batch_pred_spans):
219
+ """
220
+ eval_data: list of dicts with keys document, comment_id, true_spans
221
+ batch_pred_spans: list of lists of (type, start, end)
222
+ returns (gold_df, pred_df) suitable for fine_grained_flausch_by_label
223
+ """
224
+ rows_gold = []
225
+ rows_pred = []
226
+ for item, pred_spans in zip(eval_data, batch_pred_spans):
227
+ doc = item['document']
228
+ cid = item['comment_id']
229
+ # gold
230
+ for t, s, e in item['true_spans']:
231
+ rows_gold.append({
232
+ 'document': doc,
233
+ 'comment_id': cid,
234
+ 'type': t,
235
+ 'start': s,
236
+ 'end': e
237
+ })
238
+ # pred
239
+ for t, s, e in pred_spans:
240
+ rows_pred.append({
241
+ 'document': doc,
242
+ 'comment_id': cid,
243
+ 'type': t,
244
+ 'start': s,
245
+ 'end': e
246
+ })
247
+ gold_df = pd.DataFrame(rows_gold, columns=['document','comment_id','type','start','end'])
248
+ pred_df = pd.DataFrame(rows_pred, columns=['document','comment_id','type','start','end'])
249
+ return gold_df, pred_df
250
+
251
+
252
    def compute_metrics(self, eval_pred):
        """
        Hugging Face Trainer metric hook.

        Decodes the argmax predictions back into character spans via the
        offset mappings stored in `self.current_eval_data`, scores them with
        fine_grained_flausch_by_label and returns the TOTAL/STRICT numbers.

        NOTE(review): relies on `self.current_eval_data[i]` being aligned
        index-for-index with the trainer's eval dataset -- confirm whenever
        the eval split construction changes.
        """
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=2)

        # Reconstruct predicted spans for every example in the eval set.
        batch_pred_spans = []
        for i, (p_seq, lab_seq) in enumerate(zip(preds, labels)):
            # Positions labelled -100 are padding/special tokens; drop them.
            valid_preds = []
            valid_offsets = []
            offsets = self.current_eval_data[i]['offset_mapping']
            for j,(p,l) in enumerate(zip(p_seq, lab_seq)):
                if l != -100:
                    valid_preds.append(int(p))
                    valid_offsets.append(offsets[j])
            # Decode the BIO id sequence into character spans.
            pred_spans = self._predictions_to_spans(valid_preds, valid_offsets,
                                                    self.current_eval_data[i]['text'])
            # Reduce to (type, start, end) tuples for the metric.
            batch_pred_spans.append([(sp['type'], sp['start'], sp['end'])
                                     for sp in pred_spans])

        # Build the gold/pred DataFrames in the metric's expected layout.
        gold_df, pred_df = self._build_span_dfs(self.current_eval_data,
                                                batch_pred_spans)

        # Fine-grained flausch evaluation (per label + totals).
        results = fine_grained_flausch_by_label(gold_df, pred_df)

        # Only the overall strict metrics are reported to the trainer.
        total = results['TOTAL']['STRICT']
        return {
            'strict_prec': torch.tensor(total['prec'], dtype=torch.float32),
            'strict_rec': torch.tensor(total['rec'], dtype=torch.float32),
            'strict_f1': torch.tensor(total['f1'], dtype=torch.float32),
        }
293
+
294
+
295
+ def evaluate_by_label(self, comments_df, spans_df):
296
+ """
297
+ Replace evaluate_strict_f1. Runs a full pass over all comments,
298
+ uses self.predict() to get spans, then calls your fine_grained_flausch_by_label
299
+ and prints & returns the TOTAL metrics.
300
+ """
301
+ # 1) run predictions
302
+ texts = comments_df['comment'].tolist()
303
+ docs = comments_df['document'].tolist()
304
+ cids = comments_df['comment_id'].tolist()
305
+ preds = self.predict(texts)
306
+
307
+ # 2) build gold and pred lists
308
+ gold_rows = []
309
+ for (_, row) in comments_df.iterrows():
310
+ key = (row['document'], row['comment_id'])
311
+ # get all true spans for this comment_id
312
+ group = spans_df[
313
+ (spans_df.document==row['document']) &
314
+ (spans_df.comment_id==row['comment_id'])
315
+ ]
316
+ for _, sp in group.iterrows():
317
+ gold_rows.append({
318
+ 'document': row['document'],
319
+ 'comment_id': row['comment_id'],
320
+ 'type': sp['type'],
321
+ 'start': sp['start'],
322
+ 'end': sp['end']
323
+ })
324
+
325
+ pred_rows = []
326
+ for doc, cid, p in zip(docs, cids, preds):
327
+ for sp in p['spans']:
328
+ pred_rows.append({
329
+ 'document': doc,
330
+ 'comment_id': cid,
331
+ 'type': sp['type'],
332
+ 'start': sp['start'],
333
+ 'end': sp['end']
334
+ })
335
+
336
+ gold_df = pd.DataFrame(gold_rows, columns=['document','comment_id','type','start','end'])
337
+ pred_df = pd.DataFrame(pred_rows, columns=['document','comment_id','type','start','end'])
338
+
339
+ # 3) call fine-grained
340
+ results = fine_grained_flausch_by_label(gold_df, pred_df)
341
+
342
+ # 4) extract and print
343
+ total = results['TOTAL']
344
+ print("\n=== EVALUATION BY FLAUSCH METRICS ===")
345
+ for mode in ['STRICT','SPANS','TYPES']:
346
+ m = total[mode]
347
+ print(f"{mode:6} P={m['prec']:.4f} R={m['rec']:.4f} F1={m['f1']:.4f}")
348
+
349
+ return results
350
+
351
+ def _predictions_to_spans(self, predicted_labels, offset_mapping, text):
352
+ """Konvertiere Token-Vorhersagen zu Spans"""
353
+ spans = []
354
+ current_span = None
355
+
356
+ for i, label_id in enumerate(predicted_labels):
357
+ if i >= len(offset_mapping):
358
+ break
359
+
360
+ label = self.id2label[label_id]
361
+ token_start, token_end = offset_mapping[i]
362
+
363
+ if token_start is None:
364
+ continue
365
+
366
+ if label.startswith('B-'):
367
+ if current_span:
368
+ spans.append(current_span)
369
+ current_span = {
370
+ 'type': label[2:],
371
+ 'start': token_start,
372
+ 'end': token_end,
373
+ 'text': text[token_start:token_end]
374
+ }
375
+ elif label.startswith('I-') and current_span:
376
+ current_span['end'] = token_end
377
+ current_span['text'] = text[current_span['start']:current_span['end']]
378
+ else:
379
+ if current_span:
380
+ spans.append(current_span)
381
+ current_span = None
382
+
383
+ if current_span:
384
+ spans.append(current_span)
385
+
386
+ return spans
387
+
388
+ def predict(self, texts):
389
+ """Vorhersage für neue Texte"""
390
+ if not hasattr(self, 'model'):
391
+ raise ValueError("Modell muss erst trainiert werden!")
392
+
393
+ predictions = []
394
+ device = next(self.model.parameters()).device
395
+
396
+ for text in texts:
397
+ # Tokenisierung
398
+ inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
399
+ max_length=512, return_offsets_mapping=True)
400
+
401
+ offset_mapping = inputs.pop('offset_mapping')
402
+ inputs = {k: v.to(device) for k, v in inputs.items()}
403
+
404
+ # Vorhersage
405
+ with torch.no_grad():
406
+ outputs = self.model(**inputs)
407
+
408
+ predicted_labels = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()
409
+
410
+ # Spans extrahieren
411
+ spans = self._predictions_to_spans(predicted_labels, offset_mapping[0], text)
412
+ predictions.append({'text': text, 'spans': spans})
413
+
414
+ return predictions
415
+
416
    def train(self, comments_df, spans_df, experiment_name):
        """Train a model on the given data, evaluate it and log to W&B.

        comments_df : comments with 'document', 'comment_id', 'comment'
        spans_df    : gold spans for the comments
        experiment_name : W&B run name, also used as output/checkpoint prefix

        Returns the Hugging Face Trainer. The trained model is kept on
        self.model and its state dict is saved to '<experiment_name>_model.pth'.
        """
        wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}",
                group=experiment_name)


        # Build the tokenized dataset and hold out 10% for validation.
        examples, eval_data = self.create_dataset(comments_df, spans_df)
        train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

        # Split the evaluation metadata the same way.
        # NOTE(review): this relies on train_test_split shuffling identically
        # for the same random_state and input length, so that val_indices
        # matches val_examples element-for-element -- confirm when changing.
        train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
        self.current_eval_data = [eval_data[i] for i in val_indices]

        test_comments = comments_df.iloc[val_indices].reset_index(drop=True)

        train_dataset = Dataset.from_list(train_examples)
        val_dataset = Dataset.from_list(val_examples)

        # Fresh model initialisation with the BIO label inventory.
        model = AutoModelForTokenClassification.from_pretrained(
            self.model_name,
            num_labels=len(self.labels),
            id2label=self.id2label,
            label2id=self.label2id
        )

        # Training arguments: the best checkpoint is chosen by strict span F1.
        fold_output_dir = f"{experiment_name}"
        training_args = TrainingArguments(
            output_dir=fold_output_dir,
            learning_rate=2e-5,
            warmup_steps=500,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            num_train_epochs=20,
            eval_strategy="steps",
            eval_steps=40,
            save_strategy="steps",
            save_steps=40,
            load_best_model_at_end=True,
            metric_for_best_model="strict_f1",
            greater_is_better=True,
            logging_steps=10,
            logging_strategy="steps",
            report_to="all",
            disable_tqdm=False,
            seed=42,
            save_total_limit=3,
        )

        # Trainer with early stopping on the strict-F1 metric.
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=DataCollatorForTokenClassification(self.tokenizer),
            compute_metrics=self.compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=87)]
            # 87 steps = 3.0 epochs with 29 steps per epoch
        )

        # Run training.
        print(f"Training auf {len(train_dataset)} Beispielen")
        print(f"Validation auf {len(val_dataset)} Beispielen")
        trainer.train()

        # Keep the trained model for predict()/evaluate_by_label().
        self.model = model

        # Evaluate on the held-out comments with the flausch metric.
        print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
        metrics = self.evaluate_by_label(test_comments, spans_df)
        wandb.log({
            'strict_f1': metrics['TOTAL']['STRICT']['f1'],
            'strict_precision': metrics['TOTAL']['STRICT']['prec'],
            'strict_recall': metrics['TOTAL']['STRICT']['rec'],
            'spans_f1': metrics['TOTAL']['SPANS']['f1'],
            'types_f1': metrics['TOTAL']['TYPES']['f1']
        })


        # Persist the model weights.
        torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

        torch.cuda.memory.empty_cache()
        wandb.finish()

        return trainer
505
+
506
+
507
    def cross_validate(self, comments_df, spans_df, n_splits=5, output_dir_prefix="span-classifier-cv"):
        """Run n-fold cross-validation with StratifiedKFold.

        For each fold a fresh model is trained (10% of the fold's training
        comments held out for step-wise validation), evaluated on the fold's
        test comments with the flausch metric, and its weights saved.
        A single fold can be selected via a '--fold <k>' CLI argument.

        Returns a dict with 'fold_metrics' (one entry per fold run),
        'avg_metrics' and 'std_metrics' (mean/std of the strict scores).

        NOTE(review): wandb run names and output file names use the
        module-level `experiment_name`, not `output_dir_prefix` -- confirm
        this coupling is intended.
        """

        # Stratification label per comment.
        # NOTE(review): despite the name, `has_spans` holds the *first span
        # type* (a string) when the comment has spans, else 0 -- so folds are
        # stratified by first span type, not by a has/has-not boolean.
        strat_labels = []
        spans_grouped = spans_df.groupby(['document', 'comment_id'])
        for _, row in comments_df.iterrows():
            key = (row['document'], row['comment_id'])
            has_spans = spans_grouped.get_group(key).iloc[0]['type'] if key in spans_grouped.groups and len(spans_grouped.get_group(key)) > 0 else 0
            strat_labels.append(has_spans)

        # Deterministic stratified fold assignment.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        # Metrics collected for every fold that is run.
        fold_metrics = []

        # Iterate over the folds.
        for fold, (train_idx, test_idx) in enumerate(skf.split(range(len(comments_df)), strat_labels)):
            # Optional '--fold k' CLI switch: run only the k-th fold (1-based).
            if '--fold' in sys.argv:
                fold_arg = int(sys.argv[sys.argv.index('--fold') + 1])
                if fold + 1 != fold_arg:
                    continue

            wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}-fold-{fold+1}",
                    group=experiment_name)

            print(f"\n{'='*50}")
            print(f"Fold {fold+1}/{n_splits}")
            print(f"{'='*50}")

            # Comments belonging to this fold.
            train_comments = comments_df.iloc[train_idx].reset_index(drop=True)
            test_comments = comments_df.iloc[test_idx].reset_index(drop=True)

            # Rebuild the tokenized dataset for this fold.
            examples, eval_data = self.create_dataset(train_comments, spans_df)
            train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

            # Split the evaluation metadata the same way (same seed and input
            # length, so the index split matches the example split).
            train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
            self.current_eval_data = [eval_data[i] for i in val_indices]

            train_dataset = Dataset.from_list(train_examples)
            val_dataset = Dataset.from_list(val_examples)

            # Fresh model for every fold.
            model = AutoModelForTokenClassification.from_pretrained(
                self.model_name,
                num_labels=len(self.labels),
                id2label=self.id2label,
                label2id=self.label2id
            )

            # Training arguments: best checkpoint selected by strict span F1.
            fold_output_dir = f"{output_dir_prefix}-fold-{fold+1}"
            training_args = TrainingArguments(
                output_dir=fold_output_dir,
                learning_rate=2e-5,
                warmup_steps=500,
                per_device_train_batch_size=32,
                per_device_eval_batch_size=32,
                num_train_epochs=15,
                eval_strategy="steps",
                eval_steps=40,
                save_strategy="steps",
                save_steps=40,
                load_best_model_at_end=True,
                metric_for_best_model="strict_f1",
                greater_is_better=True,
                logging_steps=10,
                logging_strategy="steps",
                report_to="all",
                disable_tqdm=False,
                seed=42,
                save_total_limit=3,
            )

            # Trainer with early stopping on the strict-F1 metric.
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                data_collator=DataCollatorForTokenClassification(self.tokenizer),
                compute_metrics=self.compute_metrics,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=87)] # 87 steps = 3.0 epochs with 29 steps per epoch
            )

            # Run training.
            print(f"Training auf {len(train_dataset)} Beispielen")
            print(f"Validation auf {len(val_dataset)} Beispielen")
            trainer.train()

            # Keep this fold's model for predict()/evaluate_by_label().
            self.model = model

            # Evaluate on the fold's held-out comments.
            print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
            flausch_results = self.evaluate_by_label(test_comments, spans_df)

            # Extract the headline metrics for fold_metrics.
            metrics = {
                'strict_f1': flausch_results['TOTAL']['STRICT']['f1'],
                'strict_precision': flausch_results['TOTAL']['STRICT']['prec'],
                'strict_recall': flausch_results['TOTAL']['STRICT']['rec'],
                'spans_f1': flausch_results['TOTAL']['SPANS']['f1'],
                'spans_precision': flausch_results['TOTAL']['SPANS']['prec'],
                'spans_recall': flausch_results['TOTAL']['SPANS']['rec'],
                'types_f1': flausch_results['TOTAL']['TYPES']['f1'],
                'types_precision': flausch_results['TOTAL']['TYPES']['prec'],
                'types_recall': flausch_results['TOTAL']['TYPES']['rec'],
                'full_results': flausch_results
            }

            fold_metrics.append(metrics)
            wandb.log(metrics, step=fold + 1)

            # Save this fold's model weights.
            torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

            test_predictions = self.predict(test_comments['comment'].tolist())

            # Persist data splits and predictions for later analysis.
            with open(f"test_results.{experiment_name}.fold-{fold+1}.pkl", "wb") as p:
                pickle.dump((train_comments, test_comments, test_predictions, train_examples, val_examples), p)

            with open(f"scores.{experiment_name}.txt", 'a') as f:
                f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] Fold {fold+1} Ergebnisse:\n')
                f.write(f"[{experiment_name} fold-{fold+1} {metrics}\n")

            torch.cuda.memory.empty_cache()
            wandb.finish()

        # Print a summary over all folds.
        print("\n" + "="*50)
        print("Kreuzvalidierung abgeschlossen")
        print("="*50)

        # Mean metrics over the folds that were run.
        avg_f1 = np.mean([m['strict_f1'] for m in fold_metrics])
        avg_precision = np.mean([m['strict_precision'] for m in fold_metrics])
        avg_recall = np.mean([m['strict_recall'] for m in fold_metrics])

        print(f"\nDurchschnittliche Metriken über {n_splits} Folds:")
        print(f"Precision: {avg_precision:.10f}")
        print(f"Recall: {avg_recall:.10f}")
        print(f"F1-Score: {avg_f1:.10f}")

        # Standard deviation of the strict scores.
        std_f1 = np.std([m['strict_f1'] for m in fold_metrics])
        std_precision = np.std([m['strict_precision'] for m in fold_metrics])
        std_recall = np.std([m['strict_recall'] for m in fold_metrics])

        print(f"\nStandardabweichung über {n_splits} Folds:")
        print(f"Precision: {std_precision:.10f}")
        print(f"Recall: {std_recall:.10f}")
        print(f"F1-Score: {std_f1:.10f}")

        # Per-fold results.
        for fold, metrics in enumerate(fold_metrics):
            print(f"\nFold {fold+1} Ergebnisse:")
            print(f"Precision: {metrics['strict_precision']:.4f}")
            print(f"Recall: {metrics['strict_recall']:.4f}")
            print(f"F1-Score: {metrics['strict_f1']:.4f}")

        return {
            'fold_metrics': fold_metrics,
            'avg_metrics': {
                'strict_f1': avg_f1,
                'strict_precision': avg_precision,
                'strict_recall': avg_recall
            },
            'std_metrics': {
                'strict_f1': std_f1,
                'strict_precision': std_precision,
                'strict_recall': std_recall
            }
        }
687
+
688
+
689
+
690
# Load training data
comments: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/comments.csv")
task1: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task1.csv")
task2: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task2.csv")
# Attach the task-1 labels to each comment row
comments = comments.merge(task1, on=["document", "comment_id"])

# Unlabelled test comments (loaded here but not used further below)
test_data: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/test data/comments.csv")

# Subset of the data used for this experiment (here: all comments)
experiment_data = comments

# Span classifier evaluated with the strict span F1
classifier = SpanClassifierWithStrictF1('xlm-roberta-large')

# Run 5-fold cross-validation
cv_results = classifier.cross_validate(
    experiment_data,
    task2,
    n_splits=5,
    output_dir_prefix=experiment_name
)

# write results to text file
with open(f"scores.{experiment_name}.txt", 'a') as f:
    f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] KFold cross validation of {experiment_name}\n')
    f.write(f'{cv_results}\n')

# Optional: train a final model on the full experiment data
trainer = classifier.train(experiment_data, task2, f'{experiment_name}-final')
torch.save(classifier.model.state_dict(), f'{experiment_name}_final_model.pth')

# Sanity-check predictions with the final model on a few hand-picked texts
test_texts = ["Das ist ein toller Kommentar!", "Schlechter Text hier.",
              "Sehr gutes Video. Danke! Ich finde Dich echt toll!", "Du bist doof!", "Das Licht ist echt gut.",
              "Team Einhorn", "Macht unbedingt weiter so!", "Das sehe ich ganz genauso.", "Stimmt, Du hast vollkommen Recht!",
              "Ich bin so dankbar ein #Lochinator zu sein"]

predictions = classifier.predict(test_texts)

for pred in predictions:
    print(f"\nText: {pred['text']}")
    for span in pred['spans']:
        print(f"  Span: '{span['text']}' ({span['start']}-{span['end']}) - {span['type']}")
733
+
734
+
735
+
736
+
subtask_2/exp027-2_retraining.py ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import sys
4
+ import time
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ import wandb
10
+ from datasets import Dataset
11
+ from multiset import *
12
+ from sklearn.model_selection import train_test_split, StratifiedKFold
13
+ from transformers import (
14
+ AutoTokenizer,
15
+ AutoModelForTokenClassification,
16
+ TrainingArguments,
17
+ Trainer,
18
+ DataCollatorForTokenClassification,
19
+ EarlyStoppingCallback
20
+ )
21
+
22
# Pin GPU enumeration to PCI bus order and train on the second GPU only.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

# Weights & Biases configuration: log to this project, do not upload model artifacts.
os.environ["WANDB_PROJECT"]="GermEval2025-Substask2"
os.environ["WANDB_LOG_MODEL"]="false"

# Identifier used for run names, checkpoint directories and score files.
experiment_name = 'exp027-2_retraining'

# The ten span types evaluated label-wise by the flausch metric.
ALL_LABELS = ["affection declaration","agreement","ambiguous",
              "compliment","encouragement","gratitude","group membership",
              "implicit","positive feedback","sympathy"]
33
+
34
+
35
+ def fine_grained_flausch_by_label(gold, predicted):
36
+ gold['cid']= gold['document']+"_"+gold['comment_id'].apply(str)
37
+ predicted['cid']= predicted['document']+"_"+predicted['comment_id'].apply(str)
38
+
39
+ # annotation sets (predicted)
40
+ pred_spans = Multiset()
41
+ pred_spans_loose = Multiset()
42
+ pred_types = Multiset()
43
+
44
+ # annotation sets (gold)
45
+ gold_spans = Multiset()
46
+ gold_spans_loose = Multiset()
47
+ gold_types = Multiset()
48
+
49
+ for row in predicted.itertuples(index=False):
50
+ pred_spans.add((row.cid,row.type,row.start,row.end))
51
+ pred_spans_loose.add((row.cid,row.start,row.end))
52
+ pred_types.add((row.cid,row.type))
53
+ for row in gold.itertuples(index=False):
54
+ gold_spans.add((row.cid,row.type,row.start,row.end))
55
+ gold_spans_loose.add((row.cid,row.start,row.end))
56
+ gold_types.add((row.cid,row.type))
57
+
58
+ # precision = true_pos / true_pos + false_pos
59
+ # recall = true_pos / true_pos + false_neg
60
+ # f_1 = 2 * prec * rec / (prec + rec)
61
+
62
+ results = {'TOTAL': {'STRICT': {},'SPANS': {},'TYPES': {}}}
63
+ # label-wise evaluation (only for strict and type)
64
+ for label in ALL_LABELS:
65
+ results[label] = {'STRICT': {},'TYPES': {}}
66
+ gold_spans_x = set(filter(lambda x: x[1].__eq__(label), gold_spans))
67
+ pred_spans_x = set(filter(lambda x: x[1].__eq__(label), pred_spans))
68
+ gold_types_x = set(filter(lambda x: x[1].__eq__(label), gold_types))
69
+ pred_types_x = set(filter(lambda x: x[1].__eq__(label), pred_types))
70
+
71
+ # strict: spans + type must match
72
+ ### NOTE: x and y / x returns 0 if x = 0 and y/x otherwise (test for zero division)
73
+ strict_p = float(len(pred_spans_x)) and float( len(gold_spans_x.intersection(pred_spans_x))) / len(pred_spans_x)
74
+ strict_r = float(len(gold_spans_x)) and float( len(gold_spans_x.intersection(pred_spans_x))) / len(gold_spans_x)
75
+ strict_f = (strict_p + strict_r) and 2 * strict_p * strict_r / (strict_p + strict_r)
76
+ results[label]['STRICT']['prec'] = strict_p
77
+ results[label]['STRICT']['rec'] = strict_r
78
+ results[label]['STRICT']['f1'] = strict_f
79
+
80
+ # detection mode: only types must match (per post)
81
+ types_p = float(len(pred_types_x)) and float( len(gold_types_x.intersection(pred_types_x))) / len(pred_types_x)
82
+ types_r = float(len(gold_types_x)) and float( len(gold_types_x.intersection(pred_types_x))) / len(gold_types_x)
83
+ types_f = (types_p + types_r) and 2 * types_p * types_r / (types_p + types_r)
84
+ results[label]['TYPES']['prec'] = types_p
85
+ results[label]['TYPES']['rec'] = types_r
86
+ results[label]['TYPES']['f1'] = types_f
87
+
88
+ # Overall evaluation
89
+ # strict: spans + type must match
90
+ strict_p = float(len(pred_spans)) and float( len(gold_spans.intersection(pred_spans))) / len(pred_spans)
91
+ strict_r = float(len(gold_spans)) and float( len(gold_spans.intersection(pred_spans))) / len(gold_spans)
92
+ strict_f = (strict_p + strict_r) and 2 * strict_p * strict_r / (strict_p + strict_r)
93
+ results['TOTAL']['STRICT']['prec'] = strict_p
94
+ results['TOTAL']['STRICT']['rec'] = strict_r
95
+ results['TOTAL']['STRICT']['f1'] = strict_f
96
+
97
+ # spans: spans must match
98
+ spans_p = float(len(pred_spans_loose)) and float( len(gold_spans_loose.intersection(pred_spans_loose))) / len(pred_spans_loose)
99
+ spans_r = float(len(gold_spans_loose)) and float( len(gold_spans_loose.intersection(pred_spans_loose))) / len(gold_spans_loose)
100
+ spans_f = (spans_p + spans_r) and 2 * spans_p * spans_r / (spans_p + spans_r)
101
+ results['TOTAL']['SPANS']['prec'] = spans_p
102
+ results['TOTAL']['SPANS']['rec'] = spans_r
103
+ results['TOTAL']['SPANS']['f1'] = spans_f
104
+
105
+ # detection mode: only types must match (per post)
106
+ types_p = float(len(pred_types)) and float( len(gold_types.intersection(pred_types))) / len(pred_types)
107
+ types_r = float(len(gold_types)) and float( len(gold_types.intersection(pred_types))) / len(gold_types)
108
+ types_f = (types_p + types_r) and 2 * types_p * types_r / (types_p + types_r)
109
+ results['TOTAL']['TYPES']['prec'] = types_p
110
+ results['TOTAL']['TYPES']['rec'] = types_r
111
+ results['TOTAL']['TYPES']['f1'] = types_f
112
+
113
+ return results
114
+
115
class SpanClassifierWithStrictF1:
    """BIO token-classification span classifier for GermEval 2025 subtask 2.

    Wraps a HuggingFace AutoModelForTokenClassification plus its tokenizer,
    converts (document, comment_id, type, start, end) character-span
    annotations into BIO token labels, and provides training, k-fold
    cross-validation, prediction and "flausch"-metric evaluation.

    Mutable state set as side effects:
      - self.current_eval_data: per-example eval info consumed by
        compute_metrics() during Trainer evaluation.
      - self.model: set after train()/cross_validate(); required by predict().
    """

    def __init__(self, model_name="deepset/gbert-base"):
        # The tokenizer is loaded eagerly; the model itself is (re)created
        # per training run in train()/cross_validate().
        self.model_name = model_name
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)

        # "O" plus B-/I- tags for each of the ten flausch span types.
        self.labels =[
            "O",
            "B-positive feedback", "B-compliment", "B-affection declaration", "B-encouragement", "B-gratitude", "B-agreement", "B-ambiguous", "B-implicit", "B-group membership", "B-sympathy",
            "I-positive feedback", "I-compliment", "I-affection declaration", "I-encouragement", "I-gratitude", "I-agreement", "I-ambiguous", "I-implicit", "I-group membership", "I-sympathy"
        ]
        self.label2id = {label: i for i, label in enumerate(self.labels)}
        self.id2label = {i: label for i, label in enumerate(self.labels)}

    def create_dataset(self, comments_df, spans_df):
        """Build tokenized examples with BIO labels and keep evaluation data.

        Args:
            comments_df: DataFrame with 'document', 'comment_id', 'comment'.
            spans_df: DataFrame with 'document', 'comment_id', 'type',
                'start', 'end' (character offsets into the comment).

        Returns:
            (examples, eval_data) — parallel lists. `examples` holds
            input_ids/attention_mask/labels dicts for the Trainer;
            `eval_data` holds text, offset_mapping, true_spans, document and
            comment_id for strict-F1 computation in compute_metrics().
        """
        examples = []
        eval_data = []  # kept aside for strict-F1 computation

        spans_grouped = spans_df.groupby(['document', 'comment_id'])

        for _, row in comments_df.iterrows():
            text = row['comment']
            document = row['document']
            comment_id = row['comment_id']
            key = (document, comment_id)

            # Gold spans for this comment (empty list if it has none).
            if key in spans_grouped.groups:
                true_spans = [(span_type, int(start), int(end))
                              for span_type, start, end in
                              spans_grouped.get_group(key)[['type', 'start', 'end']].values]
            else:
                true_spans = []

            # Tokenization; offsets are needed to map tokens back to
            # character spans. NOTE(review): comments longer than 512 tokens
            # are truncated — spans beyond that point are silently dropped.
            tokenized = self.tokenizer(text, truncation=True, max_length=512,
                                       return_offsets_mapping=True)

            # Create BIO labels (note the column order start/end/type here,
            # matching _create_bio_labels' unpacking).
            labels = self._create_bio_labels(tokenized['offset_mapping'],
                                             spans_grouped.get_group(key)[['start', 'end', 'type']].values
                                             if key in spans_grouped.groups else [])

            examples.append({
                'input_ids': tokenized['input_ids'],
                'attention_mask': tokenized['attention_mask'],
                'labels': labels
            })

            # Store evaluation data aligned 1:1 with `examples`.
            eval_data.append({
                'text': text,
                'offset_mapping': tokenized['offset_mapping'],
                'true_spans': true_spans,
                'document': document,
                'comment_id': comment_id
            })

        return examples, eval_data

    def _create_bio_labels(self, offset_mapping, spans):
        """Create per-token BIO label ids for one tokenized comment.

        Args:
            offset_mapping: list of (char_start, char_end) per token.
            spans: iterable of (start, end, type) rows.

        Returns:
            list[int] of label ids, same length as offset_mapping.
        """
        labels = [0] * len(offset_mapping)  # 0 = "O"

        for start, end, type_label in spans:
            for i, (token_start, token_end) in enumerate(offset_mapping):
                if token_start is None:  # special tokens
                    # NOTE(review): fast tokenizers emit (0, 0) rather than
                    # None for special tokens; those fail the overlap test
                    # below anyway, so they still end up labeled "O".
                    continue

                # Token overlaps the character span.
                if token_start < end and token_end > start:
                    if token_start <= start:
                        if labels[i] != 0:
                            # Don't overwrite labels if spans overlap; just
                            # skip this span entirely.
                            break
                        labels[i] = self.label2id[f'B-{type_label}']  # e.g. B-compliment
                    else:
                        labels[i] = self.label2id[f'I-{type_label}']  # e.g. I-compliment

        return labels

    def _predictions_to_dataframe(self, predictions_list, comments_df_subset):
        """Convert predict() output to a DataFrame for the flausch metric.

        Rows of `comments_df_subset` are matched to `predictions_list` by
        position (i-th prediction belongs to the i-th comment row).
        """
        pred_data = []

        for i, pred in enumerate(predictions_list):
            if i < len(comments_df_subset):
                row = comments_df_subset.iloc[i]
                document = row['document']
                comment_id = row['comment_id']

                for span in pred['spans']:
                    pred_data.append({
                        'document': document,
                        'comment_id': comment_id,
                        'type': span['type'],
                        'start': span['start'],
                        'end': span['end']
                    })

        return pd.DataFrame(pred_data)

    # --- helper that builds a DataFrame of spans from eval data + predictions ---
    def _build_span_dfs(self, eval_data, batch_pred_spans):
        """
        eval_data: list of dicts with keys document, comment_id, true_spans
        batch_pred_spans: list of lists of (type, start, end)
        returns (gold_df, pred_df) suitable for fine_grained_flausch_by_label
        """
        rows_gold = []
        rows_pred = []
        for item, pred_spans in zip(eval_data, batch_pred_spans):
            doc = item['document']
            cid = item['comment_id']
            # gold spans
            for t, s, e in item['true_spans']:
                rows_gold.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': t,
                    'start': s,
                    'end': e
                })
            # predicted spans
            for t, s, e in pred_spans:
                rows_pred.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': t,
                    'start': s,
                    'end': e
                })
        # Explicit column lists keep the schema stable even when empty.
        gold_df = pd.DataFrame(rows_gold, columns=['document','comment_id','type','start','end'])
        pred_df = pd.DataFrame(rows_pred, columns=['document','comment_id','type','start','end'])
        return gold_df, pred_df


    def compute_metrics(self, eval_pred):
        """
        Called by the HF Trainer at each evaluation step.
        Collects predictions, reconstructs gold/pred spans, calls
        fine_grained_flausch_by_label and returns the TOTAL/STRICT metrics.

        NOTE(review): assumes eval_pred covers self.current_eval_data fully
        and in the same order (i-th logit row <-> i-th eval_data entry) —
        holds as long as the Trainer's eval_dataset was built from the same
        split, unshuffled. Verify if the eval setup changes.
        """
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=2)

        # Reconstruct predicted spans per example in this evaluation pass.
        batch_pred_spans = []
        for i, (p_seq, lab_seq) in enumerate(zip(preds, labels)):
            # Skip padding positions (label -100).
            valid_preds = []
            valid_offsets = []
            offsets = self.current_eval_data[i]['offset_mapping']
            for j,(p,l) in enumerate(zip(p_seq, lab_seq)):
                if l != -100:
                    valid_preds.append(int(p))
                    valid_offsets.append(offsets[j])
            # Convert token predictions to character spans.
            pred_spans = self._predictions_to_spans(valid_preds, valid_offsets,
                                                    self.current_eval_data[i]['text'])
            # Reduce to (type, start, end) tuples.
            batch_pred_spans.append([(sp['type'], sp['start'], sp['end'])
                                     for sp in pred_spans])

        # Build the gold/pred DataFrames.
        gold_df, pred_df = self._build_span_dfs(self.current_eval_data,
                                                batch_pred_spans)

        # Fine-grained flausch metrics.
        results = fine_grained_flausch_by_label(gold_df, pred_df)

        # Extract the TOTAL/STRICT metrics. NOTE(review): returned as torch
        # tensors rather than plain floats — presumably for logging
        # compatibility; confirm downstream consumers expect tensors.
        total = results['TOTAL']['STRICT']
        return {
            'strict_prec': torch.tensor(total['prec'], dtype=torch.float32),
            'strict_rec': torch.tensor(total['rec'], dtype=torch.float32),
            'strict_f1': torch.tensor(total['f1'], dtype=torch.float32),
        }


    def evaluate_by_label(self, comments_df, spans_df):
        """
        Full evaluation pass over all comments: uses self.predict() to get
        spans, calls fine_grained_flausch_by_label, prints and returns the
        TOTAL metrics (plus per-label results in the returned dict).
        """
        # 1) run predictions
        texts = comments_df['comment'].tolist()
        docs = comments_df['document'].tolist()
        cids = comments_df['comment_id'].tolist()
        preds = self.predict(texts)

        # 2) build gold and pred row lists
        gold_rows = []
        for (_, row) in comments_df.iterrows():
            key = (row['document'], row['comment_id'])  # (unused)
            # All gold spans for this document/comment pair.
            group = spans_df[
                (spans_df.document==row['document']) &
                (spans_df.comment_id==row['comment_id'])
            ]
            for _, sp in group.iterrows():
                gold_rows.append({
                    'document': row['document'],
                    'comment_id': row['comment_id'],
                    'type': sp['type'],
                    'start': sp['start'],
                    'end': sp['end']
                })

        pred_rows = []
        for doc, cid, p in zip(docs, cids, preds):
            for sp in p['spans']:
                pred_rows.append({
                    'document': doc,
                    'comment_id': cid,
                    'type': sp['type'],
                    'start': sp['start'],
                    'end': sp['end']
                })

        gold_df = pd.DataFrame(gold_rows, columns=['document','comment_id','type','start','end'])
        pred_df = pd.DataFrame(pred_rows, columns=['document','comment_id','type','start','end'])

        # 3) compute fine-grained metrics
        results = fine_grained_flausch_by_label(gold_df, pred_df)

        # 4) extract and print the overall numbers
        total = results['TOTAL']
        print("\n=== EVALUATION BY FLAUSCH METRICS ===")
        for mode in ['STRICT','SPANS','TYPES']:
            m = total[mode]
            print(f"{mode:6} P={m['prec']:.4f} R={m['rec']:.4f} F1={m['f1']:.4f}")

        return results

    def _predictions_to_spans(self, predicted_labels, offset_mapping, text):
        """Convert per-token label-id predictions to character spans.

        Adjacent B-/I- tags of the same run are merged into one span dict
        with keys 'type', 'start', 'end', 'text'.
        """
        spans = []
        current_span = None

        for i, label_id in enumerate(predicted_labels):
            if i >= len(offset_mapping):
                break

            label = self.id2label[label_id]
            token_start, token_end = offset_mapping[i]

            if token_start is None:
                # NOTE(review): with fast tokenizers / tensor offsets this is
                # never None (special tokens map to (0, 0)); harmless here.
                continue

            if label.startswith('B-'):
                # A new span starts; flush any open one.
                if current_span:
                    spans.append(current_span)
                current_span = {
                    'type': label[2:],
                    'start': token_start,
                    'end': token_end,
                    'text': text[token_start:token_end]
                }
            elif label.startswith('I-') and current_span:
                # Continuation: extend the open span. NOTE(review): an I- tag
                # whose type differs from the open B- span still extends it.
                current_span['end'] = token_end
                current_span['text'] = text[current_span['start']:current_span['end']]
            else:
                # "O" (or orphan I-): close any open span.
                if current_span:
                    spans.append(current_span)
                current_span = None

        if current_span:
            spans.append(current_span)

        return spans

    def predict(self, texts):
        """Predict flausch spans for a list of raw texts.

        Requires self.model (set by train()/cross_validate()).

        Returns:
            list of {'text': str, 'spans': list of span dicts} per input.
        """
        if not hasattr(self, 'model'):
            raise ValueError("Modell muss erst trainiert werden!")

        predictions = []
        device = next(self.model.parameters()).device

        for text in texts:
            # Tokenize one text at a time (simple, not batched).
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True,
                                    max_length=512, return_offsets_mapping=True)

            # Offsets are only needed on the CPU side for span reconstruction.
            offset_mapping = inputs.pop('offset_mapping')
            inputs = {k: v.to(device) for k, v in inputs.items()}

            # Forward pass without gradients.
            with torch.no_grad():
                outputs = self.model(**inputs)

            predicted_labels = torch.argmax(outputs.logits, dim=2)[0].cpu().numpy()

            # Extract character spans from the token predictions.
            spans = self._predictions_to_spans(predicted_labels, offset_mapping[0], text)
            predictions.append({'text': text, 'spans': spans})

        return predictions

    def train(self, comments_df, spans_df, experiment_name):
        """Train one model on a 90/10 split of the given data.

        Side effects: starts/finishes a W&B run, sets self.model and
        self.current_eval_data, writes checkpoints under `experiment_name`
        and a final f'{experiment_name}_model.pth' state dict.

        Returns the HF Trainer instance.
        """
        wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}",
                   group=experiment_name)


        # Build the dataset for this run.
        examples, eval_data = self.create_dataset(comments_df, spans_df)
        train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

        # Split the evaluation data the same way; the identical
        # random_state/test_size keeps val_examples and current_eval_data
        # aligned index-for-index.
        train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
        self.current_eval_data = [eval_data[i] for i in val_indices]

        test_comments = comments_df.iloc[val_indices].reset_index(drop=True)

        train_dataset = Dataset.from_list(train_examples)
        val_dataset = Dataset.from_list(val_examples)

        # Fresh model for this run.
        model = AutoModelForTokenClassification.from_pretrained(
            self.model_name,
            num_labels=len(self.labels),
            id2label=self.id2label,
            label2id=self.label2id
        )

        # Training arguments. Best checkpoint is chosen by strict span F1.
        fold_output_dir = f"{experiment_name}"
        training_args = TrainingArguments(
            output_dir=fold_output_dir,
            learning_rate=2e-5,
            warmup_steps=500,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            num_train_epochs=20,
            eval_strategy="steps",
            eval_steps=40,
            save_strategy="steps",
            save_steps=40,
            load_best_model_at_end=True,
            metric_for_best_model="strict_f1",
            greater_is_better=True,
            logging_steps=10,
            logging_strategy="steps",
            report_to="all",
            disable_tqdm=False,
            seed=42,
            save_total_limit=3,
        )

        # Trainer wiring: token-classification collator + custom metrics.
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=DataCollatorForTokenClassification(self.tokenizer),
            compute_metrics=self.compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=87)]
            # 87 steps = 3.0 epochs with 29 steps per epoch
        )

        # Training
        print(f"Training auf {len(train_dataset)} Beispielen")
        print(f"Validation auf {len(val_dataset)} Beispielen")
        trainer.train()

        # Keep the trained model for predict()/evaluate_by_label().
        self.model = model

        # Evaluate on the held-out 10% split.
        # NOTE(review): this is the same split used for early stopping /
        # checkpoint selection, not an independent test set.
        print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
        metrics = self.evaluate_by_label(test_comments, spans_df)
        wandb.log({
            'strict_f1': metrics['TOTAL']['STRICT']['f1'],
            'strict_precision': metrics['TOTAL']['STRICT']['prec'],
            'strict_recall': metrics['TOTAL']['STRICT']['rec'],
            'spans_f1': metrics['TOTAL']['SPANS']['f1'],
            'types_f1': metrics['TOTAL']['TYPES']['f1']
        })


        # Persist the final weights.
        torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

        torch.cuda.memory.empty_cache()
        wandb.finish()

        return trainer


    def cross_validate(self, comments_df, spans_df, n_splits=5, output_dir_prefix="span-classifier-cv"):
        """Run n-fold cross-validation with StratifiedKFold.

        Stratifies on the first span type of each comment (or 0 when the
        comment has none). Honors an optional '--fold K' CLI argument to run
        only fold K (1-based). Uses the module-level `experiment_name` for
        W&B run names and score/result files.

        Returns a dict with 'fold_metrics', 'avg_metrics' and 'std_metrics'.
        """

        # Stratification labels: first span type of a comment, else 0.
        # NOTE(review): mixes strings and the int 0 in one label list —
        # works with sklearn but relies on object-dtype label handling.
        strat_labels = []
        spans_grouped = spans_df.groupby(['document', 'comment_id'])
        for _, row in comments_df.iterrows():
            key = (row['document'], row['comment_id'])
            # first span type if the comment has spans, otherwise 0
            has_spans = spans_grouped.get_group(key).iloc[0]['type'] if key in spans_grouped.groups and len(spans_grouped.get_group(key)) > 0 else 0
            strat_labels.append(has_spans)

        # Deterministic fold assignment.
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

        # Metrics collected per fold.
        fold_metrics = []

        # Iterate over folds.
        for fold, (train_idx, test_idx) in enumerate(skf.split(range(len(comments_df)), strat_labels)):
            # Optional CLI filter: run only the requested 1-based fold.
            if '--fold' in sys.argv:
                fold_arg = int(sys.argv[sys.argv.index('--fold') + 1])
                if fold + 1 != fold_arg:
                    continue

            wandb.init(project=os.environ["WANDB_PROJECT"], name=f"{experiment_name}-fold-{fold+1}",
                       group=experiment_name)

            print(f"\n{'='*50}")
            print(f"Fold {fold+1}/{n_splits}")
            print(f"{'='*50}")

            # Comment subsets for this fold.
            train_comments = comments_df.iloc[train_idx].reset_index(drop=True)
            test_comments = comments_df.iloc[test_idx].reset_index(drop=True)

            # Rebuild the dataset for this fold; 10% of the training part is
            # split off as the Trainer's validation set.
            examples, eval_data = self.create_dataset(train_comments, spans_df)
            train_examples, val_examples = train_test_split(examples, test_size=0.1, random_state=42)

            # Same seed/size keeps eval_data aligned with val_examples.
            train_indices, val_indices = train_test_split(range(len(examples)), test_size=0.1, random_state=42)
            self.current_eval_data = [eval_data[i] for i in val_indices]

            train_dataset = Dataset.from_list(train_examples)
            val_dataset = Dataset.from_list(val_examples)

            # Fresh model for each fold.
            model = AutoModelForTokenClassification.from_pretrained(
                self.model_name,
                num_labels=len(self.labels),
                id2label=self.id2label,
                label2id=self.label2id
            )

            # Training arguments (per-fold output dir).
            fold_output_dir = f"{output_dir_prefix}-fold-{fold+1}"
            training_args = TrainingArguments(
                output_dir=fold_output_dir,
                learning_rate=2e-5,
                warmup_steps=500,
                per_device_train_batch_size=32,
                per_device_eval_batch_size=32,
                num_train_epochs=15,
                eval_strategy="steps",
                eval_steps=40,
                save_strategy="steps",
                save_steps=40,
                load_best_model_at_end=True,
                metric_for_best_model="strict_f1",
                greater_is_better=True,
                logging_steps=10,
                logging_strategy="steps",
                report_to="all",
                disable_tqdm=False,
                seed=42,
                save_total_limit=3,
            )

            # Trainer
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                data_collator=DataCollatorForTokenClassification(self.tokenizer),
                compute_metrics=self.compute_metrics,
                callbacks=[EarlyStoppingCallback(early_stopping_patience=87)]  # 87 steps = 3.0 epochs with 29 steps per epoch
            )

            # Training
            print(f"Training auf {len(train_dataset)} Beispielen")
            print(f"Validation auf {len(val_dataset)} Beispielen")
            trainer.train()

            # Keep the fold's model for predict()/evaluate_by_label().
            self.model = model

            # Evaluate on the fold's held-out test part.
            print(f"Evaluierung auf {len(test_comments)} Test-Beispielen")
            flausch_results = self.evaluate_by_label(test_comments, spans_df)

            # Extract the headline metrics for this fold.
            metrics = {
                'strict_f1': flausch_results['TOTAL']['STRICT']['f1'],
                'strict_precision': flausch_results['TOTAL']['STRICT']['prec'],
                'strict_recall': flausch_results['TOTAL']['STRICT']['rec'],
                'spans_f1': flausch_results['TOTAL']['SPANS']['f1'],
                'spans_precision': flausch_results['TOTAL']['SPANS']['prec'],
                'spans_recall': flausch_results['TOTAL']['SPANS']['rec'],
                'types_f1': flausch_results['TOTAL']['TYPES']['f1'],
                'types_precision': flausch_results['TOTAL']['TYPES']['prec'],
                'types_recall': flausch_results['TOTAL']['TYPES']['rec'],
                'full_results': flausch_results
            }

            fold_metrics.append(metrics)
            wandb.log(metrics, step=fold + 1)

            # Persist the fold's weights.
            torch.save(model.state_dict(), f'{fold_output_dir}_model.pth')

            test_predictions = self.predict(test_comments['comment'].tolist())

            # Persist raw data/predictions for offline analysis.
            with open(f"test_results.{experiment_name}.fold-{fold+1}.pkl", "wb") as p:
                pickle.dump((train_comments, test_comments, test_predictions, train_examples, val_examples), p)

            with open(f"scores.{experiment_name}.txt", 'a') as f:
                f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] Fold {fold+1} Ergebnisse:\n')
                f.write(f"[{experiment_name} fold-{fold+1} {metrics}\n")

            torch.cuda.memory.empty_cache()
            wandb.finish()

        # Print the summary.
        print("\n" + "="*50)
        print("Kreuzvalidierung abgeschlossen")
        print("="*50)

        # Mean metrics across folds.
        avg_f1 = np.mean([m['strict_f1'] for m in fold_metrics])
        avg_precision = np.mean([m['strict_precision'] for m in fold_metrics])
        avg_recall = np.mean([m['strict_recall'] for m in fold_metrics])

        print(f"\nDurchschnittliche Metriken über {n_splits} Folds:")
        print(f"Precision: {avg_precision:.10f}")
        print(f"Recall: {avg_recall:.10f}")
        print(f"F1-Score: {avg_f1:.10f}")

        # Standard deviations across folds.
        std_f1 = np.std([m['strict_f1'] for m in fold_metrics])
        std_precision = np.std([m['strict_precision'] for m in fold_metrics])
        std_recall = np.std([m['strict_recall'] for m in fold_metrics])

        print(f"\nStandardabweichung über {n_splits} Folds:")
        print(f"Precision: {std_precision:.10f}")
        print(f"Recall: {std_recall:.10f}")
        print(f"F1-Score: {std_f1:.10f}")

        # Per-fold results.
        for fold, metrics in enumerate(fold_metrics):
            print(f"\nFold {fold+1} Ergebnisse:")
            print(f"Precision: {metrics['strict_precision']:.4f}")
            print(f"Recall: {metrics['strict_recall']:.4f}")
            print(f"F1-Score: {metrics['strict_f1']:.4f}")

        return {
            'fold_metrics': fold_metrics,
            'avg_metrics': {
                'strict_f1': avg_f1,
                'strict_precision': avg_precision,
                'strict_recall': avg_recall
            },
            'std_metrics': {
                'strict_f1': std_f1,
                'strict_precision': std_precision,
                'strict_recall': std_recall
            }
        }
687
+
688
+
689
+
690
def main():
    """Script entry point: train the final span classifier on the full
    training data, persist the weights, and print sample predictions."""
    # Load data
    comments: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/comments.csv")
    task1: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task1.csv")
    task2: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/training data/task2.csv")
    comments = comments.merge(task1, on=["document", "comment_id"])

    # NOTE(review): loaded but unused in this script; kept so a missing test
    # file fails fast. Remove if that is not intended.
    test_data: pd.DataFrame = pd.read_csv("../../share-GermEval2025-data/Data/test data/comments.csv")

    # Subset of the data used for the experiment (here: all comments).
    experiment_data = comments

    # Classifier with strict-F1 model selection.
    classifier = SpanClassifierWithStrictF1('xlm-roberta-large')

    # Optional: 5-fold cross-validation
    # cv_results = classifier.cross_validate(
    #     experiment_data,
    #     task2,
    #     n_splits=5,
    #     output_dir_prefix=experiment_name
    # )
    #
    # # write results to text file
    # with open(f"scores.{experiment_name}.txt", 'a') as f:
    #     f.write(f'[{time.strftime("%Y-%m-%d %H:%M:%S")}] KFold cross validation of {experiment_name}\n')
    #     f.write(f'{cv_results}\n')

    # Train the final model on all data.
    trainer = classifier.train(experiment_data, task2, f'{experiment_name}-final')
    torch.save(classifier.model.state_dict(), f'{experiment_name}_final_model.pth')

    # Sanity-check predictions with the final model.
    test_texts = ["Das ist ein toller Kommentar!", "Schlechter Text hier.",
                  "Sehr gutes Video. Danke! Ich finde Dich echt toll!", "Du bist doof!", "Das Licht ist echt gut.",
                  "Team Einhorn", "Macht unbedingt weiter so!", "Das sehe ich ganz genauso.", "Stimmt, Du hast vollkommen Recht!",
                  "Ich bin so dankbar ein #Lochinator zu sein"]

    predictions = classifier.predict(test_texts)

    for pred in predictions:
        print(f"\nText: {pred['text']}")
        for span in pred['spans']:
            print(f"  Span: '{span['text']}' ({span['start']}-{span['end']}) - {span['type']}")


# Guard so importing this module (e.g. for its metric functions) does not
# trigger data loading and training.
if __name__ == "__main__":
    main()
733
+
734
+
735
+
736
+
subtask_2/submission_subtask2-2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
subtask_2/submission_subtask2.ipynb ADDED
The diff for this file is too large to render. See raw diff