Updated API submission
Browse files
- .gitignore +17 -0
- Dockerfile +16 -0
- app.py +27 -0
- requirements.txt +7 -0
- tasks/__init__.py +0 -0
- tasks/audio.py +23 -0
- tasks/image.py +23 -0
- tasks/text.py +73 -0
- tasks/utils/__init__.py +0 -0
- tasks/utils/emissions.py +28 -0
- tasks/utils/evaluation.py +19 -0
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            .ipynb_checkpoints/sandbox-checkpoint.ipynb
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            auto_evals/
         | 
| 4 | 
            +
            venv/
         | 
| 5 | 
            +
            __pycache__/
         | 
| 6 | 
            +
            .env
         | 
| 7 | 
            +
            .ipynb_checkpoints
         | 
| 8 | 
            +
            *ipynb
         | 
| 9 | 
            +
            .vscode/
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            eval-queue/
         | 
| 12 | 
            +
            eval-results/
         | 
| 13 | 
            +
            eval-queue-bk/
         | 
| 14 | 
            +
            eval-results-bk/
         | 
| 15 | 
            +
            logs/
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            emissions.csv
         | 
    	
        Dockerfile
    ADDED
    
    | @@ -0,0 +1,16 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
         | 
| 2 | 
            +
            # you will also find guides on how best to write your Dockerfile
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            FROM python:3.9
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            RUN useradd -m -u 1000 user
         | 
| 7 | 
            +
            USER user
         | 
| 8 | 
            +
            ENV PATH="/home/user/.local/bin:$PATH"
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            WORKDIR /app
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            COPY --chown=user ./requirements.txt requirements.txt
         | 
| 13 | 
            +
            RUN pip install --no-cache-dir --upgrade -r requirements.txt
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            COPY --chown=user . /app
         | 
| 16 | 
            +
            CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from fastapi import FastAPI
         | 
| 2 | 
            +
            from dotenv import load_dotenv
         | 
| 3 | 
            +
            from tasks import text, image, audio
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Load environment variables
         | 
| 6 | 
            +
            load_dotenv()
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            app = FastAPI(
         | 
| 9 | 
            +
                title="Frugal AI Challenge API",
         | 
| 10 | 
            +
                description="API for the Frugal AI Challenge evaluation endpoints"
         | 
| 11 | 
            +
            )
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # Include all routers
         | 
| 14 | 
            +
            app.include_router(text.router)
         | 
| 15 | 
            +
            app.include_router(image.router)
         | 
| 16 | 
            +
            app.include_router(audio.router)
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            @app.get("/")
            async def root():
                """Return a welcome message and a short description of each task endpoint."""
                # One human-readable line per task route exposed by the API.
                endpoints = {
                    "text": "/text - Text classification task",
                    "image": "/image - Image classification task (coming soon)",
                    "audio": "/audio - Audio classification task (coming soon)",
                }
                return {
                    "message": "Welcome to the Frugal AI Challenge API",
                    "endpoints": endpoints,
                }
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            fastapi>=0.68.0
         | 
| 2 | 
            +
            uvicorn>=0.15.0
         | 
| 3 | 
            +
            codecarbon>=2.3.1
         | 
| 4 | 
            +
            datasets>=2.14.0
         | 
| 5 | 
            +
            scikit-learn>=1.0.2
         | 
| 6 | 
            +
            pydantic>=1.10.0
         | 
| 7 | 
            +
            python-dotenv>=1.0.0
         | 
    	
        tasks/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        tasks/audio.py
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from fastapi import APIRouter
         | 
| 2 | 
            +
            from .utils.evaluation import AudioEvaluationRequest
         | 
| 3 | 
            +
            from .utils.emissions import get_space_info
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            router = APIRouter()
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            @router.post("/audio", tags=["Audio Task"])
            async def evaluate_audio(request: AudioEvaluationRequest):
                """
                Placeholder for audio task evaluation.

                No evaluation is run: the response contains a fixed "not yet
                implemented" message, the username and space URL returned by
                ``get_space_info()``, and an echo of the request's configuration.
                """
                owner, url = get_space_info()

                # Echo the submitted configuration back to the caller.
                received_config = {
                    "dataset_name": request.dataset_name,
                    "test_size": request.test_size,
                    "test_seed": request.test_seed,
                    "model_description": request.model_description,
                }

                return {
                    "message": "Audio evaluation endpoint not yet implemented",
                    "username": owner,
                    "space_url": url,
                    "received_config": received_config,
                }
         | 
    	
        tasks/image.py
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from fastapi import APIRouter
         | 
| 2 | 
            +
            from .utils.evaluation import ImageEvaluationRequest
         | 
| 3 | 
            +
            from .utils.emissions import get_space_info
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            router = APIRouter()
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            @router.post("/image", tags=["Image Task"])
            async def evaluate_image(request: ImageEvaluationRequest):
                """
                Placeholder for image task evaluation.

                No evaluation is run: the response contains a fixed "not yet
                implemented" message, the username and space URL returned by
                ``get_space_info()``, and an echo of the request's configuration.
                """
                owner, url = get_space_info()

                # Echo the submitted configuration back to the caller.
                received_config = {
                    "dataset_name": request.dataset_name,
                    "test_size": request.test_size,
                    "test_seed": request.test_seed,
                    "model_description": request.model_description,
                }

                return {
                    "message": "Image evaluation endpoint not yet implemented",
                    "username": owner,
                    "space_url": url,
                    "received_config": received_config,
                }
         | 
    	
        tasks/text.py
    ADDED
    
    | @@ -0,0 +1,73 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from fastapi import APIRouter
         | 
| 2 | 
            +
            from datetime import datetime
         | 
| 3 | 
            +
            from datasets import load_dataset
         | 
| 4 | 
            +
            from sklearn.metrics import accuracy_score
         | 
| 5 | 
            +
            import random
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            from .utils.evaluation import TextEvaluationRequest
         | 
| 8 | 
            +
            from .utils.emissions import tracker, clean_emissions_data, get_space_info
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            router = APIRouter()
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            @router.post("/text", tags=["Text Task"])
            async def evaluate_text(request: TextEvaluationRequest):
                """
                Evaluate a text classification model for climate disinformation detection.

                Loads ``request.dataset_name``, converts its string labels to integer
                ids, takes a test split out of the "train" split using
                ``request.test_size`` and ``request.test_seed``, and scores placeholder
                random predictions against it while the shared emissions tracker
                measures an "inference" task.

                Returns:
                    A dict with the space username and URL, a submission timestamp,
                    the model description, the accuracy, the tracker's energy and
                    emissions figures multiplied by 1000, the cleaned emissions
                    record, and the dataset configuration that was used.
                """
                # Get space info
                username, space_url = get_space_info()

                # Define the label mapping (dataset label string -> integer class id)
                LABEL_MAPPING = {
                    "0_not_relevant": 0,
                    "1_not_happening": 1,
                    "2_not_human": 2,
                    "3_not_bad": 3,
                    "4_solutions_harmful_unnecessary": 4,
                    "5_science_unreliable": 5,
                    "6_proponents_biased": 6,
                    "7_fossil_fuels_needed": 7,
                }
                # Highest class id, so the placeholder predictor stays in sync with the mapping.
                highest_label = max(LABEL_MAPPING.values())

                # Load and prepare the dataset
                dataset = load_dataset(request.dataset_name)

                # Convert string labels to integers
                dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})

                # Split dataset
                train_test = dataset["train"].train_test_split(
                    test_size=request.test_size,
                    seed=request.test_seed,
                )
                test_dataset = train_test["test"]

                # Start tracking emissions
                tracker.start()
                tracker.start_task("inference")
                try:
                    # Make random predictions (placeholder for actual model inference)
                    true_labels = test_dataset["label"]
                    predictions = [
                        random.randint(0, highest_label) for _ in range(len(true_labels))
                    ]
                finally:
                    # Always close the task: the tracker is shared by every request, so
                    # a failure during inference must not leave "inference" open on it.
                    emissions_data = tracker.stop_task()

                # Calculate accuracy
                accuracy = accuracy_score(true_labels, predictions)

                # Prepare results dictionary
                results = {
                    "username": username,
                    "space_url": space_url,
                    "submission_timestamp": datetime.now().isoformat(),
                    "model_description": request.model_description,
                    "accuracy": float(accuracy),
                    # Scaled by 1000 to match the key names
                    # (presumably kWh -> Wh and kg -> g; confirm against codecarbon units).
                    "energy_consumed_wh": emissions_data.energy_consumed * 1000,
                    "emissions_gco2eq": emissions_data.emissions * 1000,
                    "emissions_data": clean_emissions_data(emissions_data),
                    "dataset_config": {
                        "dataset_name": request.dataset_name,
                        "test_size": request.test_size,
                        "test_seed": request.test_seed,
                    },
                }

                return results
         | 
    	
        tasks/utils/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        tasks/utils/emissions.py
    ADDED
    
    | @@ -0,0 +1,28 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from codecarbon import EmissionsTracker
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Initialize tracker
         | 
| 5 | 
            +
            tracker = EmissionsTracker(allow_multiple_runs=True)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            class EmissionsData:
                """Plain holder for an energy-consumption figure and an emissions figure."""

                def __init__(self, energy_consumed: float, emissions: float):
                    # Both values are stored unchanged as public attributes.
                    self.energy_consumed, self.emissions = energy_consumed, emissions
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            def clean_emissions_data(emissions_data):
                """Return the instance attributes of ``emissions_data`` minus unwanted fields.

                The attributes ``timestamp``, ``project_name``, ``experiment_id``,
                ``latitude`` and ``longitude`` are left out; every other entry of the
                object's ``__dict__`` is copied into a new dict unchanged.
                """
                excluded = ('timestamp', 'project_name', 'experiment_id', 'latitude', 'longitude')
                cleaned = {}
                for name, value in emissions_data.__dict__.items():
                    if name in excluded:
                        continue
                    cleaned[name] = value
                return cleaned
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            def get_space_info():
                """Get the space username and URL from environment variables.

                Reads the ``SPACE_ID`` environment variable.

                Returns:
                    A ``(username, space_url)`` tuple. When ``SPACE_ID`` is unset or
                    empty, the fallback pair ``("local-user", "local-development")``
                    is returned. Otherwise the username is the part of ``SPACE_ID``
                    before the first "/" (the whole value when it has no "/"), and
                    the URL is ``SPACE_ID`` appended to the Hugging Face spaces
                    base URL.
                """
                space_name = os.getenv("SPACE_ID", "")
                if not space_name:
                    return "local-user", "local-development"

                # Splitting a non-empty string and formatting it cannot raise, so no
                # exception handler is needed around these two lines.
                username = space_name.split("/")[0]
                space_url = f"https://huggingface.co/spaces/{space_name}"
                return username, space_url
         | 
    	
        tasks/utils/evaluation.py
    ADDED
    
    | @@ -0,0 +1,19 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from typing import Optional
         | 
| 2 | 
            +
            from pydantic import BaseModel, Field
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            class BaseEvaluationRequest(BaseModel):
                """Request parameters shared by the evaluation endpoints of every task."""

                # Fraction of the data held out as the test split. The bounds are
                # exclusive because the text task passes this value straight to
                # `train_test_split`, which rejects a float size of exactly 0 or 1;
                # validating here turns that into a request validation error instead
                # of a failure inside the endpoint.
                test_size: float = Field(0.2, gt=0.0, lt=1.0, description="Size of the test split (strictly between 0 and 1)")
                # Seed forwarded to the split so the same test set is drawn every time.
                test_seed: int = Field(42, ge=0, description="Random seed for reproducibility")
                # Free-text description echoed back in the evaluation results.
                model_description: Optional[str] = Field("No description provided", description="Description of the model being evaluated")
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            class TextEvaluationRequest(BaseEvaluationRequest):
                """Evaluation request for the text task; adds the dataset to evaluate on."""

                dataset_name: str = Field(
                    "QuotaClimat/frugalaichallenge-text-train",
                    description="The name of the dataset on HuggingFace Hub",
                )
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            class ImageEvaluationRequest(BaseEvaluationRequest):
                """Evaluation request for the image task; adds the dataset to evaluate on."""

                dataset_name: str = Field(
                    "placeholder/frugalaichallenge-image-train",
                    description="The name of the dataset on HuggingFace Hub",
                )
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            class AudioEvaluationRequest(BaseEvaluationRequest):
                """Evaluation request for the audio task; adds the dataset to evaluate on."""

                dataset_name: str = Field(
                    "placeholder/frugalaichallenge-audio-train",
                    description="The name of the dataset on HuggingFace Hub",
                )
         | 
 
			
