Spaces:

NavyDevilDoc
/

MCDA

Sleeping

App Files Files Community

NavyDevilDoc committed on Jul 19

Commit

b29d555

verified ·

1 Parent(s): e3fb153

Upload mcda_v4.py

Browse files

Files changed (1) hide show

src/mcda_v4.py +729 -0

src/mcda_v4.py ADDED Viewed

	@@ -0,0 +1,729 @@

+import pandas as pd
+import numpy as np
+from typing import Dict, List, Tuple
+class UtilityCalculator:
+    """
+    Multi-Criteria Decision Analysis (MCDA) utility calculator.
+    This class implements a weighted utility model for ranking alternatives across
+    multiple criteria. It normalizes scores to a 0-100 scale, applies user-defined
+    weights, and calculates final utility scores for decision making.
+    Core Components:
+    - Data Storage: Raw product scores and configuration
+    - Normalization: Min-max scaling with direction handling, or an optional
+      threshold/objective penalty scheme
+    - Utility Calculation: Weighted sum or weighted geometric mean of the
+      normalized scores
+    - Caching: Normalized scores and utilities are reused until add_product()
+      or one of the setters clears the shared validity flag
+    - I/O: Excel integration and results formatting
+    Attributes:
+        categories (List[str]): Names of evaluation criteria
+        maximize (Dict[str, bool]): Direction of optimization per category
+            (True = higher is better, False = lower is better)
+        weights (Dict[str, float]): Importance weights per category; start out
+            equal and are expected to sum to 1.0
+        aggregation_method (str): 'weighted_sum' (default), 'geometric_mean'
+            or 'threshold_penalty'
+        products (Dict[str, Dict[str, float]]): Raw scores {product: {category: score}}
+        thresholds (Dict[str, float]): Worst acceptable raw value per category
+            (a floor for maximize categories, a ceiling for minimize
+            categories); None until configured
+        objectives (Dict[str, float]): Target/ideal raw value per category;
+            None until configured
+        use_penalties (bool): Enable/disable threshold-objective penalty system
+        _cache_valid (bool): Cache validity flag shared by both caches
+        _cached_normalized (Dict): Cached normalized scores (None when empty)
+        _cached_utilities (Dict): Cached utility calculations (None when empty)
+    """
+    def __init__(self, categories: List[str], maximize: Dict[str, bool]):
+        """
+        Initialize the MCDA calculator with evaluation criteria.
+        Sets up the decision framework by defining what criteria to evaluate and
+        their optimization direction. Initializes equal weights for all categories.
+        Args:
+            categories: List of category names (e.g., ['price', 'quality', 'speed'])
+            maximize: Dict indicating optimization direction per category
+                    {category: True} for "higher is better"
+                    {category: False} for "lower is better"
+        Raises:
+            ValueError: If categories and maximize keys don't match exactly
+        Example:
+            calc = UtilityCalculator(
+                categories=['price', 'quality'],
+                maximize={'price': False, 'quality': True}
+            )
+        """
+        self.categories = categories
+        self.maximize = maximize
+        # Validate configuration consistency
+        if set(categories) != set(maximize.keys()):
+            raise ValueError("Categories and maximize keys must match exactly")
+        # Initialize equal weights (will sum to 1.0)
+        n_categories = len(self.categories)
+        self.weights = {cat: 1.0/n_categories for cat in self.categories}
+        # Initialize aggregation method
+        self.aggregation_method = 'weighted_sum'  # Default to weighted sum
+        # Initialize threshold and objective values for penalty system
+        self.thresholds = {cat: None for cat in self.categories}  # Minimum acceptable values
+        self.objectives = {cat: None for cat in self.categories}  # Target/ideal values
+        self.use_penalties = False  # Global penalty system toggle
+        # Initialize data storage
+        self.products = {}  # {product_name: {category: raw_score}}
+        # Initialize cache management
+        self._cache_valid = False
+        self._cached_normalized = None  # Stores normalized scores
+        self._cached_utilities = None   # Stores final utility values
+    @classmethod
+    def from_excel(cls, file_path: str, config_sheet: str = 'Config', data_sheet: str = 'Data'):
+        """
+        Factory method to create calculator from Excel configuration.
+        This method provides a convenient way to set up the calculator using
+        Excel files for configuration management. Expects two sheets:
+        - Config sheet: category definitions and optimization directions
+        - Data sheet: product names and their scores
+        Args:
+            file_path: Path to Excel file containing configuration and data
+            config_sheet: Name of sheet with category configuration (default: 'Config')
+            data_sheet: Name of sheet with product data (default: 'Data')
+        Returns:
+            UtilityCalculator: Configured instance with loaded data
+        Raises:
+            ValueError: If required columns are missing from either sheet
+        Expected Excel Format:
+            Config Sheet: columns ['category', 'maximize']
+            Data Sheet: columns ['name'] + all categories from config
+        Example:
+            calc = UtilityCalculator.from_excel('decisions.xlsx')
+        """
+        # Load and validate configuration sheet
+        config_df = pd.read_excel(file_path, sheet_name=config_sheet)
+        required_cols = ['category', 'maximize']
+        if not all(col in config_df.columns for col in required_cols):
+            raise ValueError(f"Config sheet must have columns: {required_cols}")
+        # Extract configuration parameters
+        categories = config_df['category'].tolist()
+        maximize = dict(zip(config_df['category'], config_df['maximize']))
+        # Create calculator instance
+        calc = cls(categories, maximize)
+        # Load and validate data sheet
+        data_df = pd.read_excel(file_path, sheet_name=data_sheet)
+        required_data_cols = ['name'] + categories
+        missing_cols = [col for col in required_data_cols if col not in data_df.columns]
+        if missing_cols:
+            raise ValueError(f"Data sheet missing columns: {missing_cols}")
+        # Load products into calculator
+        products = data_df.to_dict('records')
+        calc.add_products_batch(products)
+        return calc
+    def add_products_batch(self, products_data: List[Dict]):
+        """
+        Add multiple products to the calculator in a single operation.
+        This method provides efficient bulk loading of product data. Each product
+        dictionary should contain a 'name' key and scores for all categories.
+        Invalidates cache to ensure fresh calculations.
+        Args:
+            products_data: List of dictionaries, each containing:
+                          {'name': str, category1: float, category2: float, ...}
+        Example:
+            products = [
+                {'name': 'Product A', 'price': 100, 'quality': 8.5},
+                {'name': 'Product B', 'price': 150, 'quality': 9.2}
+            ]
+            calc.add_products_batch(products)
+        """
+        for product in products_data:
+            # Create copy to avoid mutating input data
+            product_copy = product.copy()
+            name = product_copy.pop('name')
+            self.add_product(name, product_copy)
+    def add_product(self, name: str, scores: Dict[str, float]):
+        """
+        Add a single product with its category scores.
+        This is the core method for adding product data. Validates that scores
+        are provided for all required categories and stores the data for analysis.
+        Invalidates cache to ensure calculations reflect new data.
+        Args:
+            name: Unique identifier for the product
+            scores: Dictionary mapping categories to numeric scores
+                   {category: score} for all categories in self.categories
+        Raises:
+            ValueError: If scores don't include all required categories
+        Example:
+            calc.add_product('Laptop X', {'price': 999, 'performance': 85, 'battery': 8})
+        """
+        # Validate completeness of scores
+        if not all(cat in scores for cat in self.categories):
+            raise ValueError(f"Must provide scores for: {self.categories}")
+        # Store product data (copy to prevent external modification)
+        self.products[name] = scores.copy()
+        # Invalidate cache since data has changed
+        self._cache_valid = False
+    def set_weights(self, weights: Dict[str, float]):
+        """
+        Update the importance weights for categories.
+        Weights represent the relative importance of each category in the final
+        decision. They must sum to 1.0 to maintain the utility scale. Only
+        invalidates utility cache since normalization is weight-independent.
+        Args:
+            weights: Dictionary mapping categories to weight values
+                    Must include all categories and sum to 1.0
+        Raises:
+            ValueError: If weights don't sum to 1.0 (within floating point tolerance)
+        Example:
+            calc.set_weights({'price': 0.4, 'quality': 0.4, 'support': 0.2})
+        """
+        # Validate weight constraints
+        if not np.isclose(sum(weights.values()), 1.0):
+            raise ValueError("Weights must sum to 1.0")
+        # Update weights
+        self.weights.update(weights)
+        # Invalidate cache since weights affect utility calculations
+        self._cache_valid = False
+    def normalize_scores(self) -> Dict[str, Dict[str, float]]:
+        """
+        Normalize all product scores to a 0-100 scale with direction handling.
+        This method implements normalization to make scores comparable across
+        different categories and scales. Routes to penalty system if enabled,
+        otherwise uses standard min-max normalization.
+        Uses caching to avoid recomputation when data hasn't changed.
+        Returns:
+            Dict[str, Dict[str, float]]: Nested dictionary structure
+            {product_name: {category: normalized_score}}
+            where normalized_score is in range [0, 100]
+        Raises:
+            ValueError: If no products have been added to analyze
+        """
+        # Return cached results if available and valid
+        if self._cache_valid and self._cached_normalized:
+            return self._cached_normalized
+        # Route to penalty system if enabled
+        if self.use_penalties:
+            normalized = self.normalize_scores_with_penalties()
+        else:
+            # Use standard min-max normalization
+            normalized = self._standard_normalize_scores()
+        # Cache results and mark as valid
+        self._cached_normalized = normalized
+        self._cache_valid = True
+        return normalized
+    def _standard_normalize_scores(self) -> Dict[str, Dict[str, float]]:
+        """
+        Standard min-max normalization without penalty system.
+        This is the original normalization logic extracted into a separate
+        method for clarity and maintainability.
+        Returns:
+            Dict[str, Dict[str, float]]: Standard normalized scores
+        """
+        # Validate that we have data to normalize
+        if not self.products:
+            raise ValueError("No products to analyze")
+        normalized = {}
+        # Process each category independently
+        for category in self.categories:
+            # Extract all values for this category to find range
+            values = [self.products[p][category] for p in self.products]
+            min_val, max_val = min(values), max(values)
+            range_val = max_val - min_val
+            # Normalize each product's score for this category
+            for product in self.products:
+                if product not in normalized:
+                    normalized[product] = {}
+                raw_score = self.products[product][category]
+                # Handle edge case: no variation in scores
+                if range_val == 0:
+                    normalized_score = 50  # Neutral score
+                # Apply direction-aware normalization
+                elif self.maximize[category]:
+                    # Higher raw scores get higher normalized scores
+                    normalized_score = ((raw_score - min_val) / range_val) * 100
+                else:
+                    # Lower raw scores get higher normalized scores
+                    normalized_score = ((max_val - raw_score) / range_val) * 100
+                normalized[product][category] = normalized_score
+        return normalized
+    def normalize_scores_with_penalties(self) -> Dict[str, Dict[str, float]]:
+        """
+        Normalize scores using threshold/objective penalty system.
+        This method implements a three-zone penalty system for each category:
+        1. Below threshold: Score set to 0 (elimination)
+        2. Threshold to objective: Linear penalty scale (graduated)
+        3. At/above objective: Full normalized score (no penalty)
+        The penalty system operates on raw scores before standard normalization,
+        creating a more realistic evaluation that reflects minimum requirements
+        and ideal targets.
+        Returns:
+            Dict[str, Dict[str, float]]: Nested dictionary structure
+            {product_name: {category: penalized_score}}
+            where penalized_score incorporates threshold/objective logic
+        Raises:
+            ValueError: If penalty system is enabled but not properly configured
+        Penalty Logic:
+            For each category:
+            - raw_score < threshold: penalized_score = 0
+            - threshold <= raw_score < objective: linear interpolation
+            - raw_score >= objective: standard normalization
+        Example:
+            Category: reliability (maximize=True, threshold=80, objective=95)
+            Raw scores: [70, 85, 98] → Penalized: [0, ~33, 100]
+        """
+        # Validate penalty configuration
+        validation_errors = self.validate_penalty_configuration()
+        if validation_errors:
+            raise ValueError(f"Penalty configuration errors: {validation_errors}")
+        # Validate that we have data to normalize
+        if not self.products:
+            raise ValueError("No products to analyze")
+        penalized = {}
+        # Process each category independently
+        for category in self.categories:
+            threshold = self.thresholds[category]
+            objective = self.objectives[category]
+            maximize = self.maximize[category]
+            # Extract all values for this category
+            values = [self.products[p][category] for p in self.products]
+            # Calculate penalized scores for each product
+            for product in self.products:
+                if product not in penalized:
+                    penalized[product] = {}
+                raw_score = self.products[product][category]
+                # Apply three-zone penalty logic
+                if maximize:
+                    # For maximize categories: higher is better
+                    if raw_score < threshold:
+                        # Zone 1: Below threshold = elimination
+                        penalized_score = 0.0
+                    elif raw_score < objective:
+                        # Zone 2: Threshold to objective = linear penalty
+                        # Scale from 0 to some intermediate value (e.g., 50)
+                        progress = (raw_score - threshold) / (objective - threshold)
+                        penalized_score = progress * 50.0  # Scale to 0-50 range
+                    else:
+                        # Zone 3: At/above objective = standard normalization
+                        # Find min/max among products that meet objective
+                        qualified_values = [v for v in values if v >= objective]
+                        if len(qualified_values) > 1:
+                            min_qual = min(qualified_values)
+                            max_qual = max(qualified_values)
+                            range_qual = max_qual - min_qual
+                            if range_qual > 0:
+                                penalized_score = 50 + ((raw_score - min_qual) / range_qual) * 50
+                            else:
+                                penalized_score = 100.0  # All qualified scores are equal
+                        else:
+                            penalized_score = 100.0  # Only one or no qualified products
+                else:
+                    # For minimize categories: lower is better
+                    if raw_score > threshold:
+                        # Zone 1: Above threshold = elimination
+                        penalized_score = 0.0
+                    elif raw_score > objective:
+                        # Zone 2: Objective to threshold = linear penalty
+                        progress = (threshold - raw_score) / (threshold - objective)
+                        penalized_score = progress * 50.0
+                    else:
+                        # Zone 3: At/below objective = standard normalization
+                        qualified_values = [v for v in values if v <= objective]
+                        if len(qualified_values) > 1:
+                            min_qual = min(qualified_values)
+                            max_qual = max(qualified_values)
+                            range_qual = max_qual - min_qual
+                            if range_qual > 0:
+                                penalized_score = 50 + ((max_qual - raw_score) / range_qual) * 50
+                            else:
+                                penalized_score = 100.0
+                        else:
+                            penalized_score = 100.0
+                penalized[product][category] = penalized_score
+        return penalized
+    def set_aggregation_method(self, method: str):
+        """
+        Set the aggregation method for utility calculation.
+        This method allows switching between different mathematical approaches
+        for combining normalized scores. Affects risk tolerance and compensation
+        between criteria.
+        Args:
+            method: Aggregation approach to use
+                'weighted_sum': Linear aggregation (full compensation)
+                'geometric_mean': Geometric aggregation (penalizes poor performance)
+                'threshold_penalty': Threshold/objective penalty system
+        Raises:
+            ValueError: If method is not supported
+        Example:
+            calc.set_aggregation_method('threshold_penalty')  # Enable threshold penalties
+        """
+        valid_methods = ['weighted_sum', 'geometric_mean', 'threshold_penalty']
+        if method not in valid_methods:
+            raise ValueError(f"Method must be one of: {valid_methods}")
+        self.aggregation_method = method
+        # Enable penalty system if threshold_penalty method is selected
+        if method == 'threshold_penalty':
+            self.use_penalties = True
+        else:
+            self.use_penalties = False
+        # Invalidate cache since calculation method has changed
+        self._cache_valid = False
+    def set_thresholds(self, thresholds: Dict[str, float]):
+        """
+        Set minimum acceptable threshold values for categories.
+        Thresholds represent the minimum acceptable raw score for each category.
+        Products scoring below threshold in any category will be heavily penalized
+        or eliminated from consideration (depending on penalty settings).
+        Args:
+            thresholds: Dictionary mapping categories to minimum threshold values
+                    {category: threshold_value} in raw score units
+        Raises:
+            ValueError: If thresholds don't include all categories
+        Example:
+            calc.set_thresholds({'reliability': 80, 'performance': 60, 'cost': 1000})
+        """
+        # Validate all categories are included
+        missing_cats = set(self.categories) - set(thresholds.keys())
+        if missing_cats:
+            raise ValueError(f"Must provide thresholds for all categories. Missing: {missing_cats}")
+        # Update thresholds
+        self.thresholds.update(thresholds)
+        # Invalidate cache since penalty calculations may change
+        self._cache_valid = False
+    def set_objectives(self, objectives: Dict[str, float]):
+        """
+        Set target/ideal objective values for categories.
+        Objectives represent the ideal or target raw score for each category.
+        Products meeting or exceeding objectives receive full normalized scores.
+        Products between threshold and objective receive graduated penalties.
+        Args:
+            objectives: Dictionary mapping categories to target objective values
+                    {category: objective_value} in raw score units
+        Raises:
+            ValueError: If objectives don't include all categories
+        Example:
+            calc.set_objectives({'reliability': 95, 'performance': 90, 'cost': 500})
+        """
+        # Validate all categories are included
+        missing_cats = set(self.categories) - set(objectives.keys())
+        if missing_cats:
+            raise ValueError(f"Must provide objectives for all categories. Missing: {missing_cats}")
+        # Update objectives
+        self.objectives.update(objectives)
+        # Invalidate cache since penalty calculations may change
+        self._cache_valid = False
+    def set_penalty_system(self, enabled: bool):
+        """
+        Enable or disable the threshold/objective penalty system.
+        When enabled, products are evaluated using threshold and objective values:
+        - Below threshold: Severe penalty or elimination
+        - Threshold to objective: Linear penalty scale
+        - At/above objective: Full reward
+        When disabled, uses standard min-max normalization without penalties.
+        Args:
+            enabled: True to enable penalty system, False to use standard normalization
+        Example:
+            calc.set_penalty_system(True)  # Enable penalties
+        """
+        self.use_penalties = enabled
+        # Invalidate cache since calculation method changes
+        self._cache_valid = False
+    def validate_penalty_configuration(self) -> List[str]:
+        """
+        Validate the penalty system configuration and return any issues.
+        Checks for common configuration problems like missing values,
+        threshold > objective, or invalid relationships between values.
+        Returns:
+            List[str]: List of validation error messages (empty if valid)
+        Example:
+            errors = calc.validate_penalty_configuration()
+            if errors:
+                print("Configuration issues:", errors)
+        """
+        errors = []
+        if self.use_penalties:
+            # Check for missing threshold/objective values
+            for cat in self.categories:
+                if self.thresholds[cat] is None:
+                    errors.append(f"Missing threshold value for category: {cat}")
+                if self.objectives[cat] is None:
+                    errors.append(f"Missing objective value for category: {cat}")
+            # Check threshold <= objective relationship
+            for cat in self.categories:
+                threshold = self.thresholds[cat]
+                objective = self.objectives[cat]
+                if threshold is not None and objective is not None:
+                    if self.maximize[cat]:
+                        # For maximize: threshold should be <= objective
+                        if threshold > objective:
+                            errors.append(f"Category '{cat}': threshold ({threshold}) should be <= objective ({objective}) for maximize categories")
+                    else:
+                        # For minimize: threshold should be >= objective
+                        if threshold < objective:
+                            errors.append(f"Category '{cat}': threshold ({threshold}) should be >= objective ({objective}) for minimize categories")
+        return errors
+    def calculate_utilities(self) -> Dict[str, float]:
+        """
+        Calculate final weighted utility scores for all products.
+        This method implements the core MCDA calculation by combining normalized
+        scores with user-defined weights. The result is a single utility value
+        per product that can be used for ranking and decision making.
+        Uses caching to avoid recomputation. Depends on normalize_scores() for
+        input data, creating a calculation chain: raw → normalized → utilities.
+        Returns:
+            Dict[str, float]: Mapping of product names to utility scores
+            {product_name: utility_score} where utility is roughly 0-100 scale
+        Utility Formula:
+            utility = Σ(weight[category] * normalized_score[category])
+            for all categories
+        Example Output:
+            {'Product A': 78.5, 'Product B': 65.2, 'Product C': 82.1}
+        Note:
+            Higher utility scores indicate better overall performance considering
+            all criteria and their relative importance weights.
+        """
+        # Return cached results if available and valid
+        if self._cache_valid and self._cached_utilities:
+            return self._cached_utilities
+        # Get normalized scores (may trigger normalization if needed)
+        normalized = self.normalize_scores()
+        utilities = {}
+        # Calculate utilities based on selected aggregation method
+        for product, scores in normalized.items():
+            if self.aggregation_method == 'weighted_sum':
+                # Linear aggregation: full compensation between criteria
+                utility = sum(self.weights[cat] * scores[cat] for cat in self.categories)
+            elif self.aggregation_method == 'threshold_penalty':
+                # Threshold penalty system uses weighted sum on penalized scores
+                utility = sum(self.weights[cat] * scores[cat] for cat in self.categories)
+            elif self.aggregation_method == 'geometric_mean':
+                # Geometric aggregation: penalizes poor performance
+                utility = 1.0
+                for cat in self.categories:
+                    # Convert normalized score to 0-1 scale for geometric mean
+                    # Add small epsilon to avoid zero values that would make product zero
+                    score_01 = max(scores[cat] / 100.0, 0.001)
+                    utility *= score_01 ** self.weights[cat]
+                # Convert back to 0-100 scale for consistency with weighted sum
+                utility *= 100.0
+            utilities[product] = utility
+        # Cache results
+        self._cached_utilities = utilities
+        return utilities
+    def rank_products(self) -> List[Tuple[str, float]]:
+        """
+        Rank all products by utility score in descending order.
+        This method provides the primary output for decision making by ordering
+        products from best (highest utility) to worst (lowest utility). Uses
+        the calculated utilities as the ranking criterion.
+        Returns:
+            List[Tuple[str, float]]: List of (product_name, utility_score) tuples
+            ordered by utility score (highest first)
+        Example Output:
+            [('Product C', 82.1), ('Product A', 78.5), ('Product B', 65.2)]
+        Usage:
+            rankings = calc.rank_products()
+            best_product = rankings[0][0]  # Name of top-ranked product
+            best_score = rankings[0][1]    # Utility score of best product
+        """
+        utilities = self.calculate_utilities()
+        return sorted(utilities.items(), key=lambda x: x[1], reverse=True)
+    def get_results_df(self) -> pd.DataFrame:
+        """
+        Generate comprehensive results as a pandas DataFrame.
+        This method creates a detailed output table showing raw scores, normalized
+        scores, and final utilities for all products. Useful for detailed analysis,
+        reporting, and understanding how the calculations work.
+        Returns:
+            pd.DataFrame: Results table with columns:
+                - Product: product name
+                - Utility: final utility score
+                - {category}_raw: original score for each category
+                - {category}_norm: normalized score for each category
+            Sorted by utility score (highest first)
+        Returns empty DataFrame if no products have been added.
+        Usage:
+            df = calc.get_results_df()
+            df.to_excel('results.xlsx', index=False)
+        """
+        # Handle empty case
+        if not self.products:
+            return pd.DataFrame()
+        # Get calculated values
+        utilities = self.calculate_utilities()
+        normalized = self.normalize_scores()
+        # Build comprehensive results
+        results = []
+        for product in self.products:
+            # Start with product name and utility
+            row = {'Product': product, 'Utility': utilities[product]}
+            # Add raw and normalized scores for each category
+            for category in self.categories:
+                row[f'{category}_raw'] = self.products[product][category]
+                row[f'{category}_norm'] = normalized[product][category]
+            results.append(row)
+        # Return as sorted DataFrame
+        return pd.DataFrame(results).sort_values('Utility', ascending=False)
+    def print_summary(self):
+        """
+        Print a concise summary of the calculator state and results.
+        This method provides a quick overview for interactive use, showing
+        the configuration and current rankings without requiring additional
+        data processing or formatting.
+        Output includes:
+        - Number and names of evaluation categories
+        - Number of products loaded
+        - Current product rankings (if any products exist)
+        """
+        print(f"\nUtility Calculator Summary")
+        print(f"Categories: {', '.join(self.categories)}")
+        print(f"Products: {len(self.products)}")
+        # Show rankings if we have products
+        if self.products:
+            rankings = self.rank_products()
+            print(f"\nRankings:")
+            for i, (product, utility) in enumerate(rankings, 1):
+                print(f"  {i}. {product}: {utility:.1f}")