Spaces:
Sleeping
Sleeping
Upload mcda_v4.py
Browse files- src/mcda_v4.py +729 -0
src/mcda_v4.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import Dict, List, Tuple
|
| 4 |
+
|
| 5 |
+
class UtilityCalculator:
|
| 6 |
+
"""
|
| 7 |
+
Multi-Criteria Decision Analysis (MCDA) utility calculator.
|
| 8 |
+
|
| 9 |
+
This class implements a weighted utility model for ranking alternatives across
|
| 10 |
+
multiple criteria. It normalizes scores to a 0-100 scale, applies user-defined
|
| 11 |
+
weights, and calculates final utility scores for decision making.
|
| 12 |
+
|
| 13 |
+
Core Components:
|
| 14 |
+
- Data Storage: Raw product scores and configuration
|
| 15 |
+
- Normalization: Min-max scaling with direction handling
|
| 16 |
+
- Utility Calculation: Weighted linear aggregation
|
| 17 |
+
- Caching: Performance optimization for repeated calculations
|
| 18 |
+
- I/O: Excel integration and results formatting
|
| 19 |
+
|
| 20 |
+
Attributes:
|
| 21 |
+
categories (List[str]): Names of evaluation criteria
|
| 22 |
+
maximize (Dict[str, bool]): Direction of optimization per category
|
| 23 |
+
weights (Dict[str, float]): Importance weights per category (sum=1.0)
|
| 24 |
+
products (Dict[str, Dict[str, float]]): Raw scores {product: {category: score}}
|
| 25 |
+
thresholds (Dict[str, float]): Minimum acceptable values per category
|
| 26 |
+
objectives (Dict[str, float]): Target/ideal values per category
|
| 27 |
+
use_penalties (bool): Enable/disable threshold-objective penalty system
|
| 28 |
+
_cache_valid (bool): Cache validity flag for performance optimization
|
| 29 |
+
_cached_normalized (Dict): Cached normalized scores
|
| 30 |
+
_cached_utilities (Dict): Cached utility calculations
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def __init__(self, categories: List[str], maximize: Dict[str, bool]):
    """
    Initialize the MCDA calculator with evaluation criteria.

    Sets up the decision framework by defining what criteria to evaluate and
    their optimization direction. Every category starts with an equal weight,
    the aggregation method defaults to 'weighted_sum', the penalty system is
    off, and no products are loaded.

    Args:
        categories: List of category names (e.g., ['price', 'quality', 'speed'])
        maximize: Dict indicating optimization direction per category
            {category: True} for "higher is better"
            {category: False} for "lower is better"

    Raises:
        ValueError: If categories is empty, or if categories and maximize
            keys don't match exactly

    Example:
        calc = UtilityCalculator(
            categories=['price', 'quality'],
            maximize={'price': False, 'quality': True}
        )
    """
    # An empty criteria list would pass the key comparison below (two empty
    # sets are equal) and then fail with ZeroDivisionError when the equal
    # weights are computed, so reject it explicitly.
    n_categories = len(categories)
    if n_categories == 0:
        raise ValueError("At least one category is required")

    # Validate configuration consistency
    if set(categories) != set(maximize.keys()):
        raise ValueError("Categories and maximize keys must match exactly")

    self.categories = categories
    self.maximize = maximize

    # Initialize equal weights (will sum to 1.0)
    self.weights = {cat: 1.0/n_categories for cat in self.categories}

    # Initialize aggregation method
    self.aggregation_method = 'weighted_sum'  # Default to weighted sum

    # Initialize threshold and objective values for penalty system
    self.thresholds = {cat: None for cat in self.categories}  # Minimum acceptable values
    self.objectives = {cat: None for cat in self.categories}  # Target/ideal values
    self.use_penalties = False  # Global penalty system toggle

    # Initialize data storage
    self.products = {}  # {product_name: {category: raw_score}}

    # Initialize cache management: one validity flag guards both caches
    self._cache_valid = False
    self._cached_normalized = None  # Stores normalized scores
    self._cached_utilities = None  # Stores final utility values
|
| 81 |
+
|
| 82 |
+
@classmethod
def from_excel(cls, file_path: str, config_sheet: str = 'Config', data_sheet: str = 'Data'):
    """
    Build a calculator from an Excel workbook.

    Two sheets are read from the workbook:
        - the config sheet, which lists each category and whether it is
          maximized
        - the data sheet, which holds one row per product

    Args:
        file_path: Path to the Excel file holding configuration and data
        config_sheet: Sheet name with the category configuration (default: 'Config')
        data_sheet: Sheet name with the product data (default: 'Data')

    Returns:
        UtilityCalculator: Instance configured from the config sheet with
        every data-sheet row loaded as a product

    Raises:
        ValueError: If required columns are missing from either sheet

    Expected Excel Format:
        Config Sheet: columns ['category', 'maximize']
        Data Sheet: columns ['name'] + all categories from config

    Example:
        calc = UtilityCalculator.from_excel('decisions.xlsx')
    """
    # --- configuration sheet -------------------------------------------
    config = pd.read_excel(file_path, sheet_name=config_sheet)

    required_cols = ['category', 'maximize']
    for col in required_cols:
        if col not in config.columns:
            raise ValueError(f"Config sheet must have columns: {required_cols}")

    category_column = config['category']
    categories = category_column.tolist()
    directions = dict(zip(category_column, config['maximize']))
    instance = cls(categories, directions)

    # --- data sheet ----------------------------------------------------
    data = pd.read_excel(file_path, sheet_name=data_sheet)

    available = data.columns
    missing_cols = []
    for col in ['name', *categories]:
        if col not in available:
            missing_cols.append(col)
    if missing_cols:
        raise ValueError(f"Data sheet missing columns: {missing_cols}")

    # Each row becomes one {'name': ..., category: score, ...} record
    instance.add_products_batch(data.to_dict('records'))
    return instance
|
| 137 |
+
|
| 138 |
+
def add_products_batch(self, products_data: List[Dict]):
    """
    Register several products in one call.

    Every entry must carry a 'name' key; the remaining keys are passed to
    add_product() as that product's scores. Entries are shallow-copied
    first, so the caller's dictionaries are left untouched.

    Args:
        products_data: List of dictionaries, each shaped like
            {'name': str, category1: float, category2: float, ...}

    Example:
        products = [
            {'name': 'Product A', 'price': 100, 'quality': 8.5},
            {'name': 'Product B', 'price': 150, 'quality': 9.2}
        ]
        calc.add_products_batch(products)
    """
    for entry in products_data:
        # Work on a copy: popping 'name' must not mutate the input record
        scores = entry.copy()
        self.add_product(scores.pop('name'), scores)
|
| 162 |
+
|
| 163 |
+
def add_product(self, name: str, scores: Dict[str, float]):
    """
    Register one product together with its per-category scores.

    The scores must cover every category in self.categories. A copy of the
    mapping is stored under `name` (replacing any earlier entry with the
    same name) and the cache is marked invalid so later calculations see
    the new data.

    Args:
        name: Unique identifier for the product
        scores: Dictionary mapping categories to numeric scores
            {category: score} for all categories in self.categories

    Raises:
        ValueError: If scores don't include all required categories

    Example:
        calc.add_product('Laptop X', {'price': 999, 'performance': 85, 'battery': 8})
    """
    # Every configured category needs a score
    for cat in self.categories:
        if cat not in scores:
            raise ValueError(f"Must provide scores for: {self.categories}")

    # Data changed, so cached results no longer apply
    self._cache_valid = False

    # Keep our own copy so later edits by the caller don't leak in
    self.products[name] = scores.copy()
|
| 191 |
+
|
| 192 |
+
def set_weights(self, weights: Dict[str, float]):
    """
    Update the importance weights for categories.

    Weights represent the relative importance of each category in the final
    decision. The supplied values are merged over the current weights and
    the merged result must sum to 1.0, so a partial update is only accepted
    when the overall total stays valid. Nothing is modified when validation
    fails.

    Marks the cache invalid; because a single flag guards both caches, the
    normalized scores are recomputed as well on the next calculation.

    Args:
        weights: Dictionary mapping categories to weight values. Keys must
            be known categories; together with any weights left unchanged
            they must sum to 1.0

    Raises:
        ValueError: If a key is not a configured category, or if the
            resulting weights don't sum to 1.0 (within floating point
            tolerance)

    Example:
        calc.set_weights({'price': 0.4, 'quality': 0.4, 'support': 0.2})
    """
    # Unknown keys would count toward the sum check yet never be used when
    # utilities are computed, silently shrinking the effective total.
    known = set(self.categories)
    unknown = [cat for cat in weights if cat not in known]
    if unknown:
        raise ValueError(f"Unknown categories in weights: {unknown}")

    # Validate the weights as they would look AFTER the update; checking
    # only the supplied values lets a partial dict leave the total != 1.0.
    merged = {**self.weights, **weights}
    if not np.isclose(sum(merged.values()), 1.0):
        raise ValueError("Weights must sum to 1.0")

    # Update weights in place (keeps the existing dict object)
    self.weights.update(weights)

    # Invalidate cache since weights affect utility calculations
    self._cache_valid = False
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def normalize_scores(self) -> Dict[str, Dict[str, float]]:
    """
    Normalize all product scores to a 0-100 scale with direction handling.

    Routes to normalize_scores_with_penalties() when the penalty system is
    enabled, otherwise to _standard_normalize_scores(). The result is cached
    and returned directly on later calls until the cache is invalidated.

    Returns:
        Dict[str, Dict[str, float]]: Nested dictionary structure
            {product_name: {category: normalized_score}}
            where normalized_score is in range [0, 100]

    Raises:
        ValueError: Propagated from the selected normalization routine
            (for example when no products have been added)
    """
    # Return cached results if available and valid
    if self._cache_valid and self._cached_normalized:
        return self._cached_normalized

    # Route to penalty system if enabled
    if self.use_penalties:
        normalized = self.normalize_scores_with_penalties()
    else:
        # Use standard min-max normalization
        normalized = self._standard_normalize_scores()

    # Cache results and mark as valid.
    # The single _cache_valid flag also guards _cached_utilities. Any
    # utilities still stored were computed before the invalidation that
    # brought us here, so drop them; otherwise flipping the flag back to
    # True would make the next calculate_utilities() return stale values.
    self._cached_normalized = normalized
    self._cached_utilities = None
    self._cache_valid = True
    return normalized
|
| 255 |
+
|
| 256 |
+
def _standard_normalize_scores(self) -> Dict[str, Dict[str, float]]:
    """
    Plain min-max normalization, without the penalty system.

    For each category the raw scores of all products are rescaled to
    0-100. Maximize categories map the largest raw score to 100; minimize
    categories map the smallest raw score to 100. When every product has
    the same raw score in a category, each one receives a neutral 50.

    Returns:
        Dict[str, Dict[str, float]]: {product_name: {category: score}}

    Raises:
        ValueError: If no products have been added
    """
    if not self.products:
        raise ValueError("No products to analyze")

    result = {}

    for cat in self.categories:
        # Collect this category's raw scores, keyed by product
        column = {name: scores[cat] for name, scores in self.products.items()}
        lowest = min(column.values())
        highest = max(column.values())
        spread = highest - lowest

        for name, raw in column.items():
            if spread == 0:
                # No variation between products: neutral score
                scaled = 50
            elif self.maximize[cat]:
                # Higher raw score -> higher normalized score
                scaled = ((raw - lowest) / spread) * 100
            else:
                # Lower raw score -> higher normalized score
                scaled = ((highest - raw) / spread) * 100

            result.setdefault(name, {})[cat] = scaled

    return result
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def normalize_scores_with_penalties(self) -> Dict[str, Dict[str, float]]:
    """
    Normalize scores using threshold/objective penalty system.

    Each category is scored in three zones. The description below is for
    maximize categories; for minimize categories the comparisons are
    mirrored (lower raw scores are better):
        1. Worse than threshold: score is 0 (elimination)
        2. Threshold up to (not including) objective: linear scale from
           0 towards 50
        3. At/beyond objective: 50-100, min-max scaled among the products
           that reach the objective. If only one product qualifies, or all
           qualifying raw scores are equal, the score is 100.

    Returns:
        Dict[str, Dict[str, float]]: Nested dictionary structure
            {product_name: {category: penalized_score}}

    Raises:
        ValueError: If validate_penalty_configuration() reports problems,
            if no products have been added, or if any category still has
            an unset (None) threshold or objective

    Example:
        Category: reliability (maximize=True, threshold=80, objective=95)
        Raw scores: [70, 85, 98] -> Penalized: [0, ~16.7, 100]
    """
    # Validate penalty configuration
    validation_errors = self.validate_penalty_configuration()
    if validation_errors:
        raise ValueError(f"Penalty configuration errors: {validation_errors}")

    # Validate that we have data to normalize
    if not self.products:
        raise ValueError("No products to analyze")

    # validate_penalty_configuration() only inspects thresholds/objectives
    # while use_penalties is True, so a direct call with the flag off could
    # otherwise reach the comparisons below with None and die with TypeError.
    unset = [cat for cat in self.categories
             if self.thresholds[cat] is None or self.objectives[cat] is None]
    if unset:
        raise ValueError(f"Thresholds and objectives must be set for: {unset}")

    penalized = {}

    # Process each category independently
    for category in self.categories:
        threshold = self.thresholds[category]
        objective = self.objectives[category]
        maximize = self.maximize[category]

        # Extract all values for this category
        values = [self.products[p][category] for p in self.products]

        # Zone-3 scaling range, computed once per category instead of once
        # per product: min/max among the products that reach the objective.
        if maximize:
            qualified_values = [v for v in values if v >= objective]
        else:
            qualified_values = [v for v in values if v <= objective]

        if len(qualified_values) > 1:
            min_qual = min(qualified_values)
            max_qual = max(qualified_values)
            range_qual = max_qual - min_qual
        else:
            # One (or no) qualifier: nothing to scale against
            min_qual = max_qual = None
            range_qual = 0

        # Calculate penalized scores for each product
        for product in self.products:
            raw_score = self.products[product][category]

            if maximize:
                # For maximize categories: higher is better
                if raw_score < threshold:
                    # Zone 1: below threshold = elimination
                    penalized_score = 0.0
                elif raw_score < objective:
                    # Zone 2: linear progress from threshold to objective,
                    # mapped onto the 0-50 band
                    progress = (raw_score - threshold) / (objective - threshold)
                    penalized_score = progress * 50.0
                elif range_qual > 0:
                    # Zone 3: scale qualifiers onto the 50-100 band
                    penalized_score = 50 + ((raw_score - min_qual) / range_qual) * 50
                else:
                    # Zone 3 with a single qualifier or identical scores
                    penalized_score = 100.0
            else:
                # For minimize categories: lower is better
                if raw_score > threshold:
                    # Zone 1: above threshold = elimination
                    penalized_score = 0.0
                elif raw_score > objective:
                    # Zone 2: linear progress from threshold down to objective
                    progress = (threshold - raw_score) / (threshold - objective)
                    penalized_score = progress * 50.0
                elif range_qual > 0:
                    # Zone 3: the lowest qualifying raw score gets 100
                    penalized_score = 50 + ((max_qual - raw_score) / range_qual) * 50
                else:
                    penalized_score = 100.0

            penalized.setdefault(product, {})[category] = penalized_score

    return penalized
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def set_aggregation_method(self, method: str):
    """
    Choose how normalized scores are combined into a utility.

    Selecting 'threshold_penalty' switches the penalty system on; selecting
    any other supported method switches it off. The cache is invalidated
    in either case.

    Args:
        method: Aggregation approach to use
            'weighted_sum': Linear aggregation (full compensation)
            'geometric_mean': Geometric aggregation (penalizes poor performance)
            'threshold_penalty': Threshold/objective penalty system

    Raises:
        ValueError: If method is not supported

    Example:
        calc.set_aggregation_method('threshold_penalty')  # Enable threshold penalties
    """
    valid_methods = ['weighted_sum', 'geometric_mean', 'threshold_penalty']
    is_supported = method in valid_methods
    if not is_supported:
        raise ValueError(f"Method must be one of: {valid_methods}")

    # Calculation method changed: cached results are no longer usable
    self._cache_valid = False

    self.aggregation_method = method

    # Penalties are tied to the 'threshold_penalty' method only
    self.use_penalties = (method == 'threshold_penalty')
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def set_thresholds(self, thresholds: Dict[str, float]):
    """
    Set the minimum acceptable raw score for each category.

    The supplied mapping must cover every configured category. The values
    are merged into self.thresholds and the cache is invalidated.

    Args:
        thresholds: Dictionary mapping categories to minimum threshold values
            {category: threshold_value} in raw score units

    Raises:
        ValueError: If thresholds don't include all categories

    Example:
        calc.set_thresholds({'reliability': 80, 'performance': 60, 'cost': 1000})
    """
    # Categories the caller forgot to supply
    missing_cats = set(self.categories) - set(thresholds.keys())

    if not missing_cats:
        self.thresholds.update(thresholds)
        # Penalty calculations may change, so cached results are stale
        self._cache_valid = False
        return

    raise ValueError(f"Must provide thresholds for all categories. Missing: {missing_cats}")
|
| 480 |
+
|
| 481 |
+
def set_objectives(self, objectives: Dict[str, float]):
    """
    Set the target/ideal raw score for each category.

    The supplied mapping must cover every configured category. The values
    are merged into self.objectives and the cache is invalidated.

    Args:
        objectives: Dictionary mapping categories to target objective values
            {category: objective_value} in raw score units

    Raises:
        ValueError: If objectives don't include all categories

    Example:
        calc.set_objectives({'reliability': 95, 'performance': 90, 'cost': 500})
    """
    # Categories the caller forgot to supply
    missing_cats = set(self.categories) - set(objectives.keys())

    if not missing_cats:
        self.objectives.update(objectives)
        # Penalty calculations may change, so cached results are stale
        self._cache_valid = False
        return

    raise ValueError(f"Must provide objectives for all categories. Missing: {missing_cats}")
|
| 509 |
+
|
| 510 |
+
def set_penalty_system(self, enabled: bool):
    """
    Turn the threshold/objective penalty system on or off.

    Stores the flag in self.use_penalties and invalidates the cache, since
    switching changes which normalization routine is used.

    Args:
        enabled: True to enable penalty system, False to use standard normalization

    Example:
        calc.set_penalty_system(True)  # Enable penalties
    """
    # Cached results were produced under the previous setting
    self._cache_valid = False
    self.use_penalties = enabled
|
| 531 |
+
|
| 532 |
+
def validate_penalty_configuration(self) -> List[str]:
    """
    Check the penalty configuration and report every problem found.

    Nothing is checked while the penalty system is disabled; an empty list
    is returned in that case. Otherwise two passes run over the categories:
    first unset (None) thresholds/objectives are reported, then
    threshold/objective pairs whose order contradicts the category's
    optimization direction.

    Returns:
        List[str]: List of validation error messages (empty if valid)

    Example:
        errors = calc.validate_penalty_configuration()
        if errors:
            print("Configuration issues:", errors)
    """
    errors: List[str] = []

    if not self.use_penalties:
        return errors

    # Pass 1: values that were never set
    for cat in self.categories:
        if self.thresholds[cat] is None:
            errors.append(f"Missing threshold value for category: {cat}")
        if self.objectives[cat] is None:
            errors.append(f"Missing objective value for category: {cat}")

    # Pass 2: threshold must sit on the "worse" side of the objective
    for cat in self.categories:
        threshold, objective = self.thresholds[cat], self.objectives[cat]
        if threshold is None or objective is None:
            continue

        if self.maximize[cat]:
            # Higher is better: threshold may not exceed the objective
            if threshold > objective:
                errors.append(f"Category '{cat}': threshold ({threshold}) should be <= objective ({objective}) for maximize categories")
        elif threshold < objective:
            # Lower is better: threshold may not be below the objective
            errors.append(f"Category '{cat}': threshold ({threshold}) should be >= objective ({objective}) for minimize categories")

    return errors
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
|
| 576 |
+
def calculate_utilities(self) -> Dict[str, float]:
    """
    Calculate final weighted utility scores for all products.

    Combines the normalized scores from normalize_scores() with the
    category weights, using the configured aggregation method. Results are
    cached and returned directly while the cache is valid.

    Returns:
        Dict[str, float]: Mapping of product names to utility scores
            {product_name: utility_score} where utility is roughly 0-100 scale

    Raises:
        ValueError: If self.aggregation_method is not one of
            'weighted_sum', 'threshold_penalty' or 'geometric_mean';
            also propagated from normalize_scores()

    Utility Formula:
        'weighted_sum' / 'threshold_penalty':
            utility = sum(weight[category] * normalized_score[category])
        'geometric_mean':
            utility = 100 * prod(max(score/100, 0.001) ** weight[category])

    Example Output:
        {'Product A': 78.5, 'Product B': 65.2, 'Product C': 82.1}

    Note:
        Higher utility scores indicate better overall performance considering
        all criteria and their relative importance weights.
    """
    # Return cached results if available and valid
    if self._cache_valid and self._cached_utilities:
        return self._cached_utilities

    # Get normalized scores (may trigger normalization if needed)
    normalized = self.normalize_scores()

    # The attribute can be assigned directly, bypassing the setter's
    # validation; fail clearly instead of hitting an unbound local below.
    method = self.aggregation_method
    if method not in ('weighted_sum', 'threshold_penalty', 'geometric_mean'):
        raise ValueError(f"Unsupported aggregation method: {method!r}")

    utilities = {}

    for product, scores in normalized.items():
        if method == 'geometric_mean':
            # Geometric aggregation: penalizes poor performance
            utility = 1.0
            for cat in self.categories:
                # Convert to 0-1 scale; floor at 0.001 so a single zero
                # score does not force the whole product to zero
                score_01 = max(scores[cat] / 100.0, 0.001)
                utility *= score_01 ** self.weights[cat]

            # Convert back to 0-100 scale for consistency with weighted sum
            utility *= 100.0
        else:
            # 'weighted_sum' and 'threshold_penalty' both use a linear
            # weighted sum; they differ only in how scores were normalized
            utility = sum(self.weights[cat] * scores[cat] for cat in self.categories)

        utilities[product] = utility

    # Cache results
    self._cached_utilities = utilities
    return utilities
|
| 637 |
+
|
| 638 |
+
def rank_products(self) -> List[Tuple[str, float]]:
    """
    Order every product from best to worst by its utility score.

    The scores are taken from ``self.calculate_utilities()``; this method
    only sorts the resulting (name, score) pairs so the strongest
    candidate comes first.

    Returns:
        List[Tuple[str, float]]: (product_name, utility_score) tuples,
            highest utility first. The sort is stable, so products with
            equal utility keep the order in which the utilities mapping
            yields them.

    Example Output:
        [('Product C', 82.1), ('Product A', 78.5), ('Product B', 65.2)]

    Usage:
        rankings = calc.rank_products()
        best_product, best_score = rankings[0]
    """
    ranked = list(self.calculate_utilities().items())
    # reverse=True keeps tied entries in their original relative order.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked
|
| 660 |
+
|
| 661 |
+
def get_results_df(self) -> pd.DataFrame:
    """
    Generate comprehensive results as a pandas DataFrame.

    Builds one row per product containing its final utility together with
    the raw and normalized score of every category, which makes the table
    suitable for detailed analysis and reporting.

    Returns:
        pd.DataFrame: Results table with columns, in this order:
            - Product: product name
            - Utility: final utility score
            - {category}_raw: original score for each category
            - {category}_norm: normalized score for each category
            Rows are sorted by utility score (highest first). Products
            with equal utility keep the order in which they appear in
            ``self.products``, because a stable sort is requested.

        Returns an empty DataFrame (no columns) if no products have been
        added.

    Usage:
        df = calc.get_results_df()
        df.to_excel('results.xlsx', index=False)
    """
    # Nothing to report: skip utility calculation entirely.
    if not self.products:
        return pd.DataFrame()

    utilities = self.calculate_utilities()
    normalized = self.normalize_scores()

    results = []
    for product in self.products:
        raw_scores = self.products[product]
        norm_scores = normalized[product]

        # Product and Utility lead, followed by a raw/norm pair per category.
        row = {'Product': product, 'Utility': utilities[product]}
        for category in self.categories:
            row[f'{category}_raw'] = raw_scores[category]
            row[f'{category}_norm'] = norm_scores[category]
        results.append(row)

    # The default sort kind (quicksort) is not stable, so tied utilities
    # could be reordered arbitrarily; mergesort keeps ties in input order,
    # the same tie behaviour as the stable sort used by rank_products().
    return pd.DataFrame(results).sort_values(
        'Utility', ascending=False, kind='mergesort'
    )
|
| 706 |
+
|
| 707 |
+
def print_summary(self):
    """
    Print a short overview of the calculator's configuration and results.

    Intended for interactive use: everything is written to standard
    output and nothing is returned.

    Output includes:
        - The names of the evaluation categories, comma separated
        - The number of products loaded
        - The current rankings from ``self.rank_products()``, numbered
          from 1 with utilities shown to one decimal place (omitted when
          no products exist)
    """
    print("\nUtility Calculator Summary")
    category_names = ', '.join(self.categories)
    print(f"Categories: {category_names}")
    product_count = len(self.products)
    print(f"Products: {product_count}")

    # Without products there is nothing to rank.
    if not self.products:
        return

    # Rankings are computed before the heading is printed, so a failure
    # in rank_products() leaves no dangling "Rankings:" line.
    ordered = self.rank_products()
    print("\nRankings:")
    for position, (name, score) in enumerate(ordered, start=1):
        print(f" {position}. {name}: {score:.1f}")
|