Spaces:

WIPI
/

DeceptivePatternDetector

Sleeping

App Files Files Community

Asmit Nayak committed 30 days ago

Commit

f784cc0

1 Parent(s): cc71657

Add debug mode functionality with data loading and mock analysis

Browse files

Files changed (1) hide show

app.py +233 -5

app.py CHANGED Viewed

@@ -16,6 +16,12 @@ from py_files import yolo
 from py_files import dataset_upload
 from py_files.ocr import get_text_from_image_doc
 def take_screenshot_and_process(url, gemini_api_key):
     """
     Take a screenshot of the provided URL and process it for deceptive pattern detection.
@@ -25,6 +31,26 @@ def take_screenshot_and_process(url, gemini_api_key):
     print(f"[CONSOLE] URL: {url}")
     print(f"[CONSOLE] Gemini API Key provided: {'Yes' if gemini_api_key else 'No'}")
     if not url or not (url.startswith("http://") or url.startswith("https://")):
         print(f"[CONSOLE] ERROR: Invalid URL format - {url}")
         yield (None, "❌ Invalid URL format - please use http:// or https://", None, None)
@@ -687,6 +713,163 @@ def create_annotated_screenshot(image_path, df, eval_dir=None):
         return image_path
 # Create the Gradio interface
 def create_interface():
     global scheduler, dataset_dir, jsonl_path
@@ -930,7 +1113,9 @@ def create_interface():
         # Detailed results table spanning both columns (full width)
         results_dataframe = gr.Dataframe(
             label="Detailed Results (Scroll right to see all columns)",
-            visible=False
         )
         # Download button for results CSV
@@ -1124,9 +1309,10 @@ def create_interface():
                     else:
                         print(f"[CONSOLE] Warning: Image path not found or invalid: {dataset_image_path}")
                         image_df = pd.DataFrame([{"id": save_url, "image": None, "annotated_image": None}])
-                    dataset_upload.update_dataset_with_new_splits(save_dict)
-                    dataset_upload.update_dataset_with_new_images(image_df, scheduler=scheduler, dataset_dir=dataset_dir, jsonl_path=jsonl_path)
                     # Prepare CSV for download
                     csv_file_path = save_results_to_csv(final_result, url)
@@ -1220,7 +1406,49 @@ if __name__ == "__main__":
             print(f"[CONSOLE] Failed to decrypt system prompts, exiting...")
             exit(1)
-    # Set debug mode, when debug mode is on then we specify a specific split to get the table from and
     print(f"[CONSOLE] ===== STARTING GRADIO APPLICATION =====")
     print(f"[CONSOLE] Creating Gradio interface...")

 from py_files import dataset_upload
 from py_files.ocr import get_text_from_image_doc
+# Global debug mode variables
+DEBUG_MODE = False  # Switched to True at startup when the DEBUG_MODE environment variable is truthy
+DEBUG_TABLE_DF = None  # Pre-analyzed results table produced by load_debug_table_data()
+DEBUG_ORIGINAL_IMAGE = None  # Temp-file path of the original screenshot from load_debug_images()
+DEBUG_ANNOTATED_IMAGE = None  # Temp-file path of the annotated screenshot from load_debug_images()
 def take_screenshot_and_process(url, gemini_api_key):
     """
     Take a screenshot of the provided URL and process it for deceptive pattern detection.
     print(f"[CONSOLE] URL: {url}")
     print(f"[CONSOLE] Gemini API Key provided: {'Yes' if gemini_api_key else 'No'}")
+    # Check if debug mode is enabled
+    if DEBUG_MODE:
+        print(f"[CONSOLE] ===== DEBUG MODE ENABLED =====")
+        print(f"[CONSOLE] [DEBUG MODE] Using pre-loaded debug data instead of actual analysis")
+        # Create temporary directory for debug processing
+        eval_dir = tempfile.mkdtemp()
+        print(f"[CONSOLE] [DEBUG MODE] Created temporary directory: {eval_dir}")
+        # Use the mock pipeline with debug data
+        for result in create_mock_analysis_with_debug_data(
+            DEBUG_TABLE_DF,
+            DEBUG_ORIGINAL_IMAGE,
+            DEBUG_ANNOTATED_IMAGE,
+            eval_dir
+        ):
+            yield result
+        return
+    # Normal mode - proceed with regular processing
     if not url or not (url.startswith("http://") or url.startswith("https://")):
         print(f"[CONSOLE] ERROR: Invalid URL format - {url}")
         yield (None, "❌ Invalid URL format - please use http:// or https://", None, None)
         return image_path
+def load_debug_table_data(repo_id, split_name):
+    """Load pre-analyzed table from HuggingFace dataset."""
+    from datasets import load_dataset
+    print(f"[CONSOLE] [DEBUG MODE] Loading table data from repo: {repo_id}, split: {split_name}")
+    try:
+        dataset = load_dataset(repo_id, split=split_name)
+        df = dataset.to_pandas()
+        df = df[["Text", "Element Type", "Top Co-ordinates", "Bottom Co-ordinates", "Font Size", "Background Color", "Font Color", "Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]]
+        print(f"[CONSOLE] [DEBUG MODE] Loaded table with {len(df)} rows")
+        return df
+    except Exception as e:
+        print(f"[CONSOLE] [DEBUG MODE] Error loading table data: {e}")
+        # Return a dummy dataframe as fallback
+        return pd.DataFrame({
+            'Text': ['Sample Button', 'Sample Checkbox'],
+            'Element Type': ['button', 'checked checkbox'],
+            'Top Co-ordinates': ['(100, 100)', '(200, 200)'],
+            'Bottom Co-ordinates': ['(200, 150)', '(250, 230)'],
+            'Deceptive Design Category': ['forced-action', 'non-deceptive'],
+            'Deceptive Design Subtype': ['obstruction', 'not-applicable']
+        })
+def load_debug_images(repo_id, image_id):
+    """Load original and annotated images from HuggingFace dataset."""
+    from datasets import load_dataset
+    print(f"[CONSOLE] [DEBUG MODE] Loading images from repo: {repo_id}, image_id: {image_id}")
+    try:
+        dataset = load_dataset(repo_id, split='train')
+        # Find the record with matching ID
+        for record in dataset:
+            if record.get('id') == image_id:
+                original_image = record.get('image')
+                annotated_image = record.get('annotated')
+                # Save images to temporary files
+                original_path = None
+                annotated_path = None
+                if original_image:
+                    temp_original = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+                    if hasattr(original_image, 'save'):
+                        original_image.save(temp_original.name)
+                    original_path = temp_original.name
+                    print(f"[CONSOLE] [DEBUG MODE] Original image saved to: {original_path}")
+                if annotated_image:
+                    temp_annotated = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+                    if hasattr(annotated_image, 'save'):
+                        annotated_image.save(temp_annotated.name)
+                    annotated_path = temp_annotated.name
+                    print(f"[CONSOLE] [DEBUG MODE] Annotated image saved to: {annotated_path}")
+                return original_path, annotated_path
+        print(f"[CONSOLE] [DEBUG MODE] Image ID '{image_id}' not found in dataset")
+        return None, None
+    except Exception as e:
+        print(f"[CONSOLE] [DEBUG MODE] Error loading images: {e}")
+        return None, None
+def create_mock_analysis_with_debug_data(debug_table_df, debug_original_image, debug_annotated_image, eval_dir):
+    """
+    Simulate the analysis pipeline using debug data with time delays.
+    Yields progress updates like the real function.
+    """
+    print(f"[CONSOLE] [DEBUG MODE] Starting mock analysis pipeline")
+    try:
+        # Create necessary directories
+        screenshots_dir = os.path.join(eval_dir, "screenshots")
+        ocr_dir = os.path.join(eval_dir, "ocr")
+        yolo_dir = os.path.join(eval_dir, "yolo")
+        csv_yolo_dir = os.path.join(eval_dir, "csv_with_yolo")
+        gemini_fs_dir = os.path.join(eval_dir, "gemini_fs")
+        for d in [screenshots_dir, ocr_dir, yolo_dir, csv_yolo_dir, gemini_fs_dir]:
+            os.makedirs(d, exist_ok=True)
+        # Step 1: Taking screenshot
+        print(f"[CONSOLE] [DEBUG MODE] STEP 1/6: Mock screenshot capture")
+        yield (None, "Step 1/6: Taking screenshot of the website...", None, eval_dir)
+        time.sleep(2)
+        # Copy debug original image to screenshots directory
+        screenshot_path = os.path.join(screenshots_dir, "screenshot.png")
+        if debug_original_image and os.path.exists(debug_original_image):
+            shutil.copy(debug_original_image, screenshot_path)
+            print(f"[CONSOLE] [DEBUG MODE] Copied original image to: {screenshot_path}")
+        yield (None, "📷 Screenshot captured! Starting analysis...", screenshot_path, eval_dir)
+        # Step 2: Setup directories
+        print(f"[CONSOLE] [DEBUG MODE] STEP 2/6: Setting up directories")
+        yield (None, "Step 2/6: Setting up processing directories...", screenshot_path, eval_dir)
+        time.sleep(0.5)
+        # Step 3: Run OCR (mock)
+        print(f"[CONSOLE] [DEBUG MODE] STEP 3/6: Mock OCR analysis")
+        yield (None, "Step 3/6: Running OCR analysis...", screenshot_path, eval_dir)
+        time.sleep(0.2)
+        print(f"[CONSOLE] [DEBUG MODE] Mock OCR completed")
+        # Step 4: Run YOLO (mock)
+        print(f"[CONSOLE] [DEBUG MODE] STEP 4/6: Mock YOLO detection")
+        yield (None, "Step 4/6: Running YOLO object detection...", screenshot_path, eval_dir)
+        time.sleep(1)
+        print(f"[CONSOLE] [DEBUG MODE] Mock YOLO completed")
+        # Step 5: Combine results (mock)
+        print(f"[CONSOLE] [DEBUG MODE] STEP 5/6: Mock combining results")
+        yield (None, "Step 5/6: Combining OCR and element detection results...", screenshot_path, eval_dir)
+        time.sleep(0.3)
+        print(f"[CONSOLE] [DEBUG MODE] Mock combining completed")
+        # Step 6: Gemini analysis (mock)
+        print(f"[CONSOLE] [DEBUG MODE] STEP 6/6: Mock Gemini analysis")
+        yield (None, "Step 6/6: Analyzing for deceptive patterns with Gemini...", screenshot_path, eval_dir)
+        yield (None, "🔧 Preparing data for Gemini analysis...", screenshot_path, eval_dir)
+        total_elements = len(debug_table_df) if debug_table_df is not None else 0
+        yield (None, f"📊 Processing {total_elements} UI elements for deceptive pattern analysis...", screenshot_path, eval_dir)
+        time.sleep(0.4)
+        print(f"[CONSOLE] [DEBUG MODE] Mock Gemini analysis completed")
+        # Return the debug data
+        deceptive_count = 0
+        if debug_table_df is not None and 'Deceptive Design Category' in debug_table_df.columns:
+            deceptive_count = len(debug_table_df[debug_table_df['Deceptive Design Category'].str.lower() != 'non-deceptive'])
+        yield (None, f"📊 Analysis complete! Found {deceptive_count} deceptive patterns out of {total_elements} UI elements", screenshot_path, eval_dir)
+        yield (None, "🎨 Creating annotated screenshot with colored highlights...", screenshot_path, eval_dir)
+        # Use the debug annotated image
+        annotated_path = screenshot_path
+        if debug_annotated_image and os.path.exists(debug_annotated_image):
+            annotated_path = os.path.join(eval_dir, "annotated_screenshot.png")
+            shutil.copy(debug_annotated_image, annotated_path)
+            print(f"[CONSOLE] [DEBUG MODE] Copied annotated image to: {annotated_path}")
+        status_message = "✅ Analysis complete! All elements annotated with colored bounding boxes."
+        yield (debug_table_df, status_message, annotated_path, eval_dir)
+        print(f"[CONSOLE] [DEBUG MODE] Mock analysis pipeline completed successfully")
+    except Exception as e:
+        print(f"[CONSOLE] [DEBUG MODE] Error in mock analysis: {str(e)}")
+        yield (None, f"❌ Error in debug mode: {str(e)}", None, eval_dir)
+        raise gr.Error(f"Debug mode error: {str(e)}")
 # Create the Gradio interface
 def create_interface():
     global scheduler, dataset_dir, jsonl_path
         # Detailed results table spanning both columns (full width)
         results_dataframe = gr.Dataframe(
             label="Detailed Results (Scroll right to see all columns)",
+            visible=False,
+            wrap=True,
+            column_widths=["14%", "5%", "8%", "8%", "3%", "11%", "11%", "9%", "9%", "19%"]  # One explicit width per column (ten in total); the first column (Text) gets 14%
         )
         # Download button for results CSV
                     else:
                         print(f"[CONSOLE] Warning: Image path not found or invalid: {dataset_image_path}")
                         image_df = pd.DataFrame([{"id": save_url, "image": None, "annotated_image": None}])
+                    if not DEBUG_MODE:
+                        dataset_upload.update_dataset_with_new_splits(save_dict)
+                        dataset_upload.update_dataset_with_new_images(image_df, scheduler=scheduler, dataset_dir=dataset_dir, jsonl_path=jsonl_path)
                     # Prepare CSV for download
                     csv_file_path = save_results_to_csv(final_result, url)
             print(f"[CONSOLE] Failed to decrypt system prompts, exiting...")
             exit(1)
+    # ===== DEBUG MODE CONFIGURATION =====
+    # Check if debug mode is enabled via environment variable
+    debug_mode_env = os.environ.get("DEBUG_MODE", "false").lower()
+    if debug_mode_env in ["true", "1", "yes", "on"]:
+        DEBUG_MODE = True
+        print(f"[CONSOLE] ===== DEBUG MODE ENABLED =====")
+        # Get debug configuration from environment variables
+        debug_table_split = os.environ.get("DEBUG_TABLE_SPLIT", "")
+        debug_image_id = os.environ.get("DEBUG_TABLE_SPLIT", "")
+        print(f"[CONSOLE] [DEBUG MODE] Table Split: {debug_table_split}")
+        print(f"[CONSOLE] [DEBUG MODE] Image ID: {debug_image_id}")
+        # Load debug data from HuggingFace datasets
+        try:
+            repo_id = os.environ.get("REPO_ID")
+            image_repo_id = os.environ.get("IMAGE_REPO_ID")
+            if not repo_id or not image_repo_id:
+                print(f"[CONSOLE] [DEBUG MODE] ERROR: REPO_ID or IMAGE_REPO_ID not set in environment")
+                print(f"[CONSOLE] [DEBUG MODE] REPO_ID: {repo_id}")
+                print(f"[CONSOLE] [DEBUG MODE] IMAGE_REPO_ID: {image_repo_id}")
+            else:
+                print(f"[CONSOLE] [DEBUG MODE] Loading data from REPO_ID: {repo_id}")
+                print(f"[CONSOLE] [DEBUG MODE] Loading images from IMAGE_REPO_ID: {image_repo_id}")
+                # Load table data
+                DEBUG_TABLE_DF = load_debug_table_data(repo_id, debug_table_split)
+                print(f"[CONSOLE] [DEBUG MODE] Table loaded: {len(DEBUG_TABLE_DF) if DEBUG_TABLE_DF is not None else 0} rows")
+                # Load images
+                DEBUG_ORIGINAL_IMAGE, DEBUG_ANNOTATED_IMAGE = load_debug_images(image_repo_id, debug_image_id)
+                print(f"[CONSOLE] [DEBUG MODE] Original image: {DEBUG_ORIGINAL_IMAGE}")
+                print(f"[CONSOLE] [DEBUG MODE] Annotated image: {DEBUG_ANNOTATED_IMAGE}")
+                if DEBUG_TABLE_DF is None or DEBUG_ORIGINAL_IMAGE is None:
+                    print(f"[CONSOLE] [DEBUG MODE] WARNING: Failed to load debug data, debug mode may not work correctly")
+        except Exception as e:
+            print(f"[CONSOLE] [DEBUG MODE] ERROR loading debug data: {e}")
+            print(f"[CONSOLE] [DEBUG MODE] Debug mode will use fallback dummy data")
+    else:
+        print(f"[CONSOLE] Debug mode is OFF (set DEBUG_MODE=true to enable)")
     print(f"[CONSOLE] ===== STARTING GRADIO APPLICATION =====")
     print(f"[CONSOLE] Creating Gradio interface...")