Asmit Nayak committed on
Commit
f784cc0
·
1 Parent(s): cc71657

Add debug mode functionality with data loading and mock analysis

Browse files
Files changed (1) hide show
  1. app.py +233 -5
app.py CHANGED
@@ -16,6 +16,12 @@ from py_files import yolo
16
  from py_files import dataset_upload
17
  from py_files.ocr import get_text_from_image_doc
18
 
 
 
 
 
 
 
19
  def take_screenshot_and_process(url, gemini_api_key):
20
  """
21
  Take a screenshot of the provided URL and process it for deceptive pattern detection.
@@ -25,6 +31,26 @@ def take_screenshot_and_process(url, gemini_api_key):
25
  print(f"[CONSOLE] URL: {url}")
26
  print(f"[CONSOLE] Gemini API Key provided: {'Yes' if gemini_api_key else 'No'}")
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  if not url or not (url.startswith("http://") or url.startswith("https://")):
29
  print(f"[CONSOLE] ERROR: Invalid URL format - {url}")
30
  yield (None, "❌ Invalid URL format - please use http:// or https://", None, None)
@@ -687,6 +713,163 @@ def create_annotated_screenshot(image_path, df, eval_dir=None):
687
  return image_path
688
 
689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690
  # Create the Gradio interface
691
  def create_interface():
692
  global scheduler, dataset_dir, jsonl_path
@@ -930,7 +1113,9 @@ def create_interface():
930
  # Detailed results table spanning both columns (full width)
931
  results_dataframe = gr.Dataframe(
932
  label="Detailed Results (Scroll right to see all columns)",
933
- visible=False
 
 
934
  )
935
 
936
  # Download button for results CSV
@@ -1124,9 +1309,10 @@ def create_interface():
1124
  else:
1125
  print(f"[CONSOLE] Warning: Image path not found or invalid: {dataset_image_path}")
1126
  image_df = pd.DataFrame([{"id": save_url, "image": None, "annotated_image": None}])
1127
-
1128
- dataset_upload.update_dataset_with_new_splits(save_dict)
1129
- dataset_upload.update_dataset_with_new_images(image_df, scheduler=scheduler, dataset_dir=dataset_dir, jsonl_path=jsonl_path)
 
1130
 
1131
  # Prepare CSV for download
1132
  csv_file_path = save_results_to_csv(final_result, url)
@@ -1220,7 +1406,49 @@ if __name__ == "__main__":
1220
  print(f"[CONSOLE] Failed to decrypt system prompts, exiting...")
1221
  exit(1)
1222
 
1223
- # Set debug mode, when debug mode is on then we specify a specific split to get the table from and
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224
 
1225
  print(f"[CONSOLE] ===== STARTING GRADIO APPLICATION =====")
1226
  print(f"[CONSOLE] Creating Gradio interface...")
 
16
  from py_files import dataset_upload
17
  from py_files.ocr import get_text_from_image_doc
18
 
19
+ # Global debug mode variables
20
+ DEBUG_MODE = False
21
+ DEBUG_TABLE_DF = None
22
+ DEBUG_ORIGINAL_IMAGE = None
23
+ DEBUG_ANNOTATED_IMAGE = None
24
+
25
  def take_screenshot_and_process(url, gemini_api_key):
26
  """
27
  Take a screenshot of the provided URL and process it for deceptive pattern detection.
 
31
  print(f"[CONSOLE] URL: {url}")
32
  print(f"[CONSOLE] Gemini API Key provided: {'Yes' if gemini_api_key else 'No'}")
33
 
34
+ # Check if debug mode is enabled
35
+ if DEBUG_MODE:
36
+ print(f"[CONSOLE] ===== DEBUG MODE ENABLED =====")
37
+ print(f"[CONSOLE] [DEBUG MODE] Using pre-loaded debug data instead of actual analysis")
38
+
39
+ # Create temporary directory for debug processing
40
+ eval_dir = tempfile.mkdtemp()
41
+ print(f"[CONSOLE] [DEBUG MODE] Created temporary directory: {eval_dir}")
42
+
43
+ # Use the mock pipeline with debug data
44
+ for result in create_mock_analysis_with_debug_data(
45
+ DEBUG_TABLE_DF,
46
+ DEBUG_ORIGINAL_IMAGE,
47
+ DEBUG_ANNOTATED_IMAGE,
48
+ eval_dir
49
+ ):
50
+ yield result
51
+ return
52
+
53
+ # Normal mode - proceed with regular processing
54
  if not url or not (url.startswith("http://") or url.startswith("https://")):
55
  print(f"[CONSOLE] ERROR: Invalid URL format - {url}")
56
  yield (None, "❌ Invalid URL format - please use http:// or https://", None, None)
 
713
  return image_path
714
 
715
 
716
def load_debug_table_data(repo_id, split_name):
    """Load the pre-analyzed results table used by debug mode.

    Args:
        repo_id: Identifier of the HuggingFace dataset repository to read.
        split_name: Name of the dataset split that holds the table.

    Returns:
        A ``pandas.DataFrame`` restricted to the result columns listed in
        ``columns`` below. If anything goes wrong while loading (the
        ``datasets`` package is unavailable, the repo/split cannot be read,
        or an expected column is missing) a two-row placeholder table with
        the *same* columns is returned, so consumers always see one schema.
    """
    # Single source of truth for the table schema: used both to select the
    # columns of the real data and to shape the fallback table.
    columns = [
        "Text",
        "Element Type",
        "Top Co-ordinates",
        "Bottom Co-ordinates",
        "Font Size",
        "Background Color",
        "Font Color",
        "Deceptive Design Category",
        "Deceptive Design Subtype",
        "Reasoning",
    ]

    print(f"[CONSOLE] [DEBUG MODE] Loading table data from repo: {repo_id}, split: {split_name}")
    try:
        # Imported inside the try block so that a missing `datasets` package
        # degrades to the fallback table instead of raising out of this loader.
        from datasets import load_dataset

        dataset = load_dataset(repo_id, split=split_name)
        df = dataset.to_pandas()[columns]
        print(f"[CONSOLE] [DEBUG MODE] Loaded table with {len(df)} rows")
        return df
    except Exception as e:
        print(f"[CONSOLE] [DEBUG MODE] Error loading table data: {e}")
        # Return a dummy dataframe as fallback. It carries every column of
        # the real table; the original fallback only had six of the ten.
        fallback = pd.DataFrame({
            'Text': ['Sample Button', 'Sample Checkbox'],
            'Element Type': ['button', 'checked checkbox'],
            'Top Co-ordinates': ['(100, 100)', '(200, 200)'],
            'Bottom Co-ordinates': ['(200, 150)', '(250, 230)'],
            'Font Size': ['N/A', 'N/A'],
            'Background Color': ['N/A', 'N/A'],
            'Font Color': ['N/A', 'N/A'],
            'Deceptive Design Category': ['forced-action', 'non-deceptive'],
            'Deceptive Design Subtype': ['obstruction', 'not-applicable'],
            'Reasoning': [
                'Placeholder row (debug data could not be loaded)',
                'Placeholder row (debug data could not be loaded)',
            ],
        })
        return fallback[columns]
738
+
739
+
740
def _save_debug_image_to_temp(image, label):
    """Write *image* to a fresh temporary PNG file and return its path.

    Returns ``None`` (and creates no file) when *image* is ``None`` or does
    not expose a ``save`` method. *label* is only used in the log line.
    """
    if image is None or not hasattr(image, 'save'):
        return None

    # Reserve a unique file name, then close the handle right away: keeping it
    # open leaks a descriptor and blocks writing to the same path on Windows.
    handle = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
    handle.close()
    try:
        image.save(handle.name)
    except Exception:
        # Do not leave an empty/partial temp file behind on a failed save.
        if os.path.exists(handle.name):
            os.remove(handle.name)
        raise
    print(f"[CONSOLE] [DEBUG MODE] {label} image saved to: {handle.name}")
    return handle.name


def load_debug_images(repo_id, image_id):
    """Load the original and annotated images for debug mode.

    The ``train`` split of *repo_id* is scanned for the first record whose
    ``id`` equals *image_id*; its images are written to temporary PNG files.

    Args:
        repo_id: Identifier of the HuggingFace dataset repository to read.
        image_id: Value of the ``id`` field of the wanted record.

    Returns:
        ``(original_path, annotated_path)``. Either element is ``None`` when
        the corresponding image is absent or cannot be saved; both are
        ``None`` when the record is not found or loading fails.
    """
    print(f"[CONSOLE] [DEBUG MODE] Loading images from repo: {repo_id}, image_id: {image_id}")
    try:
        # Imported inside the try block so a missing `datasets` package is
        # reported like any other load failure instead of raising.
        from datasets import load_dataset

        dataset = load_dataset(repo_id, split='train')

        # Find the record with matching ID
        for record in dataset:
            if record.get('id') != image_id:
                continue

            original_image = record.get('image')
            # The upload code in this file stores the annotated picture in an
            # "annotated_image" column; the legacy 'annotated' key is kept as
            # a fallback. NOTE(review): confirm against the image dataset schema.
            annotated_image = record.get('annotated_image')
            if annotated_image is None:
                annotated_image = record.get('annotated')

            original_path = _save_debug_image_to_temp(original_image, "Original")
            annotated_path = _save_debug_image_to_temp(annotated_image, "Annotated")
            return original_path, annotated_path

        print(f"[CONSOLE] [DEBUG MODE] Image ID '{image_id}' not found in dataset")
        return None, None

    except Exception as e:
        print(f"[CONSOLE] [DEBUG MODE] Error loading images: {e}")
        return None, None
780
+
781
+
782
def create_mock_analysis_with_debug_data(debug_table_df, debug_original_image, debug_annotated_image, eval_dir, *, delay_scale=1.0):
    """Simulate the analysis pipeline with pre-loaded debug data.

    This is a generator that yields the same kind of progress tuples as the
    real pipeline: ``(results_table, status_message, image_path, eval_dir)``.
    The table is ``None`` for every update except the final one.

    Args:
        debug_table_df: Pre-analyzed results table (``pandas.DataFrame``) or
            ``None``.
        debug_original_image: Path to the original screenshot, or ``None``.
        debug_annotated_image: Path to the annotated screenshot, or ``None``.
        eval_dir: Working directory; the per-step sub-directories are created
            inside it.
        delay_scale: Multiplier applied to the simulated per-step delays.
            ``1.0`` (default) keeps the original timings; ``0`` disables the
            delays entirely.

    Yields:
        Progress tuples as described above. ``image_path`` is ``None`` until
        an image has actually been copied into *eval_dir*, so consumers never
        receive a path to a file that does not exist.

    Raises:
        gr.Error: If any step fails, after an error tuple has been yielded.
    """
    print(f"[CONSOLE] [DEBUG MODE] Starting mock analysis pipeline")

    def pause(seconds):
        # Simulated processing time for a step, scaled by the caller.
        wait = seconds * delay_scale
        if wait > 0:
            time.sleep(wait)

    try:
        # Create necessary directories
        screenshots_dir = os.path.join(eval_dir, "screenshots")
        for sub_dir in (
            screenshots_dir,
            os.path.join(eval_dir, "ocr"),
            os.path.join(eval_dir, "yolo"),
            os.path.join(eval_dir, "csv_with_yolo"),
            os.path.join(eval_dir, "gemini_fs"),
        ):
            os.makedirs(sub_dir, exist_ok=True)

        # Step 1: Taking screenshot
        print(f"[CONSOLE] [DEBUG MODE] STEP 1/6: Mock screenshot capture")
        yield (None, "Step 1/6: Taking screenshot of the website...", None, eval_dir)
        pause(2)

        # Copy debug original image to screenshots directory. The path is only
        # published when the copy really happened; otherwise it stays None.
        screenshot_path = None
        if debug_original_image and os.path.exists(debug_original_image):
            screenshot_path = os.path.join(screenshots_dir, "screenshot.png")
            shutil.copy(debug_original_image, screenshot_path)
            print(f"[CONSOLE] [DEBUG MODE] Copied original image to: {screenshot_path}")

        yield (None, "📷 Screenshot captured! Starting analysis...", screenshot_path, eval_dir)

        # Step 2: Setup directories
        print(f"[CONSOLE] [DEBUG MODE] STEP 2/6: Setting up directories")
        yield (None, "Step 2/6: Setting up processing directories...", screenshot_path, eval_dir)
        pause(0.5)

        # Step 3: Run OCR (mock)
        print(f"[CONSOLE] [DEBUG MODE] STEP 3/6: Mock OCR analysis")
        yield (None, "Step 3/6: Running OCR analysis...", screenshot_path, eval_dir)
        pause(0.2)
        print(f"[CONSOLE] [DEBUG MODE] Mock OCR completed")

        # Step 4: Run YOLO (mock)
        print(f"[CONSOLE] [DEBUG MODE] STEP 4/6: Mock YOLO detection")
        yield (None, "Step 4/6: Running YOLO object detection...", screenshot_path, eval_dir)
        pause(1)
        print(f"[CONSOLE] [DEBUG MODE] Mock YOLO completed")

        # Step 5: Combine results (mock)
        print(f"[CONSOLE] [DEBUG MODE] STEP 5/6: Mock combining results")
        yield (None, "Step 5/6: Combining OCR and element detection results...", screenshot_path, eval_dir)
        pause(0.3)
        print(f"[CONSOLE] [DEBUG MODE] Mock combining completed")

        # Step 6: Gemini analysis (mock)
        print(f"[CONSOLE] [DEBUG MODE] STEP 6/6: Mock Gemini analysis")
        yield (None, "Step 6/6: Analyzing for deceptive patterns with Gemini...", screenshot_path, eval_dir)
        yield (None, "🔧 Preparing data for Gemini analysis...", screenshot_path, eval_dir)

        total_elements = len(debug_table_df) if debug_table_df is not None else 0
        yield (None, f"📊 Processing {total_elements} UI elements for deceptive pattern analysis...", screenshot_path, eval_dir)
        pause(0.4)

        print(f"[CONSOLE] [DEBUG MODE] Mock Gemini analysis completed")

        # Count rows whose category is anything other than 'non-deceptive'.
        # Rows with a missing category are skipped rather than being reported
        # as deceptive, and values are coerced to str so non-string cells
        # cannot break the comparison.
        deceptive_count = 0
        if debug_table_df is not None and 'Deceptive Design Category' in debug_table_df.columns:
            categories = (
                debug_table_df['Deceptive Design Category']
                .dropna()
                .astype(str)
                .str.strip()
                .str.lower()
            )
            deceptive_count = int((categories != 'non-deceptive').sum())

        yield (None, f"📊 Analysis complete! Found {deceptive_count} deceptive patterns out of {total_elements} UI elements", screenshot_path, eval_dir)
        yield (None, "🎨 Creating annotated screenshot with colored highlights...", screenshot_path, eval_dir)

        # Use the debug annotated image; fall back to the plain screenshot
        # (possibly None) when no annotated image is available.
        annotated_path = screenshot_path
        if debug_annotated_image and os.path.exists(debug_annotated_image):
            annotated_path = os.path.join(eval_dir, "annotated_screenshot.png")
            shutil.copy(debug_annotated_image, annotated_path)
            print(f"[CONSOLE] [DEBUG MODE] Copied annotated image to: {annotated_path}")

        status_message = "✅ Analysis complete! All elements annotated with colored bounding boxes."
        yield (debug_table_df, status_message, annotated_path, eval_dir)

        print(f"[CONSOLE] [DEBUG MODE] Mock analysis pipeline completed successfully")

    except Exception as e:
        print(f"[CONSOLE] [DEBUG MODE] Error in mock analysis: {str(e)}")
        # Surface the failure in the status area first, then abort the run.
        yield (None, f"❌ Error in debug mode: {str(e)}", None, eval_dir)
        raise gr.Error(f"Debug mode error: {str(e)}") from e
871
+
872
+
873
  # Create the Gradio interface
874
  def create_interface():
875
  global scheduler, dataset_dir, jsonl_path
 
1113
  # Detailed results table spanning both columns (full width)
1114
  results_dataframe = gr.Dataframe(
1115
  label="Detailed Results (Scroll right to see all columns)",
1116
+ visible=False,
1117
+ wrap=True,
1118
+ column_widths=["14%", "5%", "8%", "8%", "3%", "11%", "11%", "9%", "9%", "19%"] # First column (Text) gets 15% width, others auto-sized
1119
  )
1120
 
1121
  # Download button for results CSV
 
1309
  else:
1310
  print(f"[CONSOLE] Warning: Image path not found or invalid: {dataset_image_path}")
1311
  image_df = pd.DataFrame([{"id": save_url, "image": None, "annotated_image": None}])
1312
+
1313
+ if not DEBUG_MODE:
1314
+ dataset_upload.update_dataset_with_new_splits(save_dict)
1315
+ dataset_upload.update_dataset_with_new_images(image_df, scheduler=scheduler, dataset_dir=dataset_dir, jsonl_path=jsonl_path)
1316
 
1317
  # Prepare CSV for download
1318
  csv_file_path = save_results_to_csv(final_result, url)
 
1406
  print(f"[CONSOLE] Failed to decrypt system prompts, exiting...")
1407
  exit(1)
1408
 
1409
# ===== DEBUG MODE CONFIGURATION =====
# Debug mode replaces the real analysis with pre-loaded data. It is driven by
# environment variables:
#   DEBUG_MODE        - "true" / "1" / "yes" / "on" (case-insensitive) enables it
#   DEBUG_TABLE_SPLIT - dataset split that holds the pre-analyzed table
#   DEBUG_IMAGE_ID    - id of the image record to load; defaults to the value
#                       of DEBUG_TABLE_SPLIT when unset
#   REPO_ID / IMAGE_REPO_ID - dataset repositories for the table / the images
# NOTE(review): per the diff hunk header this section sits inside the
# `if __name__ == "__main__":` guard of app.py; re-indent accordingly when
# applying it to the real file.
debug_mode_env = os.environ.get("DEBUG_MODE", "false").strip().lower()
if debug_mode_env in {"true", "1", "yes", "on"}:
    DEBUG_MODE = True
    print(f"[CONSOLE] ===== DEBUG MODE ENABLED =====")

    # Get debug configuration from environment variables
    debug_table_split = os.environ.get("DEBUG_TABLE_SPLIT", "")
    # The image id used to be read from DEBUG_TABLE_SPLIT as well (copy-paste
    # slip), so it could never differ from the split name. It now has its own
    # variable and only falls back to the split name for existing setups.
    debug_image_id = os.environ.get("DEBUG_IMAGE_ID") or debug_table_split

    print(f"[CONSOLE] [DEBUG MODE] Table Split: {debug_table_split}")
    print(f"[CONSOLE] [DEBUG MODE] Image ID: {debug_image_id}")

    # Load debug data from HuggingFace datasets
    try:
        repo_id = os.environ.get("REPO_ID")
        image_repo_id = os.environ.get("IMAGE_REPO_ID")

        if not repo_id or not image_repo_id:
            # Without both repositories nothing can be loaded; the debug
            # globals keep their initial values.
            print(f"[CONSOLE] [DEBUG MODE] ERROR: REPO_ID or IMAGE_REPO_ID not set in environment")
            print(f"[CONSOLE] [DEBUG MODE] REPO_ID: {repo_id}")
            print(f"[CONSOLE] [DEBUG MODE] IMAGE_REPO_ID: {image_repo_id}")
        else:
            print(f"[CONSOLE] [DEBUG MODE] Loading data from REPO_ID: {repo_id}")
            print(f"[CONSOLE] [DEBUG MODE] Loading images from IMAGE_REPO_ID: {image_repo_id}")

            # Load table data
            DEBUG_TABLE_DF = load_debug_table_data(repo_id, debug_table_split)
            print(f"[CONSOLE] [DEBUG MODE] Table loaded: {len(DEBUG_TABLE_DF) if DEBUG_TABLE_DF is not None else 0} rows")

            # Load images
            DEBUG_ORIGINAL_IMAGE, DEBUG_ANNOTATED_IMAGE = load_debug_images(image_repo_id, debug_image_id)
            print(f"[CONSOLE] [DEBUG MODE] Original image: {DEBUG_ORIGINAL_IMAGE}")
            print(f"[CONSOLE] [DEBUG MODE] Annotated image: {DEBUG_ANNOTATED_IMAGE}")

            if DEBUG_TABLE_DF is None or DEBUG_ORIGINAL_IMAGE is None:
                print(f"[CONSOLE] [DEBUG MODE] WARNING: Failed to load debug data, debug mode may not work correctly")
    except Exception as e:
        # Best-effort: a failure here must not prevent the app from starting.
        print(f"[CONSOLE] [DEBUG MODE] ERROR loading debug data: {e}")
        print(f"[CONSOLE] [DEBUG MODE] Debug mode will use fallback dummy data")
else:
    print(f"[CONSOLE] Debug mode is OFF (set DEBUG_MODE=true to enable)")
1452
 
1453
  print(f"[CONSOLE] ===== STARTING GRADIO APPLICATION =====")
1454
  print(f"[CONSOLE] Creating Gradio interface...")