# SATINT-Analyst / app.py
import gradio as gr
import os
from PIL import Image, ImageDraw, ImageFont
import io
import base64
from openai import OpenAI
import re
import json
def encode_image(image):
"""Convert PIL Image to base64 string for API"""
    buffered = io.BytesIO()
    # JPEG has no alpha channel, so normalize RGBA/P uploads to RGB first
    image = image.convert("RGB")
    image.save(buffered, format="JPEG", quality=95)
return base64.b64encode(buffered.getvalue()).decode('utf-8')
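
# For local testing it can help to invert encode_image. This is a small
# illustrative sketch, not something the app itself calls.
def decode_image(b64_string):
    """Decode a base64 JPEG string (as produced by encode_image) into a PIL Image."""
    return Image.open(io.BytesIO(base64.b64decode(b64_string)))
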
def draw_annotations(image, annotations):
"""
Draw numbered annotations on the image
Args:
image: PIL Image object
annotations: List of dicts with 'x', 'y', 'label' keys (coordinates are 0-1 normalized)
Returns:
PIL Image with annotations drawn
"""
# Create a copy to avoid modifying original
img_copy = image.copy()
draw = ImageDraw.Draw(img_copy)
# Get image dimensions
width, height = img_copy.size
# Try to load a better font, fall back to default if not available
try:
font_size = max(20, min(width, height) // 40)
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
    except OSError:
        # Font file not available on this system; fall back to PIL's default
        font = ImageFont.load_default()
# Draw each annotation
for i, ann in enumerate(annotations, 1):
# Convert normalized coordinates to pixel coordinates
x = int(ann['x'] * width)
y = int(ann['y'] * height)
# Circle radius based on image size
radius = max(15, min(width, height) // 80)
# Draw outer circle (white border)
draw.ellipse(
[(x - radius - 2, y - radius - 2), (x + radius + 2, y + radius + 2)],
fill='white',
outline='white'
)
# Draw inner circle (red)
draw.ellipse(
[(x - radius, y - radius), (x + radius, y + radius)],
fill='red',
outline='white',
width=2
)
# Draw number
number_text = str(i)
# Get text bounding box for centering
bbox = draw.textbbox((0, 0), number_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw text centered in circle
text_x = x - text_width // 2
text_y = y - text_height // 2
draw.text((text_x, text_y), number_text, fill='white', font=font)
return img_copy
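
# A small validation sketch (an addition, not part of the original flow):
# model-produced coordinates occasionally fall outside 0-1 or omit keys, and
# filtering them before draw_annotations avoids markers drawn off-canvas,
# e.g. annotations = sanitize_annotations(annotations).
def sanitize_annotations(annotations):
    """Keep only annotations with numeric x/y in [0, 1], coercing labels to str."""
    clean = []
    for ann in annotations:
        try:
            x, y = float(ann["x"]), float(ann["y"])
        except (KeyError, TypeError, ValueError):
            continue
        if 0.0 <= x <= 1.0 and 0.0 <= y <= 1.0:
            clean.append({"x": x, "y": y, "label": str(ann.get("label", ""))})
    return clean
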
def analyze_satellite_image(image, geolocation, brief, analysis_mode, api_key):
"""
Analyze satellite imagery using Meta Llama Vision via OpenRouter
Args:
image: PIL Image object
geolocation: String with coordinates in decimal notation
brief: User's analysis requirements and context
analysis_mode: "text_only" or "annotated"
api_key: OpenRouter API key
"""
if not api_key:
return "Please provide your OpenRouter API key to proceed.", None
if not image:
return "Please upload a satellite image.", None
try:
# Use OpenRouter for Meta Llama 3.2 Vision
client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=api_key
)
# Base system prompt for SATINT analyst role
system_prompt = """You are a seasoned satellite imagery intelligence (SATINT) analyst with decades of experience in analyzing overhead reconnaissance imagery. Your role is to provide objective, professional analysis of satellite imagery.
Your analysis should be:
- Non-emotional and factual
- Precise in describing observable features
- Professional in tone and terminology
- Comprehensive yet concise
- Based solely on what can be observed in the imagery
When geolocation is provided, incorporate geographical and contextual knowledge to enhance your analysis. Consider terrain, climate, regional characteristics, and typical infrastructure patterns for that location.
When a brief is provided, tailor your analysis to address the specific requirements while maintaining professional objectivity."""
# Prepare the user message based on analysis mode
user_message_parts = []
# Add geolocation context if provided
location_context = ""
if geolocation and geolocation.strip():
location_context = f"\n\nGEOLOCATION: {geolocation} (decimal notation)"
# Add brief context if provided
brief_context = ""
if brief and brief.strip():
brief_context = f"\n\nANALYSIS BRIEF: {brief}"
if analysis_mode == "text_only":
instruction = f"""Analyze this satellite image and provide a professional intelligence assessment.{location_context}{brief_context}
Provide your analysis in a structured format covering:
1. Overview and general observations
2. Key features and infrastructure identified
3. Notable patterns or anomalies
4. Assessment and implications (if relevant to the brief)"""
else: # annotated mode
instruction = f"""Analyze this satellite image and provide a professional intelligence assessment with annotations.{location_context}{brief_context}
You MUST format your response in TWO sections:
SECTION 1 - ANNOTATIONS (JSON):
Provide a JSON array of annotation points. Each point should have:
- "x": horizontal position (0.0 to 1.0, where 0.0 is left edge, 1.0 is right edge)
- "y": vertical position (0.0 to 1.0, where 0.0 is top edge, 1.0 is bottom edge)
- "label": brief description of the feature
Start this section with exactly: ANNOTATIONS:
Then provide valid JSON on the next line.
Example format:
ANNOTATIONS:
[
{{"x": 0.25, "y": 0.35, "label": "Military installation"}},
{{"x": 0.75, "y": 0.60, "label": "Vehicle staging area"}}
]
SECTION 2 - ANALYSIS:
Provide your detailed analysis referencing the numbered annotations (1, 2, 3, etc.) that will be drawn on the image:
1. Key features identified (reference annotation numbers)
2. Overview and general observations
3. Notable patterns or anomalies
4. Assessment and implications (if relevant to the brief)
Remember: The annotations will be numbered automatically in the order you list them."""
# Encode image
image_data = encode_image(image)
# Make API call to Llama 3.2 Vision via OpenRouter
response = client.chat.completions.create(
model="meta-llama/llama-3.2-90b-vision-instruct",
messages=[
{
"role": "system",
"content": system_prompt
},
{
"role": "user",
"content": [
{
"type": "text",
"text": instruction
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_data}"
}
}
]
}
],
max_tokens=4096,
temperature=0.7
)
analysis_text = response.choices[0].message.content
# For annotated mode, parse annotations and draw on image
if analysis_mode == "annotated":
try:
# Extract annotations JSON from response
annotations = []
annotated_image = image
# Look for ANNOTATIONS: section
if "ANNOTATIONS:" not in analysis_text:
# Model didn't provide annotations section
error_msg = """
⚠️ **ANNOTATION MODE ERROR**
The AI model did not provide an ANNOTATIONS: section in its response.
This means it didn't follow the annotation format instructions.
Try again, or use Text Only mode for standard analysis.
---
**AI Response:**
"""
return error_msg + analysis_text, image
if "ANNOTATIONS:" in analysis_text:
# Extract the JSON part
parts = analysis_text.split("ANNOTATIONS:")
if len(parts) > 1:
json_part = parts[1].split("SECTION 2")[0].strip()
# Also try splitting by "ANALYSIS:" if SECTION 2 not found
if "ANALYSIS:" in json_part:
json_part = json_part.split("ANALYSIS:")[0].strip()
# Try to extract JSON array
json_match = re.search(r'\[.*?\]', json_part, re.DOTALL)
if json_match:
json_str = json_match.group(0)
annotations = json.loads(json_str)
# Draw annotations on image
if annotations and len(annotations) > 0:
annotated_image = draw_annotations(image, annotations)
# Add annotation count to the analysis
annotation_count_msg = f"\n\n**βœ“ {len(annotations)} annotation(s) marked on image**\n\n"
                    # Strip the JSON section so only the prose analysis remains,
                    # then prepend the annotation count
                    if "ANALYSIS:" in analysis_text:
                        analysis_text = analysis_text.split("ANALYSIS:", 1)[1].strip()
                    elif "SECTION 2" in analysis_text:
                        analysis_text = analysis_text.split("SECTION 2", 1)[1].strip()
                        # Drop a leftover "- ANALYSIS:" heading fragment, if any
                        if analysis_text.startswith("- ANALYSIS:"):
                            analysis_text = analysis_text[len("- ANALYSIS:"):].strip()
                    analysis_text = annotation_count_msg + "ANALYSIS:\n" + analysis_text
else:
# Annotations array was empty
analysis_text = "\n\n⚠️ **No annotations provided by AI model**\n\n" + analysis_text
return analysis_text, annotated_image
except Exception as e:
# If annotation parsing fails, return original image with a detailed note
error_msg = f"""
⚠️ **ANNOTATION MODE ERROR**
The AI model's response could not be parsed for annotations. This usually happens when:
- The model doesn't return properly formatted JSON
- The ANNOTATIONS: section is missing or malformed
- The coordinate values are invalid
Error details: {str(e)}
---
**Original AI Response:**
{analysis_text}
"""
return error_msg, image
else:
return analysis_text, None
except Exception as e:
if "authentication" in str(e).lower() or "unauthorized" in str(e).lower():
return "Authentication failed. Please check your OpenRouter API key.", None
return f"Error during analysis: {str(e)}", None
# Create Gradio interface
with gr.Blocks(title="SATINT Analyst - Satellite Imagery Analysis", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
# πŸ›°οΈ SATINT Analyst
### Professional Satellite Imagery Intelligence Analysis
Upload satellite imagery and receive professional intelligence analysis from an AI-powered SATINT analyst.
Powered by **Meta Llama 3.2 Vision (90B)** for uncensored, objective analysis.
**Note:** This application requires your own OpenRouter API key (BYOK - Bring Your Own Key).
Get your API key at [openrouter.ai](https://openrouter.ai/keys)
""")
with gr.Row():
with gr.Column(scale=1):
api_key_input = gr.Textbox(
label="OpenRouter API Key",
placeholder="sk-or-v1-...",
type="password",
info="Your API key is only used for this session and is not stored."
)
image_input = gr.Image(
label="Upload Satellite Image",
type="pil",
height=400
)
geolocation_input = gr.Textbox(
label="Geolocation (Optional)",
placeholder="e.g., 38.8977, -77.0365 (decimal notation: latitude, longitude)",
info="Provide coordinates in decimal format for enhanced contextual analysis"
)
brief_input = gr.Textbox(
label="Analysis Brief",
placeholder="Describe what you want analyzed (e.g., 'Identify infrastructure changes', 'Assess military installations', 'Evaluate agricultural land use')",
lines=3,
info="Provide context and specific requirements for the analysis"
)
analysis_mode = gr.Radio(
choices=["text_only", "annotated"],
value="text_only",
label="Analysis Mode",
info="Text Only: Written analysis only | Annotated: Analysis with numbered markers drawn on the image"
)
analyze_btn = gr.Button("Analyze Imagery", variant="primary", size="lg")
with gr.Column(scale=1):
gr.Markdown("### Intelligence Analysis")
with gr.Row():
copy_btn = gr.Button("πŸ“‹ Copy to Clipboard", size="sm", scale=0)
analysis_output = gr.Markdown(
value="*Analysis will appear here...*",
height=600,
elem_classes="analysis-box"
)
# Hidden textbox to hold raw text for copying
analysis_text_raw = gr.Textbox(visible=False)
annotated_output = gr.Image(
label="Annotated Image",
visible=True
)
gr.Markdown("""
---
### Usage Tips
- **Geolocation**: Use decimal notation (e.g., 38.8977, -77.0365) for latitude and longitude
- **Brief**: Provide specific questions or focus areas for more targeted analysis
- **Text Only Mode**: Receive a detailed written analysis with markdown formatting
- **Annotated Mode**: Receive analysis with numbered annotations drawn on the image referencing key features
- **Copy Button**: Click the clipboard button to copy the analysis text
### Privacy & Model
- **Model**: Meta Llama 3.2 Vision 90B (via OpenRouter)
- Your API key is used only for this session and is not stored
- Images are processed through OpenRouter's API
- Get your OpenRouter API key at [openrouter.ai/keys](https://openrouter.ai/keys)
""")
# Set up the analyze button
def process_analysis(image, geolocation, brief, analysis_mode, api_key):
"""Wrapper to return results for both markdown and raw text"""
text, img = analyze_satellite_image(image, geolocation, brief, analysis_mode, api_key)
return text, text, img # markdown display, raw text for copying, image
analyze_btn.click(
fn=process_analysis,
inputs=[image_input, geolocation_input, brief_input, analysis_mode, api_key_input],
outputs=[analysis_output, analysis_text_raw, annotated_output]
)
    # Copy from the hidden textbox entirely client-side; navigator.clipboard
    # requires a secure context (HTTPS or localhost), which both Spaces and
    # local Gradio launches provide
    copy_btn.click(
        fn=None,
        inputs=[analysis_text_raw],
        outputs=[],
        js="(text) => {navigator.clipboard.writeText(text);}"
    )
if __name__ == "__main__":
demo.launch()