Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -422,6 +422,14 @@ def generate_alt_text_for_image(pil_image):
|
|
| 422 |
print("❌ Gemini model not initialized for alt text generation")
|
| 423 |
return "Image description unavailable"
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
# Create a detailed prompt for alt text generation
|
| 426 |
prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
|
| 427 |
|
|
@@ -433,17 +441,36 @@ Focus on:
|
|
| 433 |
|
| 434 |
Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
|
| 435 |
|
| 436 |
-
print(f"π Generating alt text for image...")
|
| 437 |
|
| 438 |
# Generate alt text using Gemini API with proper multimodal input
|
|
|
|
| 439 |
response = model.generate_content([prompt, pil_image])
|
| 440 |
|
|
|
|
|
|
|
|
|
|
| 441 |
if hasattr(response, 'text') and response.text:
|
| 442 |
alt_text = response.text.strip()
|
| 443 |
print(f"✅ Alt text generated: {alt_text[:100]}...")
|
| 444 |
else:
|
| 445 |
-
print(f"❌ No text in response: {response}")
|
| 446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
|
| 448 |
# Clean up the alt text
|
| 449 |
alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
|
|
|
|
| 422 |
print("❌ Gemini model not initialized for alt text generation")
|
| 423 |
return "Image description unavailable"
|
| 424 |
|
| 425 |
+
# Debug: Check image format and properties
|
| 426 |
+
print(f"π Image format: {pil_image.format}, mode: {pil_image.mode}, size: {pil_image.size}")
|
| 427 |
+
|
| 428 |
+
# Ensure image is in RGB mode (required for Gemini API)
|
| 429 |
+
if pil_image.mode != 'RGB':
|
| 430 |
+
print(f"Converting image from {pil_image.mode} to RGB")
|
| 431 |
+
pil_image = pil_image.convert('RGB')
|
| 432 |
+
|
| 433 |
# Create a detailed prompt for alt text generation
|
| 434 |
prompt = """You are an accessibility expert creating alt text for images to help visually impaired users understand visual content. Analyze this image and provide a clear, concise description that captures the essential visual information.
|
| 435 |
|
|
|
|
| 441 |
|
| 442 |
Provide a descriptive alt text in 1-2 sentences that is informative but not overly verbose. Start directly with the description without saying "This image shows" or similar phrases."""
|
| 443 |
|
| 444 |
+
print(f"π Generating alt text for image with Gemma 3n...")
|
| 445 |
|
| 446 |
# Generate alt text using Gemini API with proper multimodal input
|
| 447 |
+
# Pass the PIL image directly - Gemini API handles PIL Image objects
|
| 448 |
response = model.generate_content([prompt, pil_image])
|
| 449 |
|
| 450 |
+
print(f"π‘ API response received: {type(response)}")
|
| 451 |
+
print(f"π‘ Response attributes: {dir(response)}")
|
| 452 |
+
|
| 453 |
if hasattr(response, 'text') and response.text:
|
| 454 |
alt_text = response.text.strip()
|
| 455 |
print(f"✅ Alt text generated: {alt_text[:100]}...")
|
| 456 |
else:
|
| 457 |
+
print(f"❌ No text in response. Response: {response}")
|
| 458 |
+
# Try to access response differently
|
| 459 |
+
if hasattr(response, 'candidates') and response.candidates:
|
| 460 |
+
candidate = response.candidates[0]
|
| 461 |
+
if hasattr(candidate, 'content') and candidate.content:
|
| 462 |
+
if hasattr(candidate.content, 'parts') and candidate.content.parts:
|
| 463 |
+
alt_text = candidate.content.parts[0].text.strip()
|
| 464 |
+
print(f"✅ Alt text from candidates: {alt_text[:100]}...")
|
| 465 |
+
else:
|
| 466 |
+
print(f"❌ No parts in content")
|
| 467 |
+
return "Image description unavailable"
|
| 468 |
+
else:
|
| 469 |
+
print(f"❌ No content in candidate")
|
| 470 |
+
return "Image description unavailable"
|
| 471 |
+
else:
|
| 472 |
+
print(f"❌ No candidates in response")
|
| 473 |
+
return "Image description unavailable"
|
| 474 |
|
| 475 |
# Clean up the alt text
|
| 476 |
alt_text = alt_text.replace('\n', ' ').replace('\r', ' ')
|