Spaces:
Build error
Build error
Commit
·
26c12e7
1
Parent(s):
125e1ba
Update app.py
Browse files
app.py
CHANGED
|
@@ -51,7 +51,6 @@ def compute_itm_score(image, statement):
|
|
| 51 |
score = itm_scores[:, 1].item()
|
| 52 |
logging.info('Finished compute_itm_score')
|
| 53 |
return score
|
| 54 |
-
|
| 55 |
def generate_caption(processor, model, image):
|
| 56 |
logging.info('Starting generate_caption')
|
| 57 |
inputs = processor(images=image, return_tensors="pt").to(device)
|
|
@@ -74,7 +73,7 @@ def save_dataframe_to_csv(df):
|
|
| 74 |
return temp_file_path
|
| 75 |
|
| 76 |
# Main function to perform image captioning and image-text matching
|
| 77 |
-
def process_images_and_statements(image):
|
| 78 |
logging.info('Starting process_images_and_statements')
|
| 79 |
|
| 80 |
# Generate image caption for the uploaded image using git-large-r-textcaps
|
|
@@ -99,10 +98,11 @@ def process_images_and_statements(image):
|
|
| 99 |
final_score = ((weight_textual_similarity * textual_similarity_score) +
|
| 100 |
(weight_statement * itm_score_statement))
|
| 101 |
|
| 102 |
-
# Append the result to the results_list
|
| 103 |
results_list.append({
|
|
|
|
| 104 |
'Statement': statement,
|
| 105 |
-
'Generated Caption': caption,
|
| 106 |
'Textual Similarity Score': f"{textual_similarity_score:.2f}%", # Format as percentage with two decimal places
|
| 107 |
'ITM Score': f"{itm_score_statement:.2f}%", # Format as percentage with two decimal places
|
| 108 |
'Final Combined Score': f"{final_score:.2f}%" # Format as percentage with two decimal places
|
|
@@ -119,18 +119,24 @@ def process_images_and_statements(image):
|
|
| 119 |
# Return both the DataFrame and the CSV data for the Gradio interface
|
| 120 |
return results_df, csv_results # <--- Return results_df and csv_results
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# Gradio interface
|
| 123 |
image_input = gr.inputs.Image()
|
|
|
|
| 124 |
output_df = gr.outputs.Dataframe(type="pandas", label="Results")
|
| 125 |
output_csv = gr.outputs.File(label="Download CSV")
|
| 126 |
|
| 127 |
iface = gr.Interface(
|
| 128 |
-
fn=
|
| 129 |
-
inputs=image_input,
|
| 130 |
-
outputs=[output_df, output_csv],
|
| 131 |
title="Image Captioning and Image-Text Matching",
|
| 132 |
theme='sudeepshouche/minimalist',
|
| 133 |
css=".output { flex-direction: column; } .output .outputs { width: 100%; }" # Custom CSS
|
| 134 |
)
|
| 135 |
|
| 136 |
-
|
|
|
|
|
|
| 51 |
score = itm_scores[:, 1].item()
|
| 52 |
logging.info('Finished compute_itm_score')
|
| 53 |
return score
|
|
|
|
| 54 |
def generate_caption(processor, model, image):
|
| 55 |
logging.info('Starting generate_caption')
|
| 56 |
inputs = processor(images=image, return_tensors="pt").to(device)
|
|
|
|
| 73 |
return temp_file_path
|
| 74 |
|
| 75 |
# Main function to perform image captioning and image-text matching
|
| 76 |
+
def process_images_and_statements(image, filename):
|
| 77 |
logging.info('Starting process_images_and_statements')
|
| 78 |
|
| 79 |
# Generate image caption for the uploaded image using git-large-r-textcaps
|
|
|
|
| 98 |
final_score = ((weight_textual_similarity * textual_similarity_score) +
|
| 99 |
(weight_statement * itm_score_statement))
|
| 100 |
|
| 101 |
+
# Append the result to the results_list, including the image filename
|
| 102 |
results_list.append({
|
| 103 |
+
'Image Filename': filename, # Add the image filename to the output
|
| 104 |
'Statement': statement,
|
| 105 |
+
'Generated Caption': caption,
|
| 106 |
'Textual Similarity Score': f"{textual_similarity_score:.2f}%", # Format as percentage with two decimal places
|
| 107 |
'ITM Score': f"{itm_score_statement:.2f}%", # Format as percentage with two decimal places
|
| 108 |
'Final Combined Score': f"{final_score:.2f}%" # Format as percentage with two decimal places
|
|
|
|
| 119 |
# Return both the DataFrame and the CSV data for the Gradio interface
|
| 120 |
return results_df, csv_results # <--- Return results_df and csv_results
|
| 121 |
|
| 122 |
+
# Define a function to handle the user's input
|
| 123 |
+
def handle_input(image, filename):
    """Run the captioning / matching pipeline for one uploaded image.

    Args:
        image: The value supplied by the Gradio image input.
        filename: The value supplied by the Gradio file input. May be an
            uploaded-file object exposing a ``name`` path attribute, a
            plain path string, or ``None`` when no file was provided.

    Returns:
        The value returned by ``process_images_and_statements`` for the
        image and the resolved file name.
    """
    import os  # Local import so this block does not depend on file-level imports

    if filename is None:
        # The file input only labels the results, so a missing upload should
        # not abort processing of the image itself.
        display_name = ""
    else:
        # Uploaded files are exposed as temporary paths; keep only the base
        # name so the 'Image Filename' column does not contain server paths.
        path = filename if isinstance(filename, str) else filename.name
        display_name = os.path.basename(path)

    return process_images_and_statements(image, display_name)
|
| 126 |
# Gradio interface
|
| 127 |
# Input and output components. The top-level component classes are used
# because the gr.inputs / gr.outputs namespaces are deprecated.
image_input = gr.Image()
file_input = gr.File()  # Second input: the uploaded file whose name labels the results
output_df = gr.Dataframe(type="pandas", label="Results")
output_csv = gr.File(label="Download CSV")

iface = gr.Interface(
    fn=handle_input,  # Use the handle_input function to handle user input
    inputs=[image_input, file_input],  # Include both the image and filename inputs
    outputs=[output_df, output_csv],
    title="Image Captioning and Image-Text Matching",
    theme='sudeepshouche/minimalist',
    css=".output { flex-direction: column; } .output .outputs { width: 100%; }",  # Custom CSS
)

# Launch the Gradio interface
iface.launch()
|