Spaces:
Running
Running
hyzhang00
committed on
Commit
·
c343f53
1
Parent(s):
f18fd81
update on code
Browse files
- app.py +27 -34
- {caption_anything → backend/caption_anything}/__init__.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/README.md +0 -0
- {caption_anything → backend/caption_anything}/captioner/__init__.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/base_captioner.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/blip.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/blip2.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/git.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/modeling_blip.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/modeling_git.py +0 -0
- {caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py +0 -0
- {caption_anything → backend/caption_anything}/model.py +0 -0
- {caption_anything → backend/caption_anything}/segmenter/__init__.py +0 -0
- {caption_anything → backend/caption_anything}/segmenter/base_segmenter.py +0 -0
- {caption_anything → backend/caption_anything}/segmenter/readme.md +0 -0
- {caption_anything → backend/caption_anything}/text_refiner/README.md +0 -0
- {caption_anything → backend/caption_anything}/text_refiner/__init__.py +0 -0
- {caption_anything → backend/caption_anything}/text_refiner/text_refiner.py +0 -0
- {caption_anything → backend/caption_anything}/utils/chatbot.py +0 -0
- {caption_anything → backend/caption_anything}/utils/densecap_painter.py +0 -0
- {caption_anything → backend/caption_anything}/utils/image_editing_utils.py +0 -0
- {caption_anything → backend/caption_anything}/utils/parser.py +0 -0
- {caption_anything → backend/caption_anything}/utils/utils.py +0 -0
app.py
CHANGED
|
@@ -11,11 +11,11 @@ from PIL import Image
|
|
| 11 |
import emoji
|
| 12 |
from langchain_community.chat_models import ChatOpenAI
|
| 13 |
from langchain.schema import HumanMessage
|
| 14 |
-
from caption_anything.model import CaptionAnything
|
| 15 |
-
from caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
|
| 16 |
-
from caption_anything.utils.parser import parse_augment
|
| 17 |
-
from caption_anything.captioner import build_captioner
|
| 18 |
-
from caption_anything.segmenter import build_segmenter
|
| 19 |
from backend.chatbox import build_chatbot_tools, get_new_image_name
|
| 20 |
from segment_anything import sam_model_registry
|
| 21 |
import easyocr
|
|
@@ -82,7 +82,19 @@ try:
|
|
| 82 |
except Exception as e:
|
| 83 |
print(f"Error in building chatbot tools: {e}")
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
|
| 88 |
session_id=None):
|
|
@@ -190,7 +202,6 @@ async def chat_input_callback(*args):
|
|
| 190 |
audio = await texttospeech(read_info,language,gender)
|
| 191 |
return state, state, aux_state, audio,log_state,history
|
| 192 |
|
| 193 |
-
|
| 194 |
|
| 195 |
async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
|
| 196 |
print("narritive", narritive)
|
|
@@ -221,7 +232,6 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
|
|
| 221 |
print('upload_callback: add caption to chatGPT memory')
|
| 222 |
new_image_path = get_new_image_name('chat_image', func_name='upload')
|
| 223 |
image_input.save(new_image_path)
|
| 224 |
-
print("img_path",new_image_path)
|
| 225 |
paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
|
| 226 |
if task_type=="task 3":
|
| 227 |
name="Along the River During the Qingming Festival"
|
|
@@ -253,23 +263,23 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
|
|
| 253 |
gender=gender.lower()
|
| 254 |
|
| 255 |
if language=="English":
|
| 256 |
-
if
|
| 257 |
msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
|
| 258 |
|
| 259 |
-
elif
|
| 260 |
msg=f"🧑🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
|
| 261 |
|
| 262 |
-
elif
|
| 263 |
msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
|
| 264 |
|
| 265 |
elif language=="Chinese":
|
| 266 |
-
if
|
| 267 |
msg=f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
|
| 268 |
|
| 269 |
-
elif
|
| 270 |
msg=f"🧑🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
|
| 271 |
|
| 272 |
-
elif
|
| 273 |
msg=f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
|
| 274 |
|
| 275 |
|
|
@@ -331,8 +341,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
| 331 |
out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
|
| 332 |
# state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
if language=="English":
|
| 337 |
if prompt["input_label"][-1]==1:
|
| 338 |
msg="You've added an area at {}. ".format(prompt["input_point"][-1])
|
|
@@ -362,21 +370,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
| 362 |
return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
| 363 |
|
| 364 |
|
| 365 |
-
query_focus_en = [
|
| 366 |
-
"Provide a description of the item.",
|
| 367 |
-
"Provide a description and analysis of the item.",
|
| 368 |
-
"Provide a description, analysis, and interpretation of the item.",
|
| 369 |
-
"Evaluate the item."
|
| 370 |
-
]
|
| 371 |
-
|
| 372 |
-
query_focus_zh = [
|
| 373 |
-
"请描述一下这个物体。",
|
| 374 |
-
"请描述和分析一下这个物体。",
|
| 375 |
-
"请描述、分析和解释一下这个物体。",
|
| 376 |
-
"请以艺术鉴赏的角度评价一下这个物体。"
|
| 377 |
-
]
|
| 378 |
-
|
| 379 |
-
|
| 380 |
async def submit_caption(naritive, state,length, sentiment, factuality, language,
|
| 381 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 382 |
autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
|
|
@@ -422,7 +415,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
|
|
| 422 |
# save history
|
| 423 |
history.append({"role": "user", "content": user_query})
|
| 424 |
history.append({"role": "assistant", "content": focus_info})
|
| 425 |
-
|
| 426 |
|
| 427 |
print("new_cap",focus_info)
|
| 428 |
read_info = re.sub(r'[#[\]!*]','',focus_info)
|
|
@@ -430,7 +422,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
|
|
| 430 |
print("read info",read_info)
|
| 431 |
gender="male"
|
| 432 |
|
| 433 |
-
|
| 434 |
try:
|
| 435 |
if autoplay==False:
|
| 436 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
|
|
@@ -598,6 +589,9 @@ def get_recommendationscore(index,score,log_state):
|
|
| 598 |
log_state+=[("%% recommendation %%",None)]
|
| 599 |
return log_state
|
| 600 |
|
|
|
|
|
|
|
|
|
|
| 601 |
|
| 602 |
def toggle_icons_and_update_prompt(point_prompt):
|
| 603 |
new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
|
|
@@ -608,8 +602,7 @@ def toggle_icons_and_update_prompt(point_prompt):
|
|
| 608 |
|
| 609 |
return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
|
| 610 |
|
| 611 |
-
|
| 612 |
-
minus_icon_path="assets/icons/minus-square.png"
|
| 613 |
|
| 614 |
|
| 615 |
with open('styles.css', 'r') as file:
|
|
|
|
| 11 |
import emoji
|
| 12 |
from langchain_community.chat_models import ChatOpenAI
|
| 13 |
from langchain.schema import HumanMessage
|
| 14 |
+
from backend.caption_anything.model import CaptionAnything
|
| 15 |
+
from backend.caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
|
| 16 |
+
from backend.caption_anything.utils.parser import parse_augment
|
| 17 |
+
from backend.caption_anything.captioner import build_captioner
|
| 18 |
+
from backend.caption_anything.segmenter import build_segmenter
|
| 19 |
from backend.chatbox import build_chatbot_tools, get_new_image_name
|
| 20 |
from segment_anything import sam_model_registry
|
| 21 |
import easyocr
|
|
|
|
| 82 |
except Exception as e:
|
| 83 |
print(f"Error in building chatbot tools: {e}")
|
| 84 |
|
| 85 |
+
query_focus_en = [
|
| 86 |
+
"Provide a description of the item.",
|
| 87 |
+
"Provide a description and analysis of the item.",
|
| 88 |
+
"Provide a description, analysis, and interpretation of the item.",
|
| 89 |
+
"Evaluate the item."
|
| 90 |
+
]
|
| 91 |
|
| 92 |
+
query_focus_zh = [
|
| 93 |
+
"请描述一下这个物体。",
|
| 94 |
+
"请描述和分析一下这个物体。",
|
| 95 |
+
"请描述、分析和解释一下这个物体。",
|
| 96 |
+
"请以艺术鉴赏的角度评价一下这个物体。"
|
| 97 |
+
]
|
| 98 |
|
| 99 |
def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
|
| 100 |
session_id=None):
|
|
|
|
| 202 |
audio = await texttospeech(read_info,language,gender)
|
| 203 |
return state, state, aux_state, audio,log_state,history
|
| 204 |
|
|
|
|
| 205 |
|
| 206 |
async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
|
| 207 |
print("narritive", narritive)
|
|
|
|
| 232 |
print('upload_callback: add caption to chatGPT memory')
|
| 233 |
new_image_path = get_new_image_name('chat_image', func_name='upload')
|
| 234 |
image_input.save(new_image_path)
|
|
|
|
| 235 |
paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
|
| 236 |
if task_type=="task 3":
|
| 237 |
name="Along the River During the Qingming Festival"
|
|
|
|
| 263 |
gender=gender.lower()
|
| 264 |
|
| 265 |
if language=="English":
|
| 266 |
+
if PromptTemplates.NARRATIVE_MAPPING[narritive]==0 :
|
| 267 |
msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
|
| 268 |
|
| 269 |
+
elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
|
| 270 |
msg=f"🧑🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
|
| 271 |
|
| 272 |
+
elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
|
| 273 |
msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
|
| 274 |
|
| 275 |
elif language=="Chinese":
|
| 276 |
+
if PromptTemplates.NARRATIVE_MAPPING[narritive]==0:
|
| 277 |
msg=f"🤖 你好,我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供相关的信息。"
|
| 278 |
|
| 279 |
+
elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
|
| 280 |
msg=f"🧑🎨 你好,我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会为你提供我的创作背后的相关见解和想法。"
|
| 281 |
|
| 282 |
+
elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
|
| 283 |
msg=f"🎨 你好,让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域,并选择四种信息类型之一:描述、分析、解读和评判。根据你的选择,我会从画面上事物的视角为你提供相关的见解和想法。"
|
| 284 |
|
| 285 |
|
|
|
|
| 341 |
out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
|
| 342 |
# state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
|
| 343 |
|
|
|
|
|
|
|
| 344 |
if language=="English":
|
| 345 |
if prompt["input_label"][-1]==1:
|
| 346 |
msg="You've added an area at {}. ".format(prompt["input_point"][-1])
|
|
|
|
| 370 |
return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
| 371 |
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
async def submit_caption(naritive, state,length, sentiment, factuality, language,
|
| 374 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 375 |
autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
|
|
|
|
| 415 |
# save history
|
| 416 |
history.append({"role": "user", "content": user_query})
|
| 417 |
history.append({"role": "assistant", "content": focus_info})
|
|
|
|
| 418 |
|
| 419 |
print("new_cap",focus_info)
|
| 420 |
read_info = re.sub(r'[#[\]!*]','',focus_info)
|
|
|
|
| 422 |
print("read info",read_info)
|
| 423 |
gender="male"
|
| 424 |
|
|
|
|
| 425 |
try:
|
| 426 |
if autoplay==False:
|
| 427 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
|
|
|
|
| 589 |
log_state+=[("%% recommendation %%",None)]
|
| 590 |
return log_state
|
| 591 |
|
| 592 |
+
|
| 593 |
+
add_icon_path="assets/icons/plus-square-blue.png"
|
| 594 |
+
minus_icon_path="assets/icons/minus-square.png"
|
| 595 |
|
| 596 |
def toggle_icons_and_update_prompt(point_prompt):
|
| 597 |
new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
|
|
|
|
| 602 |
|
| 603 |
return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
|
| 604 |
|
| 605 |
+
|
|
|
|
| 606 |
|
| 607 |
|
| 608 |
with open('styles.css', 'r') as file:
|
{caption_anything → backend/caption_anything}/__init__.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/README.md
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/__init__.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/base_captioner.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/blip.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/blip2.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/git.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/modeling_blip.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/modeling_git.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/model.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/segmenter/__init__.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/segmenter/base_segmenter.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/segmenter/readme.md
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/text_refiner/README.md
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/text_refiner/__init__.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/text_refiner/text_refiner.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/utils/chatbot.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/utils/densecap_painter.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/utils/image_editing_utils.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/utils/parser.py
RENAMED
|
File without changes
|
{caption_anything → backend/caption_anything}/utils/utils.py
RENAMED
|
File without changes
|