EyeSee_chi

Running

App Files Community

hyzhang00 committed on Feb 3

Commit

c343f53

1 Parent(s): f18fd81

update on code

Browse files

Files changed (23) hide show

app.py +27 -34
{caption_anything → backend/caption_anything}/__init__.py +0 -0
{caption_anything → backend/caption_anything}/captioner/README.md +0 -0
{caption_anything → backend/caption_anything}/captioner/__init__.py +0 -0
{caption_anything → backend/caption_anything}/captioner/base_captioner.py +0 -0
{caption_anything → backend/caption_anything}/captioner/blip.py +0 -0
{caption_anything → backend/caption_anything}/captioner/blip2.py +0 -0
{caption_anything → backend/caption_anything}/captioner/git.py +0 -0
{caption_anything → backend/caption_anything}/captioner/modeling_blip.py +0 -0
{caption_anything → backend/caption_anything}/captioner/modeling_git.py +0 -0
{caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py +0 -0
{caption_anything → backend/caption_anything}/model.py +0 -0
{caption_anything → backend/caption_anything}/segmenter/__init__.py +0 -0
{caption_anything → backend/caption_anything}/segmenter/base_segmenter.py +0 -0
{caption_anything → backend/caption_anything}/segmenter/readme.md +0 -0
{caption_anything → backend/caption_anything}/text_refiner/README.md +0 -0
{caption_anything → backend/caption_anything}/text_refiner/__init__.py +0 -0
{caption_anything → backend/caption_anything}/text_refiner/text_refiner.py +0 -0
{caption_anything → backend/caption_anything}/utils/chatbot.py +0 -0
{caption_anything → backend/caption_anything}/utils/densecap_painter.py +0 -0
{caption_anything → backend/caption_anything}/utils/image_editing_utils.py +0 -0
{caption_anything → backend/caption_anything}/utils/parser.py +0 -0
{caption_anything → backend/caption_anything}/utils/utils.py +0 -0

app.py CHANGED Viewed

@@ -11,11 +11,11 @@ from PIL import Image
 import emoji
 from langchain_community.chat_models import ChatOpenAI
 from langchain.schema import HumanMessage
-from caption_anything.model import CaptionAnything
-from caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
-from caption_anything.utils.parser import parse_augment
-from caption_anything.captioner import build_captioner
-from caption_anything.segmenter import build_segmenter
 from backend.chatbox import build_chatbot_tools, get_new_image_name
 from segment_anything import sam_model_registry
 import easyocr
@@ -82,7 +82,19 @@ try:
 except Exception as e:
     print(f"Error in building chatbot tools: {e}")
 def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
                                        session_id=None):
@@ -190,7 +202,6 @@ async def chat_input_callback(*args):
         audio = await texttospeech(read_info,language,gender)
         return state, state, aux_state, audio,log_state,history
 async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
     print("narritive", narritive)
@@ -221,7 +232,6 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
     print('upload_callback: add caption to chatGPT memory')
     new_image_path = get_new_image_name('chat_image', func_name='upload')
     image_input.save(new_image_path)
-    print("img_path",new_image_path)
     paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
     if task_type=="task 3":
         name="Along the River During the Qingming Festival"
@@ -253,23 +263,23 @@ async def upload_callback(image_input,state, log_state, task_type, openai_api_ke
         gender=gender.lower()
     if language=="English":
-        if naritive_mapping[narritive]==0 :
             msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
-        elif naritive_mapping[narritive]==1:
             msg=f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
-        elif naritive_mapping[narritive]==2:
             msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
     elif language=="Chinese":
-        if naritive_mapping[narritive]==0:
             msg=f"🤖 你好，我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会为你提供相关的信息。"
-        elif naritive_mapping[narritive]==1:
             msg=f"🧑‍🎨 你好，我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会为你提供我的创作背后的相关见解和想法。"
-        elif naritive_mapping[narritive]==2:
             msg=f"🎨 你好，让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会从画面上事物的视角为你提供相关的见解和想法。"
@@ -331,8 +341,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
     # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
     if language=="English":
         if prompt["input_label"][-1]==1:
             msg="You've added an area at {}. ".format(prompt["input_point"][-1])
@@ -362,21 +370,6 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
-query_focus_en = [
-    "Provide a description of the item.",
-    "Provide a description and analysis of the item.",
-    "Provide a description, analysis, and interpretation of the item.",
-    "Evaluate the item."
-]
-query_focus_zh = [
-    "请描述一下这个物体。",
-    "请描述和分析一下这个物体。",
-    "请描述、分析和解释一下这个物体。",
-    "请以艺术鉴赏的角度评价一下这个物体。"
-]
 async def submit_caption(naritive, state,length, sentiment, factuality, language,
                    out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
                    autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
@@ -422,7 +415,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
         # save history
         history.append({"role": "user", "content": user_query})
         history.append({"role": "assistant", "content": focus_info})
         print("new_cap",focus_info)
         read_info = re.sub(r'[#[\]!*]','',focus_info)
@@ -430,7 +422,6 @@ async def submit_caption(naritive, state,length, sentiment, factuality, language
         print("read info",read_info)
         gender="male"
         try:
             if autoplay==False:
                 return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
@@ -598,6 +589,9 @@ def get_recommendationscore(index,score,log_state):
     log_state+=[("%% recommendation %%",None)]
     return log_state
 def toggle_icons_and_update_prompt(point_prompt):
     new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
@@ -608,8 +602,7 @@ def toggle_icons_and_update_prompt(point_prompt):
     return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
-add_icon_path="assets/icons/plus-square-blue.png"
-minus_icon_path="assets/icons/minus-square.png"
 with open('styles.css', 'r') as file:

 import emoji
 from langchain_community.chat_models import ChatOpenAI
 from langchain.schema import HumanMessage
+from backend.caption_anything.model import CaptionAnything
+from backend.caption_anything.utils.utils import mask_painter, seg_model_map, prepare_segmenter, image_resize
+from backend.caption_anything.utils.parser import parse_augment
+from backend.caption_anything.captioner import build_captioner
+from backend.caption_anything.segmenter import build_segmenter
 from backend.chatbox import build_chatbot_tools, get_new_image_name
 from segment_anything import sam_model_registry
 import easyocr
 except Exception as e:
     print(f"Error in building chatbot tools: {e}")
+query_focus_en = [
+    "Provide a description of the item.",
+    "Provide a description and analysis of the item.",
+    "Provide a description, analysis, and interpretation of the item.",
+    "Evaluate the item."
+]
+query_focus_zh = [
+    "请描述一下这个物体。",
+    "请描述和分析一下这个物体。",
+    "请描述、分析和解释一下这个物体。",
+    "请以艺术鉴赏的角度评价一下这个物体。"
+]
 def build_caption_anything_with_models(args, api_key="", captioner=None, sam_model=None, ocr_reader=None, text_refiner=None,
                                        session_id=None):
         audio = await texttospeech(read_info,language,gender)
         return state, state, aux_state, audio,log_state,history
 async def upload_callback(image_input,state, log_state, task_type, openai_api_key=None,language="English",narritive=None,history=None,autoplay=True,session="Session 1"):
     print("narritive", narritive)
     print('upload_callback: add caption to chatGPT memory')
     new_image_path = get_new_image_name('chat_image', func_name='upload')
     image_input.save(new_image_path)
     paragraph = get_gpt_response(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
     if task_type=="task 3":
         name="Along the River During the Qingming Festival"
         gender=gender.lower()
     if language=="English":
+        if PromptTemplates.NARRATIVE_MAPPING[narritive]==0 :
             msg=f"🤖 Hi, I am EyeSee. Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant information."
+        elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
             msg=f"🧑‍🎨 Hello, I am the {artist}. Welcome to explore my painting, '{name}'. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with the relevant insights and thoughts behind my creation."
+        elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
             msg=f"🎨 Hello, Let's explore this painting '{name}' together. You can click on the area you're interested in and choose from four types of information: Description, Analysis, Interpretation, and Judgment. Based on your selection, I will provide you with relevant insights and thoughts from the perspective of the objects within the painting"
     elif language=="Chinese":
+        if PromptTemplates.NARRATIVE_MAPPING[narritive]==0:
             msg=f"🤖 你好，我是 EyeSee。让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会为你提供相关的信息。"
+        elif PromptTemplates.NARRATIVE_MAPPING[narritive]==1:
             msg=f"🧑‍🎨 你好，我是{artist}。欢迎探索我的画作《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会为你提供我的创作背后的相关见解和想法。"
+        elif PromptTemplates.NARRATIVE_MAPPING[narritive]==2:
             msg=f"🎨 你好，让我们一起探索这幅画《{name}》。你可以点击你感兴趣的区域，并选择四种信息类型之一：描述、分析、解读和评判。根据你的选择，我会从画面上事物的视角为你提供相关的见解和想法。"
     out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
     # state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
     if language=="English":
         if prompt["input_label"][-1]==1:
             msg="You've added an area at {}. ".format(prompt["input_point"][-1])
     return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
 async def submit_caption(naritive, state,length, sentiment, factuality, language,
                    out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
                    autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path, gender,log_state,history):
         # save history
         history.append({"role": "user", "content": user_query})
         history.append({"role": "assistant", "content": focus_info})
         print("new_cap",focus_info)
         read_info = re.sub(r'[#[\]!*]','',focus_info)
         print("read info",read_info)
         gender="male"
         try:
             if autoplay==False:
                 return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None,log_state,history
     log_state+=[("%% recommendation %%",None)]
     return log_state
+add_icon_path="assets/icons/plus-square-blue.png"
+minus_icon_path="assets/icons/minus-square.png"
 def toggle_icons_and_update_prompt(point_prompt):
     new_prompt = "Negative" if point_prompt == "Positive" else "Positive"
     return new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
 with open('styles.css', 'r') as file:

{caption_anything → backend/caption_anything}/__init__.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/README.md RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/__init__.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/base_captioner.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/blip.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/blip2.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/git.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/modeling_blip.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/modeling_git.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/captioner/vit_pixel_masks_utils.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/model.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/segmenter/__init__.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/segmenter/base_segmenter.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/segmenter/readme.md RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/text_refiner/README.md RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/text_refiner/__init__.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/text_refiner/text_refiner.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/utils/chatbot.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/utils/densecap_painter.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/utils/image_editing_utils.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/utils/parser.py RENAMED Viewed

File without changes

{caption_anything → backend/caption_anything}/utils/utils.py RENAMED Viewed

File without changes