Spaces:
Sleeping
Sleeping
Niki Zhang
commited on
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,7 +28,6 @@ import re
|
|
| 28 |
import edge_tts
|
| 29 |
from langchain import __version__
|
| 30 |
import torch
|
| 31 |
-
import gradio as gr
|
| 32 |
from transformers import AutoProcessor, SiglipModel
|
| 33 |
import faiss
|
| 34 |
from huggingface_hub import hf_hub_download
|
|
@@ -38,6 +37,8 @@ import requests
|
|
| 38 |
import spaces
|
| 39 |
# Print the current version of LangChain
|
| 40 |
print(f"Current LangChain version: {__version__}")
|
|
|
|
|
|
|
| 41 |
# import tts
|
| 42 |
|
| 43 |
###############################################################################
|
|
@@ -46,9 +47,9 @@ print(f"Current LangChain version: {__version__}")
|
|
| 46 |
|
| 47 |
|
| 48 |
# import spaces #
|
| 49 |
-
import threading
|
| 50 |
|
| 51 |
-
lock = threading.Lock()
|
| 52 |
import os
|
| 53 |
# import uuid
|
| 54 |
# from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
|
|
@@ -94,220 +95,220 @@ from huggingface_hub import hf_hub_download
|
|
| 94 |
|
| 95 |
|
| 96 |
|
| 97 |
-
def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
def images_to_video(images, output_path, fps=30):
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
###############################################################################
|
| 128 |
-
# Configuration.
|
| 129 |
-
###############################################################################
|
| 130 |
-
|
| 131 |
-
import shutil
|
| 132 |
-
|
| 133 |
-
def find_cuda():
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
cuda_path = find_cuda()
|
| 151 |
-
|
| 152 |
-
if cuda_path:
|
| 153 |
-
|
| 154 |
-
else:
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
config_path = 'configs/instant-nerf-base.yaml'
|
| 158 |
-
config = OmegaConf.load(config_path)
|
| 159 |
-
config_name = os.path.basename(config_path).replace('.yaml', '')
|
| 160 |
-
model_config = config.model_config
|
| 161 |
-
infer_config = config.infer_config
|
| 162 |
-
|
| 163 |
-
IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
|
| 164 |
-
|
| 165 |
-
device = torch.device('cuda')
|
| 166 |
-
|
| 167 |
-
# load diffusion model
|
| 168 |
-
print('Loading diffusion model ...')
|
| 169 |
-
pipeline = DiffusionPipeline.from_pretrained(
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
)
|
| 174 |
-
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
| 175 |
-
|
| 176 |
-
)
|
| 177 |
-
|
| 178 |
-
# load custom white-background UNet
|
| 179 |
-
unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
|
| 180 |
-
state_dict = torch.load(unet_ckpt_path, map_location='cpu')
|
| 181 |
-
pipeline.unet.load_state_dict(state_dict, strict=True)
|
| 182 |
-
|
| 183 |
-
pipeline = pipeline.to(device)
|
| 184 |
-
|
| 185 |
-
# load reconstruction model
|
| 186 |
-
print('Loading reconstruction model ...')
|
| 187 |
-
model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
|
| 188 |
-
model0 = instantiate_from_config(model_config)
|
| 189 |
-
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
|
| 190 |
-
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
|
| 191 |
-
model0.load_state_dict(state_dict, strict=True)
|
| 192 |
-
|
| 193 |
-
model0 = model0.to(device)
|
| 194 |
-
|
| 195 |
-
print('Loading Finished!')
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
def check_input_image(input_image):
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
|
| 206 |
-
def preprocess(input_image, do_remove_background):
|
| 207 |
|
| 208 |
-
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
|
| 214 |
-
|
| 215 |
|
| 216 |
|
| 217 |
-
# @spaces.GPU
|
| 218 |
-
def generate_mvs(input_image, sample_steps, sample_seed):
|
| 219 |
|
| 220 |
-
|
| 221 |
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
|
| 234 |
-
|
| 235 |
|
| 236 |
|
| 237 |
-
# @spaces.GPU
|
| 238 |
-
def make3d(images):
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
|
| 305 |
-
|
| 306 |
-
|
| 307 |
|
| 308 |
-
|
| 309 |
|
| 310 |
-
|
| 311 |
|
| 312 |
|
| 313 |
###############################################################################
|
|
@@ -471,10 +472,13 @@ examples = [
|
|
| 471 |
|
| 472 |
css = """
|
| 473 |
#warning {background-color: #FFCCCB}
|
| 474 |
-
.
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
|
|
|
|
|
|
|
|
|
| 478 |
"""
|
| 479 |
filtered_language_dict = {
|
| 480 |
'English': 'en-US-JennyNeural',
|
|
@@ -487,10 +491,10 @@ filtered_language_dict = {
|
|
| 487 |
}
|
| 488 |
|
| 489 |
focus_map = {
|
| 490 |
-
"
|
| 491 |
-
"
|
| 492 |
-
"
|
| 493 |
-
"
|
| 494 |
}
|
| 495 |
|
| 496 |
'''
|
|
@@ -616,17 +620,17 @@ def init_openai_api_key(api_key=""):
|
|
| 616 |
global gpt_state
|
| 617 |
gpt_state=1
|
| 618 |
# return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
|
| 619 |
-
return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
|
| 620 |
else:
|
| 621 |
gpt_state=0
|
| 622 |
# return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
|
| 623 |
-
return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
|
| 624 |
|
| 625 |
def init_wo_openai_api_key():
|
| 626 |
global gpt_state
|
| 627 |
gpt_state=0
|
| 628 |
# return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]*3
|
| 629 |
-
return [gr.update(visible=False)]*4 + [gr.update(visible=True)]+ [gr.update(visible=False)]+[gr.update(visible=True)]+[gr.update(visible=False)]*2 + [None, None, None]+[gr.update(visible=False)]
|
| 630 |
|
| 631 |
def get_click_prompt(chat_input, click_state, click_mode):
|
| 632 |
inputs = json.loads(chat_input)
|
|
@@ -666,15 +670,17 @@ def update_click_state(click_state, caption, click_mode):
|
|
| 666 |
|
| 667 |
async def chat_input_callback(*args):
|
| 668 |
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
|
|
|
|
| 669 |
if visual_chatgpt is not None:
|
| 670 |
-
state, _, aux_state, _ = visual_chatgpt.run_text(
|
| 671 |
last_text, last_response = state[-1]
|
| 672 |
print("last response",last_response)
|
| 673 |
-
if autoplay:
|
| 674 |
-
|
|
|
|
| 675 |
else:
|
| 676 |
-
audio=
|
| 677 |
-
|
| 678 |
else:
|
| 679 |
response = "Text refiner is not initilzed, please input openai api key."
|
| 680 |
state = state + [(chat_input, response)]
|
|
@@ -722,9 +728,9 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
|
|
| 722 |
visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
|
| 723 |
print("memory",visual_chatgpt.agent.memory)
|
| 724 |
# visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
|
| 725 |
-
parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and
|
| 726 |
parsed_data = json.loads(parsed_data.replace("'", "\""))
|
| 727 |
-
name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["
|
| 728 |
# artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
|
| 729 |
|
| 730 |
|
|
@@ -736,7 +742,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
|
|
| 736 |
]
|
| 737 |
|
| 738 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
| 739 |
-
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"
|
| 740 |
|
| 741 |
|
| 742 |
|
|
@@ -774,7 +780,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
| 774 |
|
| 775 |
enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
|
| 776 |
out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
|
| 777 |
-
|
|
|
|
| 778 |
state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
|
| 779 |
update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
|
| 780 |
text = out['generated_captions']['raw_caption']
|
|
@@ -798,13 +805,11 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
| 798 |
|
| 799 |
print("new crop save",new_crop_save_path)
|
| 800 |
|
| 801 |
-
yield state, state, click_state, image_input_nobackground,
|
| 802 |
|
| 803 |
|
| 804 |
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
async def submit_caption(state, text_refiner, length, sentiment, factuality, language,
|
| 808 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 809 |
autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
| 810 |
print("state",state)
|
|
@@ -846,6 +851,9 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
|
|
| 846 |
# refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 847 |
# input_points=input_points, input_labels=input_labels)
|
| 848 |
try:
|
|
|
|
|
|
|
|
|
|
| 849 |
audio_output = await texttospeech(read_info, language, autoplay)
|
| 850 |
print("done")
|
| 851 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
|
@@ -858,16 +866,11 @@ async def submit_caption(state, text_refiner, length, sentiment, factuality, lan
|
|
| 858 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
| 859 |
|
| 860 |
else:
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
| 866 |
|
| 867 |
-
except Exception as e:
|
| 868 |
-
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 869 |
-
print(f"Error during TTS prediction: {str(e)}")
|
| 870 |
-
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
| 871 |
|
| 872 |
def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
|
| 873 |
|
|
@@ -1069,7 +1072,7 @@ async def inference_traject(origin_image,sketcher_image, enable_wiki, language,
|
|
| 1069 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 1070 |
print(f"Error during TTS prediction: {str(e)}")
|
| 1071 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
| 1072 |
-
return state, state, image_input,audio_output,crop_save_path
|
| 1073 |
|
| 1074 |
|
| 1075 |
else:
|
|
@@ -1222,58 +1225,58 @@ def cap_everything_withoutsound(image_input, visual_chatgpt, text_refiner,paragr
|
|
| 1222 |
# return state,dislike_res
|
| 1223 |
|
| 1224 |
|
| 1225 |
-
def get_style():
|
| 1226 |
-
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
|
| 1230 |
-
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
|
| 1276 |
-
|
| 1277 |
|
| 1278 |
# def handle_like_dislike(like_data, like_state, dislike_state):
|
| 1279 |
# if like_data.liked:
|
|
@@ -1323,9 +1326,21 @@ def print_like_dislike(x: gr.LikeData,like_res,dislike_res,state):
|
|
| 1323 |
dislike_res.append(x.value)
|
| 1324 |
state = state + [(None, f"Disliked Received 👎")]
|
| 1325 |
return like_res,dislike_res,state
|
| 1326 |
-
|
| 1327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1329 |
|
| 1330 |
def create_ui():
|
| 1331 |
title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
|
|
@@ -1345,7 +1360,7 @@ def create_ui():
|
|
| 1345 |
]
|
| 1346 |
|
| 1347 |
with gr.Blocks(
|
| 1348 |
-
css=
|
| 1349 |
theme=gr.themes.Base()
|
| 1350 |
) as iface:
|
| 1351 |
state = gr.State([])
|
|
@@ -1370,6 +1385,7 @@ def create_ui():
|
|
| 1370 |
dislike_res=gr.State([])
|
| 1371 |
gr.Markdown(title)
|
| 1372 |
gr.Markdown(description)
|
|
|
|
| 1373 |
# with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
|
| 1374 |
# with gr.Column(scale=0.5):
|
| 1375 |
# # gr.Markdown("Left side content")
|
|
@@ -1392,9 +1408,9 @@ def create_ui():
|
|
| 1392 |
value="English", label="Language", interactive=True, elem_classes="custom-language"
|
| 1393 |
)
|
| 1394 |
length = gr.Slider(
|
| 1395 |
-
minimum=
|
| 1396 |
-
maximum=
|
| 1397 |
-
value=
|
| 1398 |
step=1,
|
| 1399 |
interactive=True,
|
| 1400 |
label="Generated Caption Length",
|
|
@@ -1416,8 +1432,7 @@ def create_ui():
|
|
| 1416 |
# auto_play = gr.Checkbox(label="Check to autoplay audio", value=False,scale=0.4)
|
| 1417 |
# output_audio = gr.HTML(label="Synthesised Audio",scale=0.6)
|
| 1418 |
|
| 1419 |
-
with gr.Row():
|
| 1420 |
-
|
| 1421 |
with gr.Column(scale=6):
|
| 1422 |
with gr.Column(visible=False) as modules_not_need_gpt:
|
| 1423 |
with gr.Tab("Base(GPT Power)") as base_tab:
|
|
@@ -1426,7 +1441,7 @@ def create_ui():
|
|
| 1426 |
name_label_base = gr.Button(value="Name: ")
|
| 1427 |
artist_label_base = gr.Button(value="Artist: ")
|
| 1428 |
year_label_base = gr.Button(value="Year: ")
|
| 1429 |
-
material_label_base = gr.Button(value="
|
| 1430 |
|
| 1431 |
with gr.Tab("Base2") as base_tab2:
|
| 1432 |
image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")
|
|
@@ -1434,52 +1449,55 @@ def create_ui():
|
|
| 1434 |
name_label_base2 = gr.Button(value="Name: ")
|
| 1435 |
artist_label_base2 = gr.Button(value="Artist: ")
|
| 1436 |
year_label_base2 = gr.Button(value="Year: ")
|
| 1437 |
-
material_label_base2 = gr.Button(value="
|
| 1438 |
|
| 1439 |
with gr.Tab("Click") as click_tab:
|
| 1440 |
-
image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
|
| 1441 |
-
example_image = gr.Image(type="pil", interactive=False, visible=False)
|
| 1442 |
-
# example_image_click = gr.Image(type="pil", interactive=False, visible=False)
|
| 1443 |
-
with gr.Row():
|
| 1444 |
-
name_label = gr.Button(value="Name: ")
|
| 1445 |
-
artist_label = gr.Button(value="Artist: ")
|
| 1446 |
-
year_label = gr.Button(value="Year: ")
|
| 1447 |
-
material_label = gr.Button(value="Material: ")
|
| 1448 |
with gr.Row():
|
| 1449 |
-
with gr.Column():
|
|
|
|
|
|
|
| 1450 |
with gr.Row():
|
| 1451 |
-
|
| 1452 |
-
|
| 1453 |
-
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
| 1457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1458 |
with gr.Row():
|
| 1459 |
-
point_prompt = gr.Radio(
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
click_mode = gr.Radio(
|
| 1466 |
choices=["Continuous", "Single"],
|
| 1467 |
value="Continuous",
|
| 1468 |
label="Clicking Mode",
|
| 1469 |
scale=5,
|
| 1470 |
interactive=True)
|
| 1471 |
-
|
| 1472 |
-
|
| 1473 |
-
|
| 1474 |
-
with gr.Row():
|
| 1475 |
-
clear_button_click = gr.Button(value="Clear Clicks", interactive=True,scale=2)
|
| 1476 |
-
clear_button_image = gr.Button(value="Clear Image", interactive=True,scale=2)
|
| 1477 |
-
|
| 1478 |
-
with gr.Tab("Trajectory (beta)") as traj_tab:
|
| 1479 |
# sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10,
|
| 1480 |
# elem_id="image_sketcher")
|
| 1481 |
-
sketcher_input = gr.ImageEditor(type="pil", interactive=True
|
| 1482 |
-
|
| 1483 |
with gr.Row():
|
| 1484 |
name_label_traj = gr.Button(value="Name: ")
|
| 1485 |
artist_label_traj = gr.Button(value="Artist: ")
|
|
@@ -1489,28 +1507,16 @@ def create_ui():
|
|
| 1489 |
with gr.Row():
|
| 1490 |
clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True)
|
| 1491 |
submit_button_sketcher = gr.Button(value="Submit", interactive=True)
|
| 1492 |
-
with gr.Row():
|
| 1493 |
-
with gr.Row():
|
| 1494 |
-
focus_type_sketch = gr.Radio(
|
| 1495 |
-
choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
|
| 1496 |
-
value="CFV-D",
|
| 1497 |
-
label="Information Type",
|
| 1498 |
-
interactive=True)
|
| 1499 |
-
Input_sketch = gr.Radio(
|
| 1500 |
-
choices=["Trace+Seg", "Trace"],
|
| 1501 |
-
value="Trace",
|
| 1502 |
-
label="Trace Type",
|
| 1503 |
-
interactive=True)
|
| 1504 |
|
| 1505 |
with gr.Column(visible=False,scale=4) as modules_need_gpt1:
|
| 1506 |
-
with gr.Row():
|
| 1507 |
sentiment = gr.Radio(
|
| 1508 |
choices=["Positive", "Natural", "Negative"],
|
| 1509 |
value="Natural",
|
| 1510 |
label="Sentiment",
|
| 1511 |
interactive=True,
|
| 1512 |
)
|
| 1513 |
-
|
| 1514 |
factuality = gr.Radio(
|
| 1515 |
choices=["Factual", "Imagination"],
|
| 1516 |
value="Factual",
|
|
@@ -1531,6 +1537,8 @@ def create_ui():
|
|
| 1531 |
value="No",
|
| 1532 |
label="Expert",
|
| 1533 |
interactive=True)
|
|
|
|
|
|
|
| 1534 |
with gr.Column(visible=True) as modules_not_need_gpt3:
|
| 1535 |
gr.Examples(
|
| 1536 |
examples=examples,
|
|
@@ -1541,7 +1549,7 @@ def create_ui():
|
|
| 1541 |
|
| 1542 |
|
| 1543 |
|
| 1544 |
-
with gr.Column(scale=
|
| 1545 |
with gr.Column(visible=True) as module_key_input:
|
| 1546 |
openai_api_key = gr.Textbox(
|
| 1547 |
placeholder="Input openAI API key",
|
|
@@ -1563,20 +1571,16 @@ def create_ui():
|
|
| 1563 |
|
| 1564 |
with gr.Column(visible=False) as modules_not_need_gpt2:
|
| 1565 |
with gr.Blocks():
|
| 1566 |
-
chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600)
|
| 1567 |
with gr.Column(visible=False) as modules_need_gpt3:
|
| 1568 |
-
chat_input = gr.
|
| 1569 |
with gr.Row():
|
| 1570 |
-
clear_button_text = gr.Button(value="Clear
|
| 1571 |
-
|
|
|
|
| 1572 |
# upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
|
| 1573 |
# downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
|
| 1574 |
-
|
| 1575 |
-
|
| 1576 |
-
with gr.Row():
|
| 1577 |
-
export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
|
| 1578 |
-
with gr.Row():
|
| 1579 |
-
chat_log_file = gr.File(label="Download Chat Log")
|
| 1580 |
|
| 1581 |
# TTS interface hidden initially
|
| 1582 |
with gr.Column(visible=False) as tts_interface:
|
|
@@ -1689,6 +1693,15 @@ def create_ui():
|
|
| 1689 |
# show_share_button=True,
|
| 1690 |
# show_download_button=True
|
| 1691 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1692 |
|
| 1693 |
|
| 1694 |
|
|
@@ -1727,7 +1740,7 @@ def create_ui():
|
|
| 1727 |
# outputs=[result, seed],
|
| 1728 |
# api_name="run",
|
| 1729 |
# )
|
| 1730 |
-
|
| 1731 |
fn=infer,
|
| 1732 |
inputs=[new_crop_save_path],
|
| 1733 |
outputs=[result]
|
|
@@ -1742,106 +1755,106 @@ def create_ui():
|
|
| 1742 |
# this part is for 3d generate.
|
| 1743 |
###############################################################################
|
| 1744 |
|
| 1745 |
-
with gr.Row(variant="panel",visible=False) as d3_model:
|
| 1746 |
-
|
| 1747 |
-
|
| 1748 |
-
|
| 1749 |
-
|
| 1750 |
-
|
| 1751 |
-
|
| 1752 |
-
|
| 1753 |
-
|
| 1754 |
-
|
| 1755 |
-
|
| 1756 |
-
|
| 1757 |
-
|
| 1758 |
-
|
| 1759 |
-
|
| 1760 |
-
|
| 1761 |
-
|
| 1762 |
-
|
| 1763 |
-
|
| 1764 |
-
|
| 1765 |
-
|
| 1766 |
-
|
| 1767 |
-
|
| 1768 |
-
|
| 1769 |
-
|
| 1770 |
-
|
| 1771 |
|
| 1772 |
-
|
| 1773 |
-
|
| 1774 |
-
|
| 1775 |
-
|
| 1776 |
-
|
| 1777 |
-
|
| 1778 |
-
|
| 1779 |
|
| 1780 |
-
|
| 1781 |
-
|
| 1782 |
|
| 1783 |
-
|
| 1784 |
-
|
| 1785 |
-
|
| 1786 |
-
|
| 1787 |
-
|
| 1788 |
-
|
| 1789 |
-
|
| 1790 |
-
|
| 1791 |
-
|
| 1792 |
-
|
| 1793 |
-
|
| 1794 |
-
|
| 1795 |
|
| 1796 |
-
|
| 1797 |
|
| 1798 |
-
|
| 1799 |
-
|
| 1800 |
-
|
| 1801 |
-
|
| 1802 |
-
|
| 1803 |
-
|
| 1804 |
-
|
| 1805 |
|
| 1806 |
-
|
| 1807 |
-
|
| 1808 |
-
|
| 1809 |
-
|
| 1810 |
-
|
| 1811 |
-
|
| 1812 |
-
|
| 1813 |
|
| 1814 |
-
|
| 1815 |
-
|
| 1816 |
-
|
| 1817 |
-
|
| 1818 |
-
|
| 1819 |
-
|
| 1820 |
-
|
| 1821 |
-
|
| 1822 |
-
|
| 1823 |
-
|
| 1824 |
-
|
| 1825 |
-
|
| 1826 |
-
|
| 1827 |
|
| 1828 |
|
| 1829 |
|
| 1830 |
|
| 1831 |
-
mv_images = gr.State()
|
| 1832 |
|
| 1833 |
-
chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
|
| 1834 |
|
| 1835 |
-
submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
|
| 1836 |
-
|
| 1837 |
-
|
| 1838 |
-
|
| 1839 |
|
| 1840 |
-
).success(
|
| 1841 |
-
|
| 1842 |
-
|
| 1843 |
-
|
| 1844 |
-
)
|
| 1845 |
|
| 1846 |
###############################################################################
|
| 1847 |
# above part is for 3d generate.
|
|
@@ -1868,13 +1881,13 @@ def create_ui():
|
|
| 1868 |
|
| 1869 |
|
| 1870 |
|
| 1871 |
-
clear_button_sketcher.click(
|
| 1872 |
-
|
| 1873 |
-
|
| 1874 |
-
|
| 1875 |
-
|
| 1876 |
-
|
| 1877 |
-
)
|
| 1878 |
|
| 1879 |
|
| 1880 |
|
|
@@ -1882,11 +1895,11 @@ def create_ui():
|
|
| 1882 |
|
| 1883 |
openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
|
| 1884 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
|
| 1885 |
-
modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,
|
| 1886 |
enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
|
| 1887 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
|
| 1888 |
modules_not_need_gpt,
|
| 1889 |
-
modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,
|
| 1890 |
# openai_api_key.submit(init_openai_api_key,
|
| 1891 |
# outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
|
| 1892 |
# modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
|
|
@@ -1898,7 +1911,7 @@ def create_ui():
|
|
| 1898 |
disable_chatGPT_button.click(init_wo_openai_api_key,
|
| 1899 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
|
| 1900 |
modules_not_need_gpt,
|
| 1901 |
-
modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,
|
| 1902 |
|
| 1903 |
artist_label_base2.click(
|
| 1904 |
get_artistinfo,
|
|
@@ -1995,23 +2008,23 @@ def create_ui():
|
|
| 1995 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
| 1996 |
paragraph,artist])
|
| 1997 |
|
| 1998 |
-
image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
|
| 1999 |
-
|
| 2000 |
-
|
| 2001 |
-
|
| 2002 |
-
|
| 2003 |
|
| 2004 |
-
image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key],
|
| 2005 |
-
|
| 2006 |
-
|
| 2007 |
-
|
| 2008 |
-
|
| 2009 |
|
| 2010 |
-
sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
|
| 2011 |
-
|
| 2012 |
-
|
| 2013 |
-
|
| 2014 |
-
|
| 2015 |
|
| 2016 |
# image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
|
| 2017 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
|
|
@@ -2022,9 +2035,9 @@ def create_ui():
|
|
| 2022 |
chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
|
| 2023 |
[chatbot, state, aux_state,output_audio])
|
| 2024 |
chat_input.submit(lambda: "", None, chat_input)
|
| 2025 |
-
submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
|
| 2026 |
-
|
| 2027 |
-
submit_button_text.click(lambda: "", None, chat_input)
|
| 2028 |
example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
|
| 2029 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
| 2030 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
|
@@ -2068,37 +2081,103 @@ def create_ui():
|
|
| 2068 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
| 2069 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 2070 |
],
|
| 2071 |
-
outputs=[chatbot, state, click_state, image_input,
|
| 2072 |
show_progress=False, queue=True
|
| 2073 |
)
|
| 2074 |
|
| 2075 |
|
| 2076 |
-
|
| 2077 |
submit_caption,
|
| 2078 |
inputs=[
|
| 2079 |
-
state,
|
| 2080 |
-
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 2081 |
-
auto_play,paragraph,focus_type,openai_api_key,new_crop_save_path
|
| 2082 |
],
|
| 2083 |
outputs=[
|
| 2084 |
-
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,
|
| 2085 |
-
output_audio
|
| 2086 |
],
|
| 2087 |
show_progress=True,
|
| 2088 |
queue=True
|
| 2089 |
)
|
| 2090 |
-
|
| 2091 |
|
| 2092 |
-
|
| 2093 |
-
|
| 2094 |
-
|
| 2095 |
-
|
| 2096 |
-
|
| 2097 |
-
|
| 2098 |
-
|
| 2099 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2100 |
)
|
| 2101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2102 |
export_button.click(
|
| 2103 |
export_chat_log,
|
| 2104 |
inputs=[state,paragraph,like_res,dislike_res],
|
|
@@ -2129,4 +2208,4 @@ if __name__ == '__main__':
|
|
| 2129 |
iface = create_ui()
|
| 2130 |
iface.queue(api_open=False, max_size=10)
|
| 2131 |
# iface.queue(concurrency_count=5, api_open=False, max_size=10)
|
| 2132 |
-
iface.launch(server_name="0.0.0.0")
|
|
|
|
| 28 |
import edge_tts
|
| 29 |
from langchain import __version__
|
| 30 |
import torch
|
|
|
|
| 31 |
from transformers import AutoProcessor, SiglipModel
|
| 32 |
import faiss
|
| 33 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 37 |
import spaces
|
| 38 |
# Print the current version of LangChain
|
| 39 |
print(f"Current LangChain version: {__version__}")
|
| 40 |
+
|
| 41 |
+
print("testing testing")
|
| 42 |
# import tts
|
| 43 |
|
| 44 |
###############################################################################
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
# import spaces #
|
| 50 |
+
# import threading
|
| 51 |
|
| 52 |
+
# lock = threading.Lock()
|
| 53 |
import os
|
| 54 |
# import uuid
|
| 55 |
# from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
|
| 98 |
+
# def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
|
| 99 |
+
# """
|
| 100 |
+
# Get the rendering camera parameters.
|
| 101 |
+
# """
|
| 102 |
+
# c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
|
| 103 |
+
# if is_flexicubes:
|
| 104 |
+
# cameras = torch.linalg.inv(c2ws)
|
| 105 |
+
# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
|
| 106 |
+
# else:
|
| 107 |
+
# extrinsics = c2ws.flatten(-2)
|
| 108 |
+
# intrinsics = FOV_to_intrinsics(50.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
|
| 109 |
+
# cameras = torch.cat([extrinsics, intrinsics], dim=-1)
|
| 110 |
+
# cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
|
| 111 |
+
# return cameras
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# def images_to_video(images, output_path, fps=30):
|
| 115 |
+
# # images: (N, C, H, W)
|
| 116 |
+
# os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 117 |
+
# frames = []
|
| 118 |
+
# for i in range(images.shape[0]):
|
| 119 |
+
# frame = (images[i].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8).clip(0, 255)
|
| 120 |
+
# assert frame.shape[0] == images.shape[2] and frame.shape[1] == images.shape[3], \
|
| 121 |
+
# f"Frame shape mismatch: {frame.shape} vs {images.shape}"
|
| 122 |
+
# assert frame.min() >= 0 and frame.max() <= 255, \
|
| 123 |
+
# f"Frame value out of range: {frame.min()} ~ {frame.max()}"
|
| 124 |
+
# frames.append(frame)
|
| 125 |
+
# imageio.mimwrite(output_path, np.stack(frames), fps=fps, codec='h264')
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ###############################################################################
|
| 129 |
+
# # Configuration.
|
| 130 |
+
# ###############################################################################
|
| 131 |
+
|
| 132 |
+
# import shutil
|
| 133 |
+
|
| 134 |
+
# def find_cuda():
|
| 135 |
+
# # Check if CUDA_HOME or CUDA_PATH environment variables are set
|
| 136 |
+
# cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
|
| 137 |
+
|
| 138 |
+
# if cuda_home and os.path.exists(cuda_home):
|
| 139 |
+
# return cuda_home
|
| 140 |
+
|
| 141 |
+
# # Search for the nvcc executable in the system's PATH
|
| 142 |
+
# nvcc_path = shutil.which('nvcc')
|
| 143 |
+
|
| 144 |
+
# if nvcc_path:
|
| 145 |
+
# # Remove the 'bin/nvcc' part to get the CUDA installation path
|
| 146 |
+
# cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
|
| 147 |
+
# return cuda_path
|
| 148 |
+
|
| 149 |
+
# return None
|
| 150 |
+
|
| 151 |
+
# cuda_path = find_cuda()
|
| 152 |
+
|
| 153 |
+
# if cuda_path:
|
| 154 |
+
# print(f"CUDA installation found at: {cuda_path}")
|
| 155 |
+
# else:
|
| 156 |
+
# print("CUDA installation not found")
|
| 157 |
+
|
| 158 |
+
# config_path = 'configs/instant-nerf-base.yaml'
|
| 159 |
+
# config = OmegaConf.load(config_path)
|
| 160 |
+
# config_name = os.path.basename(config_path).replace('.yaml', '')
|
| 161 |
+
# model_config = config.model_config
|
| 162 |
+
# infer_config = config.infer_config
|
| 163 |
+
|
| 164 |
+
# IS_FLEXICUBES = True if config_name.startswith('instant-mesh') else False
|
| 165 |
+
|
| 166 |
+
# device = torch.device('cuda')
|
| 167 |
+
|
| 168 |
+
# # load diffusion model
|
| 169 |
+
# print('Loading diffusion model ...')
|
| 170 |
+
# pipeline = DiffusionPipeline.from_pretrained(
|
| 171 |
+
# "sudo-ai/zero123plus-v1.2",
|
| 172 |
+
# custom_pipeline="zero123plus",
|
| 173 |
+
# torch_dtype=torch.float16,
|
| 174 |
+
# )
|
| 175 |
+
# pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
|
| 176 |
+
# pipeline.scheduler.config, timestep_spacing='trailing'
|
| 177 |
+
# )
|
| 178 |
+
|
| 179 |
+
# # load custom white-background UNet
|
| 180 |
+
# unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
|
| 181 |
+
# state_dict = torch.load(unet_ckpt_path, map_location='cpu')
|
| 182 |
+
# pipeline.unet.load_state_dict(state_dict, strict=True)
|
| 183 |
+
|
| 184 |
+
# pipeline = pipeline.to(device)
|
| 185 |
+
|
| 186 |
+
# # load reconstruction model
|
| 187 |
+
# print('Loading reconstruction model ...')
|
| 188 |
+
# model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_nerf_base.ckpt", repo_type="model")
|
| 189 |
+
# model0 = instantiate_from_config(model_config)
|
| 190 |
+
# state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
|
| 191 |
+
# state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
|
| 192 |
+
# model0.load_state_dict(state_dict, strict=True)
|
| 193 |
+
|
| 194 |
+
# model0 = model0.to(device)
|
| 195 |
+
|
| 196 |
+
# print('Loading Finished!')
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# def check_input_image(input_image):
|
| 200 |
+
# if input_image is None:
|
| 201 |
+
# raise gr.Error("No image uploaded!")
|
| 202 |
+
# image = None
|
| 203 |
+
# else:
|
| 204 |
+
# image = Image.open(input_image)
|
| 205 |
+
# return image
|
| 206 |
|
| 207 |
+
# def preprocess(input_image, do_remove_background):
|
| 208 |
|
| 209 |
+
# rembg_session = rembg.new_session() if do_remove_background else None
|
| 210 |
|
| 211 |
+
# if do_remove_background:
|
| 212 |
+
# input_image = remove_background(input_image, rembg_session)
|
| 213 |
+
# input_image = resize_foreground(input_image, 0.85)
|
| 214 |
|
| 215 |
+
# return input_image
|
| 216 |
|
| 217 |
|
| 218 |
+
# # @spaces.GPU
|
| 219 |
+
# def generate_mvs(input_image, sample_steps, sample_seed):
|
| 220 |
|
| 221 |
+
# seed_everything(sample_seed)
|
| 222 |
|
| 223 |
+
# # sampling
|
| 224 |
+
# z123_image = pipeline(
|
| 225 |
+
# input_image,
|
| 226 |
+
# num_inference_steps=sample_steps
|
| 227 |
+
# ).images[0]
|
| 228 |
|
| 229 |
+
# show_image = np.asarray(z123_image, dtype=np.uint8)
|
| 230 |
+
# show_image = torch.from_numpy(show_image) # (960, 640, 3)
|
| 231 |
+
# show_image = rearrange(show_image, '(n h) (m w) c -> (n m) h w c', n=3, m=2)
|
| 232 |
+
# show_image = rearrange(show_image, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
|
| 233 |
+
# show_image = Image.fromarray(show_image.numpy())
|
| 234 |
|
| 235 |
+
# return z123_image, show_image
|
| 236 |
|
| 237 |
|
| 238 |
+
# # @spaces.GPU
|
| 239 |
+
# def make3d(images):
|
| 240 |
|
| 241 |
+
# global model0
|
| 242 |
+
# if IS_FLEXICUBES:
|
| 243 |
+
# model0.init_flexicubes_geometry(device)
|
| 244 |
+
# model0 = model0.eval()
|
| 245 |
|
| 246 |
+
# images = np.asarray(images, dtype=np.float32) / 255.0
|
| 247 |
+
# images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
|
| 248 |
+
# images = rearrange(images, 'c (n h) (m w) -> (n m) c h w', n=3, m=2) # (6, 3, 320, 320)
|
| 249 |
|
| 250 |
+
# input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)
|
| 251 |
+
# render_cameras = get_render_cameras(batch_size=1, radius=2.5, is_flexicubes=IS_FLEXICUBES).to(device)
|
| 252 |
|
| 253 |
+
# images = images.unsqueeze(0).to(device)
|
| 254 |
+
# images = v2.functional.resize(images, (320, 320), interpolation=3, antialias=True).clamp(0, 1)
|
| 255 |
|
| 256 |
+
# mesh_fpath = tempfile.NamedTemporaryFile(suffix=f".obj", delete=False).name
|
| 257 |
+
# print(mesh_fpath)
|
| 258 |
+
# mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
|
| 259 |
+
# mesh_dirname = os.path.dirname(mesh_fpath)
|
| 260 |
+
# video_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.mp4")
|
| 261 |
+
# mesh_glb_fpath = os.path.join(mesh_dirname, f"{mesh_basename}.glb")
|
| 262 |
|
| 263 |
+
# with torch.no_grad():
|
| 264 |
+
# # get triplane
|
| 265 |
+
# planes = model0.forward_planes(images, input_cameras)
|
| 266 |
|
| 267 |
+
# # # get video
|
| 268 |
+
# # chunk_size = 20 if IS_FLEXICUBES else 1
|
| 269 |
+
# # render_size = 384
|
| 270 |
|
| 271 |
+
# # frames = []
|
| 272 |
+
# # for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
|
| 273 |
+
# # if IS_FLEXICUBES:
|
| 274 |
+
# # frame = model.forward_geometry(
|
| 275 |
+
# # planes,
|
| 276 |
+
# # render_cameras[:, i:i+chunk_size],
|
| 277 |
+
# # render_size=render_size,
|
| 278 |
+
# # )['img']
|
| 279 |
+
# # else:
|
| 280 |
+
# # frame = model.synthesizer(
|
| 281 |
+
# # planes,
|
| 282 |
+
# # cameras=render_cameras[:, i:i+chunk_size],
|
| 283 |
+
# # render_size=render_size,
|
| 284 |
+
# # )['images_rgb']
|
| 285 |
+
# # frames.append(frame)
|
| 286 |
+
# # frames = torch.cat(frames, dim=1)
|
| 287 |
+
|
| 288 |
+
# # images_to_video(
|
| 289 |
+
# # frames[0],
|
| 290 |
+
# # video_fpath,
|
| 291 |
+
# # fps=30,
|
| 292 |
+
# # )
|
| 293 |
+
|
| 294 |
+
# # print(f"Video saved to {video_fpath}")
|
| 295 |
+
|
| 296 |
+
# # get mesh
|
| 297 |
+
# mesh_out = model0.extract_mesh(
|
| 298 |
+
# planes,
|
| 299 |
+
# use_texture_map=False,
|
| 300 |
+
# **infer_config,
|
| 301 |
+
# )
|
| 302 |
+
|
| 303 |
+
# vertices, faces, vertex_colors = mesh_out
|
| 304 |
+
# vertices = vertices[:, [1, 2, 0]]
|
| 305 |
|
| 306 |
+
# save_glb(vertices, faces, vertex_colors, mesh_glb_fpath)
|
| 307 |
+
# save_obj(vertices, faces, vertex_colors, mesh_fpath)
|
| 308 |
|
| 309 |
+
# print(f"Mesh saved to {mesh_fpath}")
|
| 310 |
|
| 311 |
+
# return mesh_fpath, mesh_glb_fpath
|
| 312 |
|
| 313 |
|
| 314 |
###############################################################################
|
|
|
|
| 472 |
|
| 473 |
css = """
|
| 474 |
#warning {background-color: #FFCCCB}
|
| 475 |
+
.tools_button {
|
| 476 |
+
background: white;
|
| 477 |
+
border: none !important;
|
| 478 |
+
box-shadow: none !important;
|
| 479 |
+
}
|
| 480 |
+
#tool_box {max-width: 50px}
|
| 481 |
+
|
| 482 |
"""
|
| 483 |
filtered_language_dict = {
|
| 484 |
'English': 'en-US-JennyNeural',
|
|
|
|
| 491 |
}
|
| 492 |
|
| 493 |
# Focus-type code -> integer index. The codes are the same strings used as the
# values of the focus_d / focus_da / focus_dai / focus_dda buttons in the UI.
focus_map = dict(D=0, DA=1, DAI=2, DDA=3)
|
| 499 |
|
| 500 |
'''
|
|
|
|
| 620 |
global gpt_state
|
| 621 |
gpt_state=1
|
| 622 |
# return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]*3
|
| 623 |
+
return [gr.update(visible=True)]+[gr.update(visible=False)]+[gr.update(visible=True)]*3+[gr.update(visible=False)]+ [gr.update(visible=False)]*3 + [text_refiner, visual_chatgpt, None]+[gr.update(visible=True)]
|
| 624 |
else:
|
| 625 |
gpt_state=0
|
| 626 |
# return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]*3
|
| 627 |
+
return [gr.update(visible=False)]*7 + [gr.update(visible=True)]*2 + [text_refiner, visual_chatgpt, 'Your OpenAI API Key is not available']+[gr.update(visible=False)]
|
| 628 |
|
| 629 |
def init_wo_openai_api_key():
    """Put the UI into the mode that runs without an OpenAI API key.

    Sets the module-level ``gpt_state`` flag to 0 and returns a list of 13
    values, positionally matched to the ``outputs`` of
    ``disable_chatGPT_button.click``:

    * 4 hidden updates  -> modules_need_gpt0 .. modules_need_gpt3
    * 1 visible update  -> modules_not_need_gpt
    * 1 hidden update   -> modules_not_need_gpt2
    * 1 visible update  -> tts_interface
    * 2 hidden updates  -> module_key_input, module_notification_box
    * 3 ``None`` values -> text_refiner, visual_chatgpt, notification_box
    * 1 hidden update   -> top_row
    """
    global gpt_state
    gpt_state = 0

    # (visible, repeat) for the nine layout containers, in output order.
    # Each group is built from a single gr.update(...) object repeated
    # `repeat` times.
    container_spec = ((False, 4), (True, 1), (False, 1), (True, 1), (False, 2))

    outputs = []
    for is_visible, repeat in container_spec:
        outputs.extend([gr.update(visible=is_visible)] * repeat)

    # No text refiner / chat agent / notification text in key-less mode.
    outputs.extend([None, None, None])
    # Final slot: keep the top row hidden.
    outputs.append(gr.update(visible=False))
    return outputs
|
| 634 |
|
| 635 |
def get_click_prompt(chat_input, click_state, click_mode):
|
| 636 |
inputs = json.loads(chat_input)
|
|
|
|
| 670 |
|
| 671 |
async def chat_input_callback(*args):
|
| 672 |
visual_chatgpt, chat_input, click_state, state, aux_state ,language , autoplay = args
|
| 673 |
+
message = chat_input["text"]
|
| 674 |
if visual_chatgpt is not None:
|
| 675 |
+
state, _, aux_state, _ = visual_chatgpt.run_text(message, state, aux_state)
|
| 676 |
last_text, last_response = state[-1]
|
| 677 |
print("last response",last_response)
|
| 678 |
+
if autoplay==False:
|
| 679 |
+
return state, state, aux_state, None
|
| 680 |
+
|
| 681 |
else:
|
| 682 |
+
audio = await texttospeech(last_response,language,autoplay)
|
| 683 |
+
return state, state, aux_state, audio
|
| 684 |
else:
|
| 685 |
response = "Text refiner is not initilzed, please input openai api key."
|
| 686 |
state = state + [(chat_input, response)]
|
|
|
|
| 728 |
visual_chatgpt.agent.memory.save_context({"input": Human_prompt}, {"output": AI_prompt})
|
| 729 |
print("memory",visual_chatgpt.agent.memory)
|
| 730 |
# visual_chatgpt.agent.memory.buffer = visual_chatgpt.agent.memory.buffer + visual_chatgpt.global_prompt
|
| 731 |
+
parsed_data = get_gpt_response(openai_api_key, new_image_path,"Please provide the name, artist, year of creation (including the art historical period), and painting style used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\", \"artist\": \"Name of the artist\", \"year\": \"Year of creation (Art historical period)\", \"style\": \"Painting style used in the painting\" }")
|
| 732 |
parsed_data = json.loads(parsed_data.replace("'", "\""))
|
| 733 |
+
name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["style"]
|
| 734 |
# artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
|
| 735 |
|
| 736 |
|
|
|
|
| 742 |
]
|
| 743 |
|
| 744 |
return [state, state, image_input, click_state, image_input, image_input, image_input, image_input, image_embedding, \
|
| 745 |
+
original_size, input_size] + [f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Style: {material}"]*4 + [paragraph,artist]
|
| 746 |
|
| 747 |
|
| 748 |
|
|
|
|
| 780 |
|
| 781 |
enable_wiki = True if enable_wiki in ['True', 'TRUE', 'true', True, 'Yes', 'YES', 'yes'] else False
|
| 782 |
out = model.inference(image_input, prompt, controls, disable_gpt=True, enable_wiki=enable_wiki, verbose=True, args={'clip_filter': False})[0]
|
| 783 |
+
# state = state + [("You've selected image point at {}, ".format(prompt["input_point"]), None)]
|
| 784 |
+
|
| 785 |
state = state + [("Image point: {}, Input label: {}".format(prompt["input_point"], prompt["input_label"]), None)]
|
| 786 |
update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
|
| 787 |
text = out['generated_captions']['raw_caption']
|
|
|
|
| 805 |
|
| 806 |
print("new crop save",new_crop_save_path)
|
| 807 |
|
| 808 |
+
yield state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
| 809 |
|
| 810 |
|
| 811 |
|
| 812 |
+
async def submit_caption(state,length, sentiment, factuality, language,
|
|
|
|
|
|
|
| 813 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 814 |
autoplay,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
| 815 |
print("state",state)
|
|
|
|
| 851 |
# refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 852 |
# input_points=input_points, input_labels=input_labels)
|
| 853 |
try:
|
| 854 |
+
if autoplay==False:
|
| 855 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
|
| 856 |
+
|
| 857 |
audio_output = await texttospeech(read_info, language, autoplay)
|
| 858 |
print("done")
|
| 859 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
|
|
|
| 866 |
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, audio_output
|
| 867 |
|
| 868 |
else:
|
| 869 |
+
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 870 |
+
print(f"Error during TTS prediction: {str(e)}")
|
| 871 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None
|
| 872 |
+
|
|
|
|
| 873 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
|
| 875 |
def generate_prompt(focus_type, paragraph,length, sentiment, factuality, language):
|
| 876 |
|
|
|
|
| 1072 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 1073 |
print(f"Error during TTS prediction: {str(e)}")
|
| 1074 |
# return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
| 1075 |
+
return state, state, image_input,audio_output,crop_save_path
|
| 1076 |
|
| 1077 |
|
| 1078 |
else:
|
|
|
|
| 1225 |
# return state,dislike_res
|
| 1226 |
|
| 1227 |
|
| 1228 |
+
# def get_style():
|
| 1229 |
+
# current_version = version.parse(gr.__version__)
|
| 1230 |
+
# print(current_version)
|
| 1231 |
+
# if current_version <= version.parse('3.24.1'):
|
| 1232 |
+
# style = '''
|
| 1233 |
+
# #image_sketcher{min-height:500px}
|
| 1234 |
+
# #image_sketcher [data-testid="image"], #image_sketcher [data-testid="image"] > div{min-height: 500px}
|
| 1235 |
+
# #image_upload{min-height:500px}
|
| 1236 |
+
# #image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 500px}
|
| 1237 |
+
# .custom-language {
|
| 1238 |
+
# width: 20%;
|
| 1239 |
+
# }
|
| 1240 |
+
|
| 1241 |
+
# .custom-autoplay {
|
| 1242 |
+
# width: 40%;
|
| 1243 |
+
# }
|
| 1244 |
+
|
| 1245 |
+
# .custom-output {
|
| 1246 |
+
# width: 30%;
|
| 1247 |
+
# }
|
| 1248 |
+
|
| 1249 |
+
# '''
|
| 1250 |
+
# elif current_version <= version.parse('3.27'):
|
| 1251 |
+
# style = '''
|
| 1252 |
+
# #image_sketcher{min-height:500px}
|
| 1253 |
+
# #image_upload{min-height:500px}
|
| 1254 |
+
# .custom-language {
|
| 1255 |
+
# width: 20%;
|
| 1256 |
+
# }
|
| 1257 |
+
|
| 1258 |
+
# .custom-autoplay {
|
| 1259 |
+
# width: 40%;
|
| 1260 |
+
# }
|
| 1261 |
+
|
| 1262 |
+
# .custom-output {
|
| 1263 |
+
# width: 30%;
|
| 1264 |
+
# }
|
| 1265 |
+
# .custom-gallery {
|
| 1266 |
+
# display: flex;
|
| 1267 |
+
# flex-wrap: wrap;
|
| 1268 |
+
# justify-content: space-between;
|
| 1269 |
+
# }
|
| 1270 |
+
|
| 1271 |
+
# .custom-gallery img {
|
| 1272 |
+
# width: 48%;
|
| 1273 |
+
# margin-bottom: 10px;
|
| 1274 |
+
# }
|
| 1275 |
+
# '''
|
| 1276 |
+
# else:
|
| 1277 |
+
# style = None
|
| 1278 |
|
| 1279 |
+
# return style
|
| 1280 |
|
| 1281 |
# def handle_like_dislike(like_data, like_state, dislike_state):
|
| 1282 |
# if like_data.liked:
|
|
|
|
| 1326 |
dislike_res.append(x.value)
|
| 1327 |
state = state + [(None, f"Disliked Received 👎")]
|
| 1328 |
return like_res,dislike_res,state
|
| 1329 |
+
|
| 1330 |
+
|
| 1331 |
+
def toggle_icons_and_update_prompt(point_prompt):
    """Flip the point-prompt mode and return icon updates that match it.

    Args:
        point_prompt: The current mode string. ``"Positive"`` flips to
            ``"Negative"``; any other value flips to ``"Positive"``.

    Returns:
        A 3-tuple ``(new_prompt, add_icon_update, minus_icon_update)``: the
        new mode string, followed by two ``gr.update(icon=...)`` objects
        carrying the plus-icon and minus-icon paths. Which components receive
        them is decided by the caller's ``outputs`` wiring.
    """
    new_prompt = "Negative" if point_prompt == "Positive" else "Positive"

    # Pick the icons from the NEW mode so the highlighted (blue) icon always
    # agrees with the value stored back into the prompt state. This matches
    # the module defaults, where "Positive" is paired with the blue plus icon
    # and the plain minus icon. Previously the icons were derived from the old
    # mode, so they were out of sync with the state after the first toggle.
    if new_prompt == "Positive":
        new_add_icon = "assets/icons/plus-square-blue.png"
        new_minus_icon = "assets/icons/minus-square.png"
    else:
        new_add_icon = "assets/icons/plus-square.png"
        new_minus_icon = "assets/icons/minus-square-blue.png"

    return new_prompt, gr.update(icon=new_add_icon), gr.update(icon=new_minus_icon)
|
| 1339 |
+
|
| 1340 |
+
add_icon_path="assets/icons/plus-square-blue.png"
|
| 1341 |
+
minus_icon_path="assets/icons/minus-square.png"
|
| 1342 |
+
|
| 1343 |
+
print("this is a print test")
|
| 1344 |
|
| 1345 |
def create_ui():
|
| 1346 |
title = """<p><h1 align="center">EyeSee Anything in Art</h1></p>
|
|
|
|
| 1360 |
]
|
| 1361 |
|
| 1362 |
with gr.Blocks(
|
| 1363 |
+
css=css,
|
| 1364 |
theme=gr.themes.Base()
|
| 1365 |
) as iface:
|
| 1366 |
state = gr.State([])
|
|
|
|
| 1385 |
dislike_res=gr.State([])
|
| 1386 |
gr.Markdown(title)
|
| 1387 |
gr.Markdown(description)
|
| 1388 |
+
point_prompt = gr.State("Positive")
|
| 1389 |
# with gr.Row(align="right", visible=False, elem_id="top_row") as top_row:
|
| 1390 |
# with gr.Column(scale=0.5):
|
| 1391 |
# # gr.Markdown("Left side content")
|
|
|
|
| 1408 |
value="English", label="Language", interactive=True, elem_classes="custom-language"
|
| 1409 |
)
|
| 1410 |
length = gr.Slider(
|
| 1411 |
+
minimum=40,
|
| 1412 |
+
maximum=200,
|
| 1413 |
+
value=80,
|
| 1414 |
step=1,
|
| 1415 |
interactive=True,
|
| 1416 |
label="Generated Caption Length",
|
|
|
|
| 1432 |
# auto_play = gr.Checkbox(label="Check to autoplay audio", value=False,scale=0.4)
|
| 1433 |
# output_audio = gr.HTML(label="Synthesised Audio",scale=0.6)
|
| 1434 |
|
| 1435 |
+
with gr.Row():
|
|
|
|
| 1436 |
with gr.Column(scale=6):
|
| 1437 |
with gr.Column(visible=False) as modules_not_need_gpt:
|
| 1438 |
with gr.Tab("Base(GPT Power)") as base_tab:
|
|
|
|
| 1441 |
name_label_base = gr.Button(value="Name: ")
|
| 1442 |
artist_label_base = gr.Button(value="Artist: ")
|
| 1443 |
year_label_base = gr.Button(value="Year: ")
|
| 1444 |
+
material_label_base = gr.Button(value="Style: ")
|
| 1445 |
|
| 1446 |
with gr.Tab("Base2") as base_tab2:
|
| 1447 |
image_input_base_2 = gr.Image(type="pil", interactive=True, elem_id="image_upload")
|
|
|
|
| 1449 |
name_label_base2 = gr.Button(value="Name: ")
|
| 1450 |
artist_label_base2 = gr.Button(value="Artist: ")
|
| 1451 |
year_label_base2 = gr.Button(value="Year: ")
|
| 1452 |
+
material_label_base2 = gr.Button(value="Style: ")
|
| 1453 |
|
| 1454 |
with gr.Tab("Click") as click_tab:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1455 |
with gr.Row():
|
| 1456 |
+
with gr.Column(scale=10,min_width=450):
|
| 1457 |
+
image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
|
| 1458 |
+
example_image = gr.Image(type="pil", interactive=False, visible=False)
|
| 1459 |
with gr.Row():
|
| 1460 |
+
name_label = gr.Button(value="Name: ")
|
| 1461 |
+
artist_label = gr.Button(value="Artist: ")
|
| 1462 |
+
year_label = gr.Button(value="Year: ")
|
| 1463 |
+
material_label = gr.Button(value="Style: ")
|
| 1464 |
+
|
| 1465 |
+
|
| 1466 |
+
# example_image_click = gr.Image(type="pil", interactive=False, visible=False)
|
| 1467 |
+
# the tool column
|
| 1468 |
+
with gr.Column(scale=1,elem_id="tool_box",min_width=100):
|
| 1469 |
+
add_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=add_icon_path)
|
| 1470 |
+
minus_button = gr.Button(value="", interactive=True,elem_classes="tools_button",icon=minus_icon_path)
|
| 1471 |
+
clear_button_click = gr.Button(value="Reset", interactive=True,elem_classes="tools_button")
|
| 1472 |
+
clear_button_image = gr.Button(value="Change Image", interactive=True,elem_classes="tools_button")
|
| 1473 |
+
focus_d = gr.Button(value="D",interactive=True,elem_classes="function_button")
|
| 1474 |
+
focus_da = gr.Button(value="DA",interactive=True,elem_classes="function_button")
|
| 1475 |
+
focus_dai = gr.Button(value="DAI",interactive=True,elem_classes="function_button")
|
| 1476 |
+
focus_dda = gr.Button(value="DDA",interactive=True,elem_classes="function_button")
|
| 1477 |
+
recommend_btn = gr.Button(value="Recommend",interactive=True,elem_classes="function_button")
|
| 1478 |
+
|
| 1479 |
+
with gr.Row(visible=False):
|
| 1480 |
+
with gr.Column():
|
| 1481 |
with gr.Row():
|
| 1482 |
+
# point_prompt = gr.Radio(
|
| 1483 |
+
# choices=["Positive", "Negative"],
|
| 1484 |
+
# value="Positive",
|
| 1485 |
+
# label="Point Prompt",
|
| 1486 |
+
# scale=5,
|
| 1487 |
+
# interactive=True)
|
| 1488 |
click_mode = gr.Radio(
|
| 1489 |
choices=["Continuous", "Single"],
|
| 1490 |
value="Continuous",
|
| 1491 |
label="Clicking Mode",
|
| 1492 |
scale=5,
|
| 1493 |
interactive=True)
|
| 1494 |
+
|
| 1495 |
+
|
| 1496 |
+
with gr.Tab("Trajectory (beta)", visible=False) as traj_tab:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1497 |
# sketcher_input = ImageSketcher(type="pil", interactive=True, brush_radius=10,
|
| 1498 |
# elem_id="image_sketcher")
|
| 1499 |
+
sketcher_input = gr.ImageEditor(type="pil", interactive=True
|
| 1500 |
+
)
|
| 1501 |
with gr.Row():
|
| 1502 |
name_label_traj = gr.Button(value="Name: ")
|
| 1503 |
artist_label_traj = gr.Button(value="Artist: ")
|
|
|
|
| 1507 |
with gr.Row():
|
| 1508 |
clear_button_sketcher = gr.Button(value="Clear Sketch", interactive=True)
|
| 1509 |
submit_button_sketcher = gr.Button(value="Submit", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1510 |
|
| 1511 |
with gr.Column(visible=False,scale=4) as modules_need_gpt1:
|
| 1512 |
+
with gr.Row(visible=False):
|
| 1513 |
sentiment = gr.Radio(
|
| 1514 |
choices=["Positive", "Natural", "Negative"],
|
| 1515 |
value="Natural",
|
| 1516 |
label="Sentiment",
|
| 1517 |
interactive=True,
|
| 1518 |
)
|
| 1519 |
+
|
| 1520 |
factuality = gr.Radio(
|
| 1521 |
choices=["Factual", "Imagination"],
|
| 1522 |
value="Factual",
|
|
|
|
| 1537 |
value="No",
|
| 1538 |
label="Expert",
|
| 1539 |
interactive=True)
|
| 1540 |
+
|
| 1541 |
+
|
| 1542 |
with gr.Column(visible=True) as modules_not_need_gpt3:
|
| 1543 |
gr.Examples(
|
| 1544 |
examples=examples,
|
|
|
|
| 1549 |
|
| 1550 |
|
| 1551 |
|
| 1552 |
+
with gr.Column(scale=4):
|
| 1553 |
with gr.Column(visible=True) as module_key_input:
|
| 1554 |
openai_api_key = gr.Textbox(
|
| 1555 |
placeholder="Input openAI API key",
|
|
|
|
| 1571 |
|
| 1572 |
with gr.Column(visible=False) as modules_not_need_gpt2:
|
| 1573 |
with gr.Blocks():
|
| 1574 |
+
chatbot = gr.Chatbot(label="Chatbox", elem_classes="chatbot",likeable=True,height=600,bubble_full_width=False)
|
| 1575 |
with gr.Column(visible=False) as modules_need_gpt3:
|
| 1576 |
+
chat_input = gr.MultimodalTextbox(interactive=True, file_types=[".txt"], placeholder="Message EyeSee...", show_label=False)
|
| 1577 |
with gr.Row():
|
| 1578 |
+
clear_button_text = gr.Button(value="Clear Chat", interactive=True)
|
| 1579 |
+
export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
|
| 1580 |
+
# submit_button_text = gr.Button(value="Send", interactive=True, variant="primary")
|
| 1581 |
# upvote_btn = gr.Button(value="👍 Upvote", interactive=True)
|
| 1582 |
# downvote_btn = gr.Button(value="👎 Downvote", interactive=True)
|
| 1583 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1584 |
|
| 1585 |
# TTS interface hidden initially
|
| 1586 |
with gr.Column(visible=False) as tts_interface:
|
|
|
|
| 1693 |
# show_share_button=True,
|
| 1694 |
# show_download_button=True
|
| 1695 |
)
|
| 1696 |
+
|
| 1697 |
+
with gr.Row():
|
| 1698 |
+
naritive = gr.Radio(
|
| 1699 |
+
choices=["Third", "Artist","Item"],
|
| 1700 |
+
value="Third",
|
| 1701 |
+
label="narritive",
|
| 1702 |
+
scale=5,
|
| 1703 |
+
interactive=True)
|
| 1704 |
+
chat_log_file = gr.File(label="Download Chat Log",scale=5)
|
| 1705 |
|
| 1706 |
|
| 1707 |
|
|
|
|
| 1740 |
# outputs=[result, seed],
|
| 1741 |
# api_name="run",
|
| 1742 |
# )
|
| 1743 |
+
recommend_btn.click(
|
| 1744 |
fn=infer,
|
| 1745 |
inputs=[new_crop_save_path],
|
| 1746 |
outputs=[result]
|
|
|
|
| 1755 |
# this part is for 3d generate.
|
| 1756 |
###############################################################################
|
| 1757 |
|
| 1758 |
+
# with gr.Row(variant="panel",visible=False) as d3_model:
|
| 1759 |
+
# with gr.Column():
|
| 1760 |
+
# with gr.Row():
|
| 1761 |
+
# input_image = gr.Image(
|
| 1762 |
+
# label="Input Image",
|
| 1763 |
+
# image_mode="RGBA",
|
| 1764 |
+
# sources="upload",
|
| 1765 |
+
# #width=256,
|
| 1766 |
+
# #height=256,
|
| 1767 |
+
# type="pil",
|
| 1768 |
+
# elem_id="content_image",
|
| 1769 |
+
# )
|
| 1770 |
+
# processed_image = gr.Image(
|
| 1771 |
+
# label="Processed Image",
|
| 1772 |
+
# image_mode="RGBA",
|
| 1773 |
+
# #width=256,
|
| 1774 |
+
# #height=256,
|
| 1775 |
+
# type="pil",
|
| 1776 |
+
# interactive=False
|
| 1777 |
+
# )
|
| 1778 |
+
# with gr.Row():
|
| 1779 |
+
# with gr.Group():
|
| 1780 |
+
# do_remove_background = gr.Checkbox(
|
| 1781 |
+
# label="Remove Background", value=True
|
| 1782 |
+
# )
|
| 1783 |
+
# sample_seed = gr.Number(value=42, label="Seed Value", precision=0)
|
| 1784 |
|
| 1785 |
+
# sample_steps = gr.Slider(
|
| 1786 |
+
# label="Sample Steps",
|
| 1787 |
+
# minimum=30,
|
| 1788 |
+
# maximum=75,
|
| 1789 |
+
# value=75,
|
| 1790 |
+
# step=5
|
| 1791 |
+
# )
|
| 1792 |
|
| 1793 |
+
# with gr.Row():
|
| 1794 |
+
# submit = gr.Button("Generate", elem_id="generate", variant="primary")
|
| 1795 |
|
| 1796 |
+
# with gr.Row(variant="panel"):
|
| 1797 |
+
# gr.Examples(
|
| 1798 |
+
# examples=[
|
| 1799 |
+
# os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
|
| 1800 |
+
# ],
|
| 1801 |
+
# inputs=[input_image],
|
| 1802 |
+
# label="Examples",
|
| 1803 |
+
# cache_examples=False,
|
| 1804 |
+
# examples_per_page=16
|
| 1805 |
+
# )
|
| 1806 |
+
|
| 1807 |
+
# with gr.Column():
|
| 1808 |
|
| 1809 |
+
# with gr.Row():
|
| 1810 |
|
| 1811 |
+
# with gr.Column():
|
| 1812 |
+
# mv_show_images = gr.Image(
|
| 1813 |
+
# label="Generated Multi-views",
|
| 1814 |
+
# type="pil",
|
| 1815 |
+
# width=379,
|
| 1816 |
+
# interactive=False
|
| 1817 |
+
# )
|
| 1818 |
|
| 1819 |
+
# # with gr.Column():
|
| 1820 |
+
# # output_video = gr.Video(
|
| 1821 |
+
# # label="video", format="mp4",
|
| 1822 |
+
# # width=379,
|
| 1823 |
+
# # autoplay=True,
|
| 1824 |
+
# # interactive=False
|
| 1825 |
+
# # )
|
| 1826 |
|
| 1827 |
+
# with gr.Row():
|
| 1828 |
+
# with gr.Tab("OBJ"):
|
| 1829 |
+
# output_model_obj = gr.Model3D(
|
| 1830 |
+
# label="Output Model (OBJ Format)",
|
| 1831 |
+
# interactive=False,
|
| 1832 |
+
# )
|
| 1833 |
+
# gr.Markdown("Note: Downloaded .obj model will be flipped. Export .glb instead or manually flip it before usage.")
|
| 1834 |
+
# with gr.Tab("GLB"):
|
| 1835 |
+
# output_model_glb = gr.Model3D(
|
| 1836 |
+
# label="Output Model (GLB Format)",
|
| 1837 |
+
# interactive=False,
|
| 1838 |
+
# )
|
| 1839 |
+
# gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
|
| 1840 |
|
| 1841 |
|
| 1842 |
|
| 1843 |
|
| 1844 |
+
# mv_images = gr.State()
|
| 1845 |
|
| 1846 |
+
# chatbot.like(print_like_dislike, inputs=[like_res,dislike_res,state], outputs=[like_res,dislike_res,chatbot])
|
| 1847 |
|
| 1848 |
+
# submit.click(fn=check_input_image, inputs=[new_crop_save_path], outputs=[processed_image]).success(
|
| 1849 |
+
# fn=generate_mvs,
|
| 1850 |
+
# inputs=[processed_image, sample_steps, sample_seed],
|
| 1851 |
+
# outputs=[mv_images, mv_show_images]
|
| 1852 |
|
| 1853 |
+
# ).success(
|
| 1854 |
+
# fn=make3d,
|
| 1855 |
+
# inputs=[mv_images],
|
| 1856 |
+
# outputs=[output_model_obj, output_model_glb]
|
| 1857 |
+
# )
|
| 1858 |
|
| 1859 |
###############################################################################
|
| 1860 |
# above part is for 3d generate.
|
|
|
|
| 1881 |
|
| 1882 |
|
| 1883 |
|
| 1884 |
+
# clear_button_sketcher.click(
|
| 1885 |
+
# lambda x: (x),
|
| 1886 |
+
# [origin_image],
|
| 1887 |
+
# [sketcher_input],
|
| 1888 |
+
# queue=False,
|
| 1889 |
+
# show_progress=False
|
| 1890 |
+
# )
|
| 1891 |
|
| 1892 |
|
| 1893 |
|
|
|
|
| 1895 |
|
| 1896 |
openai_api_key.submit(init_openai_api_key, inputs=[openai_api_key],
|
| 1897 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
|
| 1898 |
+
modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
|
| 1899 |
enable_chatGPT_button.click(init_openai_api_key, inputs=[openai_api_key],
|
| 1900 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
|
| 1901 |
modules_not_need_gpt,
|
| 1902 |
+
modules_not_need_gpt2, tts_interface,module_key_input,module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
|
| 1903 |
# openai_api_key.submit(init_openai_api_key,
|
| 1904 |
# outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3, modules_not_need_gpt,
|
| 1905 |
# modules_not_need_gpt2, tts_interface,module_key_input ,module_notification_box, text_refiner, visual_chatgpt, notification_box,d3_model,top_row])
|
|
|
|
| 1911 |
disable_chatGPT_button.click(init_wo_openai_api_key,
|
| 1912 |
outputs=[modules_need_gpt0, modules_need_gpt1, modules_need_gpt2, modules_need_gpt3,
|
| 1913 |
modules_not_need_gpt,
|
| 1914 |
+
modules_not_need_gpt2, tts_interface,module_key_input, module_notification_box, text_refiner, visual_chatgpt, notification_box,top_row])
|
| 1915 |
|
| 1916 |
artist_label_base2.click(
|
| 1917 |
get_artistinfo,
|
|
|
|
| 2008 |
name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
| 2009 |
paragraph,artist])
|
| 2010 |
|
| 2011 |
+
# image_input_base_2.upload(upload_callback, [image_input_base_2, state, visual_chatgpt,openai_api_key],
|
| 2012 |
+
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
| 2013 |
+
# image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
| 2014 |
+
# name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
| 2015 |
+
# paragraph,artist])
|
| 2016 |
|
| 2017 |
+
# image_input.upload(upload_callback, [image_input, state, visual_chatgpt,openai_api_key],
|
| 2018 |
+
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
| 2019 |
+
# image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
| 2020 |
+
# name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
| 2021 |
+
# paragraph,artist])
|
| 2022 |
|
| 2023 |
+
# sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt,openai_api_key],
|
| 2024 |
+
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
| 2025 |
+
# image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
| 2026 |
+
# name_label_base2, artist_label_base2, year_label_base2, material_label_base2,name_label_traj, artist_label_traj, year_label_traj, material_label_traj, \
|
| 2027 |
+
# paragraph,artist])
|
| 2028 |
|
| 2029 |
# image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
|
| 2030 |
# [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
|
|
|
|
| 2035 |
chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
|
| 2036 |
[chatbot, state, aux_state,output_audio])
|
| 2037 |
chat_input.submit(lambda: "", None, chat_input)
|
| 2038 |
+
# submit_button_text.click(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state,language,auto_play],
|
| 2039 |
+
# [chatbot, state, aux_state,output_audio])
|
| 2040 |
+
# submit_button_text.click(lambda: "", None, chat_input)
|
| 2041 |
example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
|
| 2042 |
[chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,image_input_base_2,
|
| 2043 |
image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base, \
|
|
|
|
| 2081 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
| 2082 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 2083 |
],
|
| 2084 |
+
outputs=[chatbot, state, click_state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
|
| 2085 |
show_progress=False, queue=True
|
| 2086 |
)
|
| 2087 |
|
| 2088 |
|
| 2089 |
+
focus_d.click(
|
| 2090 |
submit_caption,
|
| 2091 |
inputs=[
|
| 2092 |
+
state,length, sentiment, factuality, language,
|
| 2093 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state, auto_play, paragraph,focus_d,openai_api_key,new_crop_save_path
|
|
|
|
| 2094 |
],
|
| 2095 |
outputs=[
|
| 2096 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
|
|
|
| 2097 |
],
|
| 2098 |
show_progress=True,
|
| 2099 |
queue=True
|
| 2100 |
)
|
| 2101 |
+
|
| 2102 |
|
| 2103 |
+
|
| 2104 |
+
|
| 2105 |
+
|
| 2106 |
+
focus_da.click(
|
| 2107 |
+
submit_caption,
|
| 2108 |
+
inputs=[
|
| 2109 |
+
state,length, sentiment, factuality, language,
|
| 2110 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,auto_play, paragraph,focus_da,openai_api_key,new_crop_save_path
|
| 2111 |
+
],
|
| 2112 |
+
outputs=[
|
| 2113 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
| 2114 |
+
],
|
| 2115 |
+
show_progress=True,
|
| 2116 |
+
queue=True
|
| 2117 |
+
)
|
| 2118 |
+
|
| 2119 |
+
|
| 2120 |
+
focus_dai.click(
|
| 2121 |
+
submit_caption,
|
| 2122 |
+
inputs=[
|
| 2123 |
+
state,length, sentiment, factuality, language,
|
| 2124 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 2125 |
+
auto_play, paragraph,focus_dai,openai_api_key,new_crop_save_path
|
| 2126 |
+
],
|
| 2127 |
+
outputs=[
|
| 2128 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
| 2129 |
+
],
|
| 2130 |
+
show_progress=True,
|
| 2131 |
+
queue=True
|
| 2132 |
)
|
| 2133 |
|
| 2134 |
+
|
| 2135 |
+
focus_dda.click(
|
| 2136 |
+
submit_caption,
|
| 2137 |
+
inputs=[
|
| 2138 |
+
state,length, sentiment, factuality, language,
|
| 2139 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 2140 |
+
auto_play, paragraph,focus_dda,openai_api_key,new_crop_save_path
|
| 2141 |
+
],
|
| 2142 |
+
outputs=[
|
| 2143 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,output_audio
|
| 2144 |
+
],
|
| 2145 |
+
show_progress=True,
|
| 2146 |
+
queue=True
|
| 2147 |
+
)
|
| 2148 |
+
|
| 2149 |
+
add_button.click(
|
| 2150 |
+
toggle_icons_and_update_prompt,
|
| 2151 |
+
inputs=[point_prompt],
|
| 2152 |
+
outputs=[point_prompt,add_button,minus_button],
|
| 2153 |
+
show_progress=True,
|
| 2154 |
+
queue=True
|
| 2155 |
+
|
| 2156 |
+
)
|
| 2157 |
+
|
| 2158 |
+
minus_button.click(
|
| 2159 |
+
toggle_icons_and_update_prompt,
|
| 2160 |
+
inputs=[point_prompt],
|
| 2161 |
+
outputs=[point_prompt,add_button,minus_button],
|
| 2162 |
+
show_progress=True,
|
| 2163 |
+
queue=True
|
| 2164 |
+
|
| 2165 |
+
)
|
| 2166 |
+
|
| 2167 |
+
|
| 2168 |
+
|
| 2169 |
+
|
| 2170 |
+
|
| 2171 |
+
# submit_button_sketcher.click(
|
| 2172 |
+
# inference_traject,
|
| 2173 |
+
# inputs=[
|
| 2174 |
+
# origin_image,sketcher_input, enable_wiki, language, sentiment, factuality, length, image_embedding, state,
|
| 2175 |
+
# original_size, input_size, text_refiner,focus_type_sketch,paragraph,openai_api_key,auto_play,Input_sketch
|
| 2176 |
+
# ],
|
| 2177 |
+
# outputs=[chatbot, state, sketcher_input,output_audio,new_crop_save_path],
|
| 2178 |
+
# show_progress=False, queue=True
|
| 2179 |
+
# )
|
| 2180 |
+
|
| 2181 |
export_button.click(
|
| 2182 |
export_chat_log,
|
| 2183 |
inputs=[state,paragraph,like_res,dislike_res],
|
|
|
|
| 2208 |
iface = create_ui()
|
| 2209 |
iface.queue(api_open=False, max_size=10)
|
| 2210 |
# iface.queue(concurrency_count=5, api_open=False, max_size=10)
|
| 2211 |
+
iface.launch(server_name="0.0.0.0",show_error=True)
|