Spaces:
Running
on
Zero
Running
on
Zero
Update demo.
Browse files
app.py
CHANGED
|
@@ -61,24 +61,23 @@ The service is a research preview intended for non-commercial use only, subject
|
|
| 61 |
|
| 62 |
|
| 63 |
class Chat:
|
| 64 |
-
def __init__(self, model_path, conv_mode, model_base=None, load_8bit=False, load_4bit=False, device='cuda'):
|
| 65 |
# disable_torch_init()
|
| 66 |
model_name = get_model_name_from_path(model_path)
|
| 67 |
self.tokenizer, self.model, processor, context_len = load_pretrained_model(
|
| 68 |
model_path, model_base, model_name,
|
| 69 |
load_8bit, load_4bit,
|
| 70 |
-
device=device,
|
| 71 |
offload_folder="save_folder")
|
| 72 |
self.processor = processor
|
| 73 |
self.conv_mode = conv_mode
|
| 74 |
self.conv = conv_templates[conv_mode].copy()
|
| 75 |
-
self.device = self.model.device
|
| 76 |
|
| 77 |
def get_prompt(self, qs, state):
|
| 78 |
state.append_message(state.roles[0], qs)
|
| 79 |
state.append_message(state.roles[1], None)
|
| 80 |
return state
|
| 81 |
|
|
|
|
| 82 |
@torch.inference_mode()
|
| 83 |
def generate(self, tensor: list, modals: list, prompt: str, first_run: bool, state):
|
| 84 |
# TODO: support multiple turns of conversation.
|
|
@@ -92,7 +91,7 @@ class Chat:
|
|
| 92 |
prompt = state.get_prompt()
|
| 93 |
# print('\n\n\n')
|
| 94 |
# print(prompt)
|
| 95 |
-
input_ids = tokenizer_MMODAL_token(prompt, tokenizer, MMODAL_TOKEN_INDEX[modals[0]], return_tensors='pt').unsqueeze(0).to(self.device)
|
| 96 |
|
| 97 |
# 3. generate response according to visual signals and prompts.
|
| 98 |
stop_str = self.conv.sep if self.conv.sep_style in [SeparatorStyle.SINGLE] else self.conv.sep2
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
class Chat:
|
| 64 |
+
def __init__(self, model_path, conv_mode, model_base=None, load_8bit=False, load_4bit=False):
|
| 65 |
# disable_torch_init()
|
| 66 |
model_name = get_model_name_from_path(model_path)
|
| 67 |
self.tokenizer, self.model, processor, context_len = load_pretrained_model(
|
| 68 |
model_path, model_base, model_name,
|
| 69 |
load_8bit, load_4bit,
|
|
|
|
| 70 |
offload_folder="save_folder")
|
| 71 |
self.processor = processor
|
| 72 |
self.conv_mode = conv_mode
|
| 73 |
self.conv = conv_templates[conv_mode].copy()
|
|
|
|
| 74 |
|
| 75 |
def get_prompt(self, qs, state):
|
| 76 |
state.append_message(state.roles[0], qs)
|
| 77 |
state.append_message(state.roles[1], None)
|
| 78 |
return state
|
| 79 |
|
| 80 |
+
@spaces.GPU(duration=120)
|
| 81 |
@torch.inference_mode()
|
| 82 |
def generate(self, tensor: list, modals: list, prompt: str, first_run: bool, state):
|
| 83 |
# TODO: support multiple turns of conversation.
|
|
|
|
| 91 |
prompt = state.get_prompt()
|
| 92 |
# print('\n\n\n')
|
| 93 |
# print(prompt)
|
| 94 |
+
input_ids = tokenizer_MMODAL_token(prompt, tokenizer, MMODAL_TOKEN_INDEX[modals[0]], return_tensors='pt').unsqueeze(0).to(self.model.device)
|
| 95 |
|
| 96 |
# 3. generate response according to visual signals and prompts.
|
| 97 |
stop_str = self.conv.sep if self.conv.sep_style in [SeparatorStyle.SINGLE] else self.conv.sep2
|