Update app.py
app.py CHANGED
@@ -900,7 +900,7 @@ if __name__ == "__main__":
     # --- Add arguments for ILLUME configs and checkpoints ---
     parser.add_argument("--model_name", type=str, default="ILLUME-MLLM/illume_plus-qwen-2_5-3b-hf",
                         help="Name for builder.")
-    parser.add_argument("--torch_dtype", type=str, default='
+    parser.add_argument("--torch_dtype", type=str, default='fp32', choices=['fp32', 'bf16', 'fp16'],
                         help="Computation data type.")
 
     parser.add_argument("--diffusion_decoder_path", type=str, default='ILLUME-MLLM/dualvitok_sdxl_decoder',
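The new --torch_dtype flag stores a short string ('fp32', 'bf16', 'fp16') rather than a torch.dtype, so it would need to be mapped before it could be passed to a from_pretrained call (note that torch_dtype=args.torch_dtype stays commented out in the next hunk). Below is a minimal sketch of that mapping, assuming only the choices shown above; DTYPE_MAP and resolve_dtype are illustrative names, not part of app.py.

import torch

# Hypothetical mapping from the --torch_dtype strings to torch dtypes.
DTYPE_MAP = {
    'fp32': torch.float32,
    'bf16': torch.bfloat16,
    'fp16': torch.float16,
}

def resolve_dtype(name: str) -> torch.dtype:
    # Unknown values fall back to float32, matching the argparse default.
    return DTYPE_MAP.get(name, torch.float32)

# Usage: torch_dtype=resolve_dtype(args.torch_dtype) could replace the
# commented-out torch_dtype=args.torch_dtype in the loading call below.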
@@ -939,6 +939,7 @@ if __name__ == "__main__":
         attn_implementation='flash_attention_2',  # OR 'sdpa' for Ascend NPUs
         # torch_dtype=args.torch_dtype,
         # attn_implementation='sdpa',  # OR 'sdpa' for Ascend NPUs
+        token=os.environ["HF_TOKEN"],
         low_cpu_mem_usage=True,
         trust_remote_code=True).eval().cuda()
     processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)
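The added token=os.environ["HF_TOKEN"] argument authenticates the Hub download, but indexing os.environ raises a KeyError if the HF_TOKEN secret is not configured on the Space. A small defensive sketch (an assumption, not what this commit does) that falls back to anonymous access for public checkpoints:

import os

def hf_token():
    # Hypothetical helper: prefer the HF_TOKEN secret, otherwise return None
    # so public repos can still be downloaded anonymously.
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("HF_TOKEN is not set; using anonymous Hub access.")
    return token

# Usage: pass token=hf_token() instead of token=os.environ["HF_TOKEN"].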
@@ -946,11 +947,14 @@ if __name__ == "__main__":
     # set the vision tokenizer for decoding image.
     dualvitok = AutoModel.from_pretrained(args.tokenizer_path,
                                           torch_dtype=torch.float32,
-                                          trust_remote_code=True
+                                          trust_remote_code=True,
+                                          token=os.environ["HF_TOKEN"],
+                                          ).eval().cuda()
     processor.set_vision_tokenizer(dualvitok)
 
     # (Optional): set the sdxl diffusion decoder. It will enable upsample 2x image resolution.
-    processor.load_diffusion_vision_detokenizer(args.diffusion_decoder_path
+    processor.load_diffusion_vision_detokenizer(args.diffusion_decoder_path,
+                                                token=os.environ["HF_TOKEN"])
 
     # Assign device to model for later use
     streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
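This hunk threads the same HF_TOKEN value through both the dualvitok vision tokenizer load and the SDXL diffusion decoder load. An alternative sketch (not part of this commit) is to authenticate the process once with huggingface_hub.login(), after which subsequent Hub downloads reuse the stored credential:

import os
from huggingface_hub import login

# Sketch: log in once instead of passing token=... to every loading call.
token = os.environ.get("HF_TOKEN")
if token:
    login(token=token)
# The model, dualvitok tokenizer, and diffusion decoder downloads that follow
# can then pick up the cached credential.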