huangrh9 committed on
Commit
a07b3ab
·
verified ·
1 Parent(s): 7378375

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -900,7 +900,7 @@ if __name__ == "__main__":
900
  # --- Add arguments for ILLUME configs and checkpoints ---
901
  parser.add_argument("--model_name", type=str, default="ILLUME-MLLM/illume_plus-qwen-2_5-3b-hf",
902
  help="Name for builder.")
903
- parser.add_argument("--torch_dtype", type=str, default='bf16', choices=['fp32', 'bf16', 'fp16'],
904
  help="Computation data type.")
905
 
906
  parser.add_argument("--diffusion_decoder_path", type=str, default='ILLUME-MLLM/dualvitok_sdxl_decoder',
@@ -939,6 +939,7 @@ if __name__ == "__main__":
939
  attn_implementation='flash_attention_2', # OR 'sdpa' for Ascend NPUs
940
  # torch_dtype=args.torch_dtype,
941
  # attn_implementation='sdpa', # OR 'sdpa' for Ascend NPUs
 
942
  low_cpu_mem_usage=True,
943
  trust_remote_code=True).eval().cuda()
944
  processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)
@@ -946,11 +947,14 @@ if __name__ == "__main__":
946
  # set the vision tokenizer for decoding image.
947
  dualvitok = AutoModel.from_pretrained(args.tokenizer_path,
948
  torch_dtype=torch.float32,
949
- trust_remote_code=True).eval().cuda()
 
 
950
  processor.set_vision_tokenizer(dualvitok)
951
 
952
  # (Optional): set the sdxl diffusion decoder. It will enable upsample 2x image resolution.
953
- processor.load_diffusion_vision_detokenizer(args.diffusion_decoder_path)
 
954
 
955
  # Assign device to model for later use
956
  streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
 
900
  # --- Add arguments for ILLUME configs and checkpoints ---
901
  parser.add_argument("--model_name", type=str, default="ILLUME-MLLM/illume_plus-qwen-2_5-3b-hf",
902
  help="Name for builder.")
903
+ parser.add_argument("--torch_dtype", type=str, default='fp32', choices=['fp32', 'bf16', 'fp16'],
904
  help="Computation data type.")
905
 
906
  parser.add_argument("--diffusion_decoder_path", type=str, default='ILLUME-MLLM/dualvitok_sdxl_decoder',
 
939
  attn_implementation='flash_attention_2', # OR 'sdpa' for Ascend NPUs
940
  # torch_dtype=args.torch_dtype,
941
  # attn_implementation='sdpa', # OR 'sdpa' for Ascend NPUs
942
+ token=os.environ["HF_TOKEN"],
943
  low_cpu_mem_usage=True,
944
  trust_remote_code=True).eval().cuda()
945
  processor = AutoProcessor.from_pretrained(args.model_name, trust_remote_code=True)
 
947
  # set the vision tokenizer for decoding image.
948
  dualvitok = AutoModel.from_pretrained(args.tokenizer_path,
949
  torch_dtype=torch.float32,
950
+ trust_remote_code=True,
951
+ token=os.environ["HF_TOKEN"],
952
+ ).eval().cuda()
953
  processor.set_vision_tokenizer(dualvitok)
954
 
955
  # (Optional): set the sdxl diffusion decoder. It will enable upsample 2x image resolution.
956
+ processor.load_diffusion_vision_detokenizer(args.diffusion_decoder_path,
957
+ token=os.environ["HF_TOKEN"])
958
 
959
  # Assign device to model for later use
960
  streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)