Spaces:

austinmw
/

instructblip-vicuna-13b-4bit-image-qa

Runtime error

App Files Files Community

instructblip-vicuna-13b-4bit-image-qa / blip_tool.py

austinmw

Upload tool

298d752 over 2 years ago

raw

history blame contribute delete

2.84 kB

	import torch
	from transformers import AutoModelForVision2Seq, AutoProcessor
	from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
	from transformers.tools import PipelineTool
	from transformers.tools.base import get_default_device
	from transformers.utils import requires_backends

	class InstructBLIPImageQuestionAnsweringTool(PipelineTool):
	    """Answer a natural-language question about an image with InstructBLIP.

	    The checkpoint is loaded through ``AutoModelForVision2Seq`` and, unless the
	    caller overrides it via ``model_kwargs``, quantized to 4 bits with
	    ``torch.float16`` as the compute dtype. The processor doubles as the
	    post-processor when none is supplied.
	    """

	    # Alternative checkpoints that can be substituted here:
	    #   "Salesforce/blip2-opt-2.7b"
	    #   "Salesforce/instructblip-flan-t5-xl"
	    #   "Salesforce/instructblip-vicuna-7b"
	    default_checkpoint = "Salesforce/instructblip-vicuna-13b"

	    description = (
	        "This is a tool that answers a question about an image. It takes an input named `image` which should be the "
	        "image containing the information, as well as a `question` which should be the question in English. It "
	        "returns a text that is the answer to the question."
	    )
	    name = "image_qa"
	    pre_processor_class = AutoProcessor
	    model_class = AutoModelForVision2Seq
	    inputs = ["image", "text"]
	    outputs = ["text"]

	    def __init__(self, *args, **kwargs):
	        """Check that the vision backend is available, then defer to ``PipelineTool``.

	        All positional and keyword arguments are forwarded unchanged to the
	        parent constructor.
	        """
	        requires_backends(self, ["vision"])
	        super().__init__(*args, **kwargs)

	    def setup(self):
	        """
	        Instantiates the `pre_processor`, `model` and `post_processor` if necessary.

	        String values are treated as checkpoint identifiers and resolved with
	        ``from_pretrained``; already-instantiated objects are left untouched.
	        """
	        if isinstance(self.pre_processor, str):
	            self.pre_processor = self.pre_processor_class.from_pretrained(self.pre_processor, **self.hub_kwargs)

	        if isinstance(self.model, str):
	            # 4-bit / fp16 loading are defaults only: anything the caller put in
	            # ``model_kwargs`` or ``hub_kwargs`` takes precedence, which also
	            # avoids a duplicate-keyword TypeError when they set the same keys.
	            load_kwargs = {
	                "load_in_4bit": True,
	                "torch_dtype": torch.float16,
	                **self.model_kwargs,
	                **self.hub_kwargs,
	            }
	            self.model = self.model_class.from_pretrained(self.model, **load_kwargs)

	        if self.post_processor is None:
	            # Reuse the processor so decoding has a tokenizer available.
	            self.post_processor = self.pre_processor
	        elif isinstance(self.post_processor, str):
	            self.post_processor = self.post_processor_class.from_pretrained(self.post_processor, **self.hub_kwargs)

	        if self.device is None:
	            if self.device_map is not None:
	                # Use the device of the first entry in the model's device map.
	                self.device = list(self.model.hf_device_map.values())[0]
	            else:
	                self.device = get_default_device()

	        self.is_initialized = True

	    def encode(self, image, question: str):
	        """Turn an image and a question into model inputs.

	        Args:
	            image: The image to ask about, in any form the processor accepts.
	            question: The question text.

	        Returns:
	            The processor output as PyTorch tensors, moved to CUDA with
	            ``torch.float16`` requested as the dtype.
	        """
	        features = self.pre_processor(images=image, text=question, return_tensors="pt")
	        # NOTE(review): the device is hard-coded to "cuda" rather than taken from
	        # self.device; presumably because 4-bit loading needs a GPU - confirm.
	        return features.to(device="cuda", dtype=torch.float16)

	    def forward(self, inputs):
	        """Run generation on the encoded inputs and return the generated token ids.

	        Args:
	            inputs: Mapping of keyword arguments produced by ``encode``.
	        """
	        # NOTE(review): top_p and temperature are sampling parameters and
	        # do_sample is not set here, so they presumably have no effect under
	        # beam search - confirm against the generate() documentation.
	        outputs = self.model.generate(
	            **inputs,
	            num_beams=5,
	            max_new_tokens=256,
	            min_length=1,
	            top_p=0.9,
	            repetition_penalty=1.5,
	            length_penalty=1.0,
	            temperature=0.7,
	        )
	        return outputs

	    def decode(self, outputs) -> str:
	        """Decode the first generated sequence to text, stripped of surrounding whitespace.

	        Args:
	            outputs: Generated token ids as returned by ``forward``.
	        """
	        texts = self.pre_processor.batch_decode(outputs, skip_special_tokens=True)
	        return texts[0].strip()