Spaces:

Samarth991
/

CV-Agent

Sleeping

App Files Files Community

CV-Agent / app.py

Samarth991

added ultralytics

b60841d 9 months ago

raw

history blame

6.18 kB

	import os
	import streamlit as st
	from PIL import Image
	from pathlib import Path
	from QA_bot import tyre_synap_bot as bot
	from llm_service import get_llm
	from hub_prompts import PREFIX

	from extract_tools import get_all_tools
	from langchain.agents import AgentExecutor
	from langchain import hub
	from langchain.agents.format_scratchpad import format_log_to_str
	from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
	from langchain.tools.render import render_text_description

	import logging
	import warnings
	warnings.filterwarnings("ignore")

	# Send log records to "newfile.log", truncating the file on each start.
	logging.basicConfig(
	    filename="newfile.log",
	    format='%(asctime)s %(message)s',
	    filemode='w',
	)
	logger = logging.getLogger()
	# The root logger defaults to WARNING, which silently drops every
	# logger.info(...) call in this module. Set the level explicitly rather
	# than through basicConfig(), because basicConfig() does nothing when the
	# root logger already has handlers installed.
	logger.setLevel(logging.INFO)

	# Module-level state shared between main() and the agent-building helpers.
	llm = None       # language model handle; assigned in main()
	tools = None     # list of agent tools; assigned in main()
	cv_agent = None  # placeholder for the agent executor

	@st.cache_resource
	def call_llmservice_model(option,api_key):
	    """Return the language model produced by ``get_llm`` for this selection.

	    The function is wrapped in ``st.cache_resource``, so the result is
	    cached by Streamlit per ``(option, api_key)`` argument pair.

	    Args:
	        option: Name of the LLM service/model chosen in the sidebar.
	        api_key: API key forwarded to ``get_llm`` as ``key``.

	    Returns:
	        Whatever ``get_llm`` returns for the given option and key.
	    """
	    return get_llm(option=option, key=api_key)

	@st.cache_resource
	def _pull_react_json_prompt():
	    """Download the "hwchase17/react-json" prompt from the LangChain hub once."""
	    return hub.pull("hwchase17/react-json")


	def setup_agent_prompt():
	    """Return the ReAct-JSON prompt with the current tools filled in.

	    Only the hub download is cached. The tool descriptions are filled in on
	    every call, so the result always reflects the current value of the
	    module-level ``tools`` instead of whatever it held on the first call.

	    Returns:
	        The pulled prompt with ``tools`` and ``tool_names`` partially
	        applied, or the unmodified pulled prompt when no tools are
	        available (an error is logged in that case).
	    """
	    prompt = _pull_react_json_prompt()
	    # `tools` starts as None at module level and may also be an empty list;
	    # a truthiness test covers both without raising on len(None).
	    if not tools:
	        logger.error ("No Tools added")
	        return prompt
	    return prompt.partial(
	        tools=render_text_description(tools),
	        tool_names=", ".join(t.name for t in tools),
	        # NOTE(review): this registers a partial variable literally named
	        # `additional_kwargs`; whether the pulled template consumes it (and
	        # so whether PREFIX ever reaches the model) is not visible here --
	        # confirm.
	        additional_kwargs={
	            'system_message':PREFIX,
	        },
	    )

	def agent_initalize():
	    """Build an ``AgentExecutor`` from the module-level ``llm`` and ``tools``.

	    The agent is a pipeline: an input mapping, the prompt from
	    ``setup_agent_prompt()``, the LLM with a stop sequence bound, and the
	    ReAct JSON output parser.

	    This function is deliberately not wrapped in ``st.cache_resource``. It
	    takes no arguments, so such a cache would hold one executor for the whole
	    process and keep using the first model/API key even after a different
	    one is selected. The model itself is cached in
	    ``call_llmservice_model``.

	    Returns:
	        AgentExecutor: executor over the agent pipeline and ``tools``, with
	        verbose output and parsing-error handling enabled.
	    """
	    agent_prompt = setup_agent_prompt()
	    # Bind "\nObservation" as a stop sequence on the model.
	    llm_with_stop = llm.bind(stop=["\nObservation"])
	    # An alternative is langchain's create_react_agent:
	    # https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
	    agent = (
	        {
	            "input": lambda x: x["input"],
	            # Render the (action, observation) steps so far as text.
	            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
	        }
	        | agent_prompt
	        | llm_with_stop
	        | ReActJsonSingleInputOutputParser()
	    )

	    return AgentExecutor(
	        agent=agent,
	        tools=tools,
	        verbose=True,
	        handle_parsing_errors=True,
	    )

	# def agent_initalize(tools,max_iterations=5):
	# zero_shot_agent = initialize_agent(
	# agent= AgentType.ZERO_SHOT_REACT_DESCRIPTION,
	# tools = tools,
	# llm = llm,
	# verbose = True,
	# max_iterations = max_iterations,
	# memory = None,
	# handle_parsing_errors=True,
	# agent_kwargs={
	# 'system_message':PREFIX,
	# # 'format_instructions':FORMAT_INSTRUCTIONS,
	# # 'suffix':SUFFIX
	# }
	# )
	# # sys_message = PREFIX
	# # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
	# return zero_shot_agent


	def main():
	    """Render the Streamlit page and run the CV agent on an uploaded image.

	    Draws the title, description and sidebar (project notes, LLM selector,
	    API-key field, image uploader). Once an image is uploaded and an LLM is
	    selected, it:

	    1. obtains the model via ``call_llmservice_model``;
	    2. loads the tools via ``get_all_tools``;
	    3. builds the agent via ``agent_initalize``;
	    4. saves the upload under ``image_store/`` and previews it in the sidebar;
	    5. hands the agent and the saved path to ``bot``.

	    Side effects: assigns the module-level ``llm``, ``tools`` and
	    ``cv_agent``, and writes the uploaded file to disk.
	    """
	    global llm, tools, cv_agent

	    database_store = 'image_store'
	    st.session_state.disabled = False
	    st.session_state.visibility = "visible"

	    st.title("Computer Vision Agent :sunglasses:")
	    st.markdown("Use the CV agent to do Object Detection , Panoptic Segementation,Image Segmentation , Image Descrption task using the latest foundation models available opensource.")
	    st.markdown('The CV Agent implements an Agent that decide what and when to use to provide the information related to the image asked my the user.')
	    st.markdown(
	        """
	        <style>
	        section[data-testid="stSidebar"] {
	        width: 350px !important; # Set the width to your desired value
	        }
	        </style>
	        """,
	        unsafe_allow_html=True,
	    )

	    with st.sidebar:
	        st.header("About Project")
	        st.markdown(
	            """
	            - CV Agent can perform check on images to detemine the image quality and can also find out the segementaion mask and panoptic mask .
	            - This application uses multiple tools like Image caption tool, DuckDuckGo search tool, Maskformer tool , Panoptic segementation tool to perform these tasks.
	            - The decision on how to use the certain tool and when to use it soely relies on the Reasoning power of the LLM.
	            """)
	    st.sidebar.subheader("Upload Image !")
	    option = st.sidebar.selectbox(
	        "Select your Large Language Model(LLM) ",
	        (
	            "deepseek-r1-distill-llama-70b",
	            "gemma2-9b-it",
	            "llama-3.2-3b-preview",
	            "llama-3.2-1b-preview",
	            "llama3-8b-8192",
	            "Openai",
	            "Google",
	            "Ollama",
	        ),
	        index=None,
	        placeholder="Select LLM Service...",
	    )
	    api_key = st.sidebar.text_input("API_KEY", type="password", key="password")

	    uploaded_file = st.sidebar.file_uploader("Upload Image for Processing", type=['png','jpg','jpeg'])

	    # Nothing to process until an image has been uploaded.
	    if uploaded_file is None:
	        return
	    # The selectbox starts with no selection (index=None); without a model
	    # choice there is nothing to build the agent from.
	    if option is None:
	        st.sidebar.warning("Select an LLM service before processing an image.")
	        return

	    # The file name comes from the client: keep only its final path
	    # component so the upload cannot be written outside `database_store`.
	    file_path = Path(database_store, Path(uploaded_file.name).name)
	    os.makedirs(database_store, exist_ok=True)

	    llm = call_llmservice_model(option=option,api_key=api_key)
	    logger.info("\tLLM Service %s Active ... !", llm.get_name())
	    ## extract tools
	    tools = get_all_tools()
	    logger.info("\tFound %s tools ", len(tools))
	    ## generate Agent
	    cv_agent = agent_initalize()
	    logger.info('\tAgent inintalized with %s tools ', len(tools))

	    # Persist the upload so the agent's tools can read it from disk by path.
	    with open(file_path, mode='wb') as w:
	        w.write(uploaded_file.getvalue())

	    if file_path.is_file():
	        st.sidebar.success("File uploaded successfully",icon="✅")

	        with st.sidebar.container():
	            # Close the image file handle once the preview has been rendered.
	            with Image.open(file_path) as image:
	                st.image(image,use_container_width=True)
	        st.sidebar.subheader("""
	        Examples Questions:
	        - Describe about the image
	        - Tell me what are the things you can detect in the image .
	        - How is the image quality
	        """)

	    bot(cv_agent,file_path)

	# Script entry point: render the app only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()