Spaces:

phyloforfun
/

VoucherVision

Running

App Files Files Community

phyloforfun committed on Feb 14, 2024

Commit

c824976

1 Parent(s): 1d9ab62

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files

Files changed (3) hide show

app.py +24 -5
run_VoucherVision.py +35 -16
vouchervision/general_utils.py +1 -0

app.py CHANGED Viewed

@@ -308,7 +308,6 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
 @st.cache_data
 def handle_image_upload_and_gallery():
-    st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
     if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
         if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
@@ -381,6 +380,7 @@ def content_input_images(col_left, col_right):
             handle_image_upload_and_gallery_hf(uploaded_files)
         else:
             handle_image_upload_and_gallery()
 def list_jpg_files(directory_path):
@@ -468,12 +468,19 @@ def use_test_image():
     clear_image_uploads()
     st.session_state['uploader_idk'] += 1
     for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
-        file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
         st.session_state['input_list'].append(file_path)
         img = Image.open(file_path)
         img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-        file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
         st.session_state['input_list_small'].append(file_path_small)
@@ -1667,7 +1674,20 @@ def content_prompt_and_llm_version():
     with col_llm_1:
         GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
         st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
 def content_api_check():
@@ -2186,7 +2206,6 @@ def content_less_used():
 #################################################################################################################################################
 # Sidebar #######################################################################################################################################
 #################################################################################################################################################
-@st.cache_data
 def sidebar_content():
     if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
         validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))

 @st.cache_data
 def handle_image_upload_and_gallery():
     if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
         if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
             handle_image_upload_and_gallery_hf(uploaded_files)
         else:
+            st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
             handle_image_upload_and_gallery()
 def list_jpg_files(directory_path):
     clear_image_uploads()
     st.session_state['uploader_idk'] += 1
     for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
+        try:
+            file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
+        except:
+            file_path = save_uploaded_file_local(os.path.join(st.session_state.dir_home,'demo','demo_images'),os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
         st.session_state['input_list'].append(file_path)
         img = Image.open(file_path)
         img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+        try:
+            file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
+        except:
+            file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file, img)
         st.session_state['input_list_small'].append(file_path_small)
     with col_llm_1:
         GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
         st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
+        st.markdown("""
+Based on preliminary results, the following models perform the best. We are currently running tests of all possible OCR + LLM + Prompt combinations to create recipes for different workflows.
+- `Mistral Medium`
+- `Mistral Small`
+- `Mistral Tiny`
+- `PaLM 2 text-bison@001`
+- `GPT 4 Turbo 1106-preview`
+- `GPT 3.5 Instruct`
+- `LOCAL Mixtral 7Bx8 Instruct`
+- `LOCAL Mixtral 7B Instruct`
+Larger models (e.g., `GPT 4`, `GPT 4 32k`, `Gemini Pro`) do not necessarily perform better for these tasks. MistralAI models exceeded our expectations and perform extremely well. PaLM 2 text-bison@001 also seems to consistently out-perform Gemini Pro.
+The `SLTPvA_short.yaml` prompt also seems to work better with smaller LLMs (e.g., Mistral Tiny). Alternatively, enable double OCR to help the LLM focus on the OCR text given a longer prompt.""")
 def content_api_check():
 #################################################################################################################################################
 # Sidebar #######################################################################################################################################
 #################################################################################################################################################
 def sidebar_content():
     if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
         validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))

run_VoucherVision.py CHANGED Viewed

@@ -1,10 +1,26 @@
 import streamlit.web.cli as stcli
-import os, sys
 # pip install protobuf==3.20.0
 # pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
 # pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
 def resolve_path(path):
@@ -15,18 +31,21 @@ def resolve_path(path):
 if __name__ == "__main__":
     dir_home = os.path.dirname(__file__)
-    # pip install protobuf==3.20.0
-    sys.argv = [
-        "streamlit",
-        "run",
-        resolve_path(os.path.join(dir_home,"app.py")),
-        # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
-        "--global.developmentMode=false",
-        # "--server.port=8545",
-        "--server.port=8546",
-        # Toggle below for HF vs Local
-        # "--is_hf=1",
-        # "--is_hf=0",
-    ]
-    sys.exit(stcli.main())

 import streamlit.web.cli as stcli
+import os, sys, socket
 # pip install protobuf==3.20.0
 # pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
 # pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
+# pip install protobuf==3.20.0
+def find_available_port(start_port, max_attempts=1000):
+    """Return the first port at or above ``start_port`` that can be bound on 127.0.0.1.
+
+    Candidate ports are probed in ascending order by binding a throwaway TCP
+    socket. The probe socket is closed before returning, so the port is only
+    known to have been free at probe time; another process may still claim it
+    before the caller binds it.
+
+    Args:
+        start_port: First port number to try.
+        max_attempts: Maximum number of consecutive ports to probe.
+
+    Returns:
+        The first port number for which the bind succeeded.
+
+    Raises:
+        ValueError: If none of the probed ports could be bound.
+    """
+    # Never probe past 65535: bind() raises OverflowError (not OSError) for
+    # out-of-range ports, which would bypass the ValueError raised below.
+    stop_port = min(start_port + max_attempts, 65536)
+    for port in range(start_port, stop_port):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            try:
+                s.bind(("127.0.0.1", port))
+            except OSError:
+                # Port is in use (or otherwise unavailable); try the next one.
+                continue
+            return port
+    raise ValueError(f"Could not find an available port within {max_attempts} attempts starting from port {start_port}.")
 def resolve_path(path):
 if __name__ == "__main__":
     dir_home = os.path.dirname(__file__)
+    # Launch the Streamlit app on the first free port at or above start_port.
+    start_port = 8529
+    try:
+        free_port = find_available_port(start_port)
+        sys.argv = [
+            "streamlit",
+            "run",
+            resolve_path(os.path.join(dir_home,"app.py")),
+            # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
+            "--global.developmentMode=false",
+            # Use the port that was just probed rather than a fixed one, so a
+            # busy default port no longer prevents the app from starting.
+            f"--server.port={free_port}",
+            # Toggle below for HF vs Local
+            # "--is_hf=1",
+            # "--is_hf=0",
+        ]
+        sys.exit(stcli.main())
+    except ValueError as e:
+        # find_available_port raises ValueError when no port could be bound.
+        print(e)

vouchervision/general_utils.py CHANGED Viewed

@@ -106,6 +106,7 @@ def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, t
     else:
         return None           #TODO add config tests to expense_report
 def summarize_expense_report(path_expense_report):
     # Initialize counters and sums
     run_count = 0

     else:
         return None           #TODO add config tests to expense_report
+@st.cache_data
 def summarize_expense_report(path_expense_report):
     # Initialize counters and sums
     run_count = 0