Commit a1e2ec1
Parent(s): 01e6026
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Files changed:
- app.py (+43 -16)
- demo/{demo_images/MICH_29667680_Hypericaceae_Hypericum_prolificum.jpg → demo_gallery/MICH_7574789_Cyperaceae_Carex_scoparia.jpg} (+2 -2)
- demo/demo_images/MICH_7574789_Cyperaceae_Carex_scoparia.jpg (+3 -0)
- vouchervision/API_validation.py (+14 -13)
- vouchervision/OCR_google_cloud_vision.py (+29 -27)
- vouchervision/VoucherVision_Config_Builder.py (+1 -1)
- vouchervision/utils_VoucherVision.py (+1 -1)
app.py
CHANGED
@@ -1395,24 +1395,41 @@ def load_api_status():
     except FileNotFoundError:
         return None, None, None
 
-def display_api_key_status():
+def display_api_key_status(ccol):
     if not st.session_state['API_checked']:
         present_keys, missing_keys, date_of_check = load_api_status()
         if present_keys is None and missing_keys is None:
             st.session_state['API_checked'] = False
         else:
             # Convert keys to annotations (similar to what you do in check_api_key_status)
-            present_annotations = [
-            missing_annotations = [
+            present_annotations = []
+            missing_annotations = []
+            for key in present_keys:
+                if "Valid" in key:
+                    show_text = key.split('(')[0]
+                    present_annotations.append((show_text, "ready!", "#059c1b")) # Green for valid
+                elif "Invalid" in key:
+                    show_text = key.split('(')[0]
+                    present_annotations.append((show_text, "error", "#870307")) # Red for invalid
 
             st.session_state['present_annotations'] = present_annotations
             st.session_state['missing_annotations'] = missing_annotations
             st.session_state['date_of_check'] = date_of_check
             st.session_state['API_checked'] = True
+            # print('for')
+            # print(st.session_state['present_annotations'])
+            # print(st.session_state['missing_annotations'])
+    else:
+        # print('else')
+        # print(st.session_state['present_annotations'])
+        # print(st.session_state['missing_annotations'])
+        pass
 
     # Check if the API status has already been retrieved
     if 'API_checked' not in st.session_state or not st.session_state['API_checked'] or st.session_state['API_rechecked']:
-
+        with ccol:
+            with st.spinner('Verifying APIs by sending short requests...'):
+                check_api_key_status()
         st.session_state['API_checked'] = True
         st.session_state['API_rechecked'] = False
 
@@ -1424,6 +1441,7 @@ def display_api_key_status():
     # Display missing keys horizontally
     if 'missing_annotations' in st.session_state and st.session_state['missing_annotations']:
         annotated_text(*st.session_state['missing_annotations'])
+
 
 
 def check_api_key_status():
@@ -1454,9 +1472,11 @@ def check_api_key_status():
 
     # Save API key status
     save_api_status(present_keys, missing_keys, date_of_check)
+
+    st.session_state['present_annotations'] = present_annotations
+    st.session_state['missing_annotations'] = missing_annotations
+    st.session_state['date_of_check'] = date_of_check
 
-    return present_annotations, missing_annotations, date_of_check
-
 
 def convert_cost_dict_to_table(cost, name):
     # Convert the dictionary to a pandas DataFrame for nicer display
@@ -1507,7 +1527,10 @@ def get_all_cost_tables():
 
 
 def content_header():
-    col_logo, col_run_1, col_run_2, col_run_3, col_run_4
+    col_logo, col_run_1, col_run_2, col_run_3, col_run_4 = st.columns([2,2,2,2,4])
+    with col_run_4:
+        with st.expander("View Messages and Updates"):
+            st.info("***Note:*** If you use VoucherVision frequently, you can change the default values that are auto-populated in the form below. In a text editor or IDE, edit the first few rows in the file `../VoucherVision/vouchervision/VoucherVision_Config_Builder.py`")
 
 
     col_test = st.container()
@@ -1591,10 +1614,7 @@ def content_header():
     if st.session_state['formatted_json']:
         json_report.set_JSON(st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'])
 
-
-    with st.expander("View Messages and Updates"):
-        st.info("***Note:*** If you use VoucherVision frequently, you can change the default values that are auto-populated in the form below. In a text editor or IDE, edit the first few rows in the file `../VoucherVision/vouchervision/VoucherVision_Config_Builder.py`")
-
+
 
 
     with col_run_1:
@@ -1810,13 +1830,14 @@ def content_api_check():
     st.header('Available APIs')
 
     # Display API key status
-    display_api_key_status()
+    display_api_key_status(col_llm_2a)
 
     # Place the button in the second column, right-justified
     # with col_llm_2b:
     if st.button("Re-Check API Keys"):
         st.session_state['API_checked'] = False
         st.session_state['API_rechecked'] = True
+        st.rerun()
     # with col_llm_2c:
     if st.button("Edit API Keys"):
         st.session_state.proceed_to_private = True
@@ -1861,7 +1882,7 @@ def content_collage_overlay():
         st.session_state["demo_collage"] = Image.open(ba)
 
     # Display the image
-    st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
+    st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG", width=500)
     # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
 
 
@@ -1934,7 +1955,7 @@ def content_collage_overlay():
         ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr2_low.png')
         st.session_state["demo_overlay"] = Image.open(ocr)
 
-    st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG")
+    st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "PNG", width=500)
     # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
 
 
@@ -2242,8 +2263,14 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
 
 # Parse the 'is_hf' argument and set it in session state
 if 'is_hf' not in st.session_state:
-    st.session_state['is_hf'] =
+    st.session_state['is_hf'] = False
 
+
+#################################################################################################################################################
+# Initializations ###############################################################################################################################
+#################################################################################################################################################
+
+
 print(f"is_hf {st.session_state['is_hf']}")
 # Default YAML file path
 if 'config' not in st.session_state:
@@ -2266,7 +2293,7 @@ if st.session_state['is_hf']:
 
 else:
     if 'proceed_to_main' not in st.session_state:
-        st.session_state.proceed_to_main =
+        st.session_state.proceed_to_main = True # New state variable to control the flow
     print(f"proceed_to_main {st.session_state['proceed_to_main']}")
     if 'private_file' not in st.session_state:
         st.session_state.private_file = does_private_file_exist()
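For context, the (text, label, color) tuples built in display_api_key_status() follow the convention of the st-annotated-text Streamlit component that the app calls via annotated_text(). A minimal sketch, not part of the commit, with made-up key names for illustration:

import streamlit as st
from annotated_text import annotated_text  # st-annotated-text package

# Hypothetical key-status tuples in the same (text, label, color) format
# that display_api_key_status() stores in st.session_state.
present_annotations = [
    ("OpenAI ", "ready!", "#059c1b"),   # green badge for a key that validated
    ("Mistral ", "error", "#870307"),   # red badge for a key that failed validation
]
annotated_text(*present_annotations)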
demo/{demo_images/MICH_29667680_Hypericaceae_Hypericum_prolificum.jpg → demo_gallery/MICH_7574789_Cyperaceae_Carex_scoparia.jpg}
RENAMED
File without changes
demo/demo_images/MICH_7574789_Cyperaceae_Carex_scoparia.jpg
ADDED
Git LFS Details
vouchervision/API_validation.py
CHANGED
@@ -77,6 +77,7 @@ class APIvalidation:
                 response = client.document_text_detection(image=image)
                 texts = response.text_annotations
                 normal_cleaned_text = texts[0].description if texts else None
+                print(f"OCR TEST: {normal_cleaned_text}")
             else:
                 logo_path = os.path.join(self.dir_home, 'img','logo.png')
                 client = vision.ImageAnnotatorClient()
@@ -151,22 +152,22 @@ class APIvalidation:
             client = MistralClient(api_key=os.getenv('MISTRAL_API_KEY'))
 
 
-
+            # Initialize the Mistral Client with the API key
 
-
-
+            # Create a simple message
+            messages = [ChatMessage(role="user", content="hello")]
 
-
-
-
-
-
+            # Send the message and get the response
+            chat_response = client.chat(
+                model="mistral-tiny",
+                messages=messages,
+            )
 
-
-
-
-
-
+            # Check if the response is valid (adjust this according to the actual response structure)
+            if chat_response and chat_response.choices:
+                return True
+            else:
+                return False
         except Exception as e: # Replace with a more specific exception if possible
             return False
 
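The Mistral check added above uses the same pattern as the other key checks: send the smallest possible request and treat any exception or empty response as an invalid key. A standalone sketch of that pattern, assuming the mistralai 0.x client API shown in the diff (the function name is illustrative, not from the repo):

import os
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

def mistral_key_is_valid() -> bool:
    """Return True if MISTRAL_API_KEY can complete a one-word chat request."""
    try:
        client = MistralClient(api_key=os.getenv('MISTRAL_API_KEY'))
        response = client.chat(
            model="mistral-tiny",
            messages=[ChatMessage(role="user", content="hello")],
        )
        return bool(response and response.choices)
    except Exception:
        return False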
vouchervision/OCR_google_cloud_vision.py
CHANGED
@@ -29,7 +29,9 @@ class OCRGoogle:
 
     BBOX_COLOR = "black"
 
-    def __init__(self, path, cfg, trOCR_model_version, trOCR_model, trOCR_processor, device):
+    def __init__(self, is_hf, path, cfg, trOCR_model_version, trOCR_model, trOCR_processor, device):
+        self.is_hf = is_hf
+
         self.path = path
         self.cfg = cfg
         self.do_use_trOCR = self.cfg['leafmachine']['project']['do_use_trOCR']
@@ -67,6 +69,29 @@ class OCRGoogle:
         self.trOCR_height = None
         self.trOCR_confidences = None
         self.trOCR_characters = None
+        self.set_client()
+
+
+
+    def set_client(self):
+        if self.is_hf:
+            service_account_json_str = os.getenv('google_service_account_json')
+            if not service_account_json_str:
+                print("Service account JSON not found in environment variables.")
+                return False
+
+            # Convert JSON string to a dictionary
+            service_account_info = json.loads(service_account_json_str)
+            # Create credentials from the service account info
+            credentials = service_account.Credentials.from_service_account_info(service_account_info)
+            # Initialize the client with the credentials
+            self.client_beta = vision_beta.ImageAnnotatorClient(credentials=credentials)
+            self.client = vision.ImageAnnotatorClient(credentials=credentials)
+        else:
+            self.client_beta = vision_beta.ImageAnnotatorClient()
+            self.client = vision.ImageAnnotatorClient()
+
+
 
     def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
         CONFIDENCES = 0.80
@@ -255,22 +280,11 @@ class OCRGoogle:
 
 
     def detect_text(self):
-
-        if not service_account_json_str:
-            print("Service account JSON not found in environment variables.")
-            return False
-
-        # Convert JSON string to a dictionary
-        service_account_info = json.loads(service_account_json_str)
-        # Create credentials from the service account info
-        credentials = service_account.Credentials.from_service_account_info(service_account_info)
-        # Initialize the client with the credentials
-        client = vision.ImageAnnotatorClient(credentials=credentials)
-        # client = vision.ImageAnnotatorClient() #####################################################################################################################
+
         with io.open(self.path, 'rb') as image_file:
             content = image_file.read()
             image = vision.Image(content=content)
-        response = client.document_text_detection(image=image)
+        response = self.client.document_text_detection(image=image)
         texts = response.text_annotations
 
         if response.error.message:
@@ -381,25 +395,13 @@ class OCRGoogle:
 
 
     def detect_handwritten_ocr(self):
-        service_account_json_str = os.getenv('google_service_account_json')
-        if not service_account_json_str:
-            print("Service account JSON not found in environment variables.")
-            return False
-
-        # Convert JSON string to a dictionary
-        service_account_info = json.loads(service_account_json_str)
-        # Create credentials from the service account info
-        credentials = service_account.Credentials.from_service_account_info(service_account_info)
-        # Initialize the client with the credentials
-        client = vision.ImageAnnotatorClient(credentials=credentials)
-        # client = vision.ImageAnnotatorClient() #####################################################################################################################
 
         with open(self.path, "rb") as image_file:
            content = image_file.read()
 
         image = vision_beta.Image(content=content)
         image_context = vision_beta.ImageContext(language_hints=["en-t-i0-handwrit"])
-        response =
+        response = self.client_beta.document_text_detection(image=image, image_context=image_context)
         texts = response.text_annotations
 
         if response.error.message:
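The new set_client() centralizes how the Google Vision clients obtain credentials: on Hugging Face the service-account JSON is read from an environment variable, while a local install falls back to application-default credentials. A minimal sketch of that pattern (the helper name is illustrative, not from the repo):

import json
import os

from google.cloud import vision
from google.oauth2 import service_account

def make_vision_client(is_hf: bool) -> vision.ImageAnnotatorClient:
    if is_hf:
        # On Hugging Face Spaces the service-account JSON is stored as a secret
        # in the 'google_service_account_json' environment variable.
        info = json.loads(os.environ['google_service_account_json'])
        credentials = service_account.Credentials.from_service_account_info(info)
        return vision.ImageAnnotatorClient(credentials=credentials)
    # Locally, rely on GOOGLE_APPLICATION_CREDENTIALS / application-default credentials.
    return vision.ImageAnnotatorClient()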
vouchervision/VoucherVision_Config_Builder.py
CHANGED
@@ -39,7 +39,7 @@ def build_VV_config(loaded_cfg=None):
         OCR_option = 'hand'
         check_for_illegal_filenames = False
 
-        LLM_version_user = 'Azure GPT
+        LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
         prompt_version = 'version_5.yaml' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
         use_LeafMachine2_collage_images = True # Use LeafMachine2 collage images
         do_create_OCR_helper_image = True
vouchervision/utils_VoucherVision.py
CHANGED
@@ -632,7 +632,7 @@ class VoucherVision():
         # self.OCR - None
 
         ### Process_image() runs the OCR for text, handwriting, trOCR AND creates the overlay image
-        ocr_google = OCRGoogle(self.path_to_crop, self.cfg, self.trOCR_model_version, self.trOCR_model, self.trOCR_processor, self.device)
+        ocr_google = OCRGoogle(self.is_hf, self.path_to_crop, self.cfg, self.trOCR_model_version, self.trOCR_model, self.trOCR_processor, self.device)
         ocr_google.process_image(self.do_create_OCR_helper_image, self.logger)
         self.OCR = ocr_google.OCR
 