Spaces:
Running
Running
Commit
·
c824976
1
Parent(s):
1d9ab62
Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
Browse files
- app.py +24 -5
- run_VoucherVision.py +35 -16
- vouchervision/general_utils.py +1 -0
app.py
CHANGED
|
@@ -308,7 +308,6 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
|
|
| 308 |
|
| 309 |
@st.cache_data
|
| 310 |
def handle_image_upload_and_gallery():
|
| 311 |
-
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
| 312 |
|
| 313 |
if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 314 |
if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
|
|
@@ -381,6 +380,7 @@ def content_input_images(col_left, col_right):
|
|
| 381 |
handle_image_upload_and_gallery_hf(uploaded_files)
|
| 382 |
|
| 383 |
else:
|
|
|
|
| 384 |
handle_image_upload_and_gallery()
|
| 385 |
|
| 386 |
def list_jpg_files(directory_path):
|
|
@@ -468,12 +468,19 @@ def use_test_image():
|
|
| 468 |
clear_image_uploads()
|
| 469 |
st.session_state['uploader_idk'] += 1
|
| 470 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
st.session_state['input_list'].append(file_path)
|
| 473 |
|
| 474 |
img = Image.open(file_path)
|
| 475 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
| 476 |
-
|
|
|
|
|
|
|
|
|
|
| 477 |
st.session_state['input_list_small'].append(file_path_small)
|
| 478 |
|
| 479 |
|
|
@@ -1667,7 +1674,20 @@ def content_prompt_and_llm_version():
|
|
| 1667 |
with col_llm_1:
|
| 1668 |
GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
|
| 1669 |
st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
|
| 1670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1671 |
|
| 1672 |
|
| 1673 |
def content_api_check():
|
|
@@ -2186,7 +2206,6 @@ def content_less_used():
|
|
| 2186 |
#################################################################################################################################################
|
| 2187 |
# Sidebar #######################################################################################################################################
|
| 2188 |
#################################################################################################################################################
|
| 2189 |
-
@st.cache_data
|
| 2190 |
def sidebar_content():
|
| 2191 |
if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
|
| 2192 |
validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
|
|
|
|
| 308 |
|
| 309 |
@st.cache_data
|
| 310 |
def handle_image_upload_and_gallery():
|
|
|
|
| 311 |
|
| 312 |
if st.session_state['view_local_gallery'] and st.session_state['input_list_small'] and (st.session_state['dir_images_local_TEMP'] == st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 313 |
if MAX_GALLERY_IMAGES <= st.session_state['processing_add_on']:
|
|
|
|
| 380 |
handle_image_upload_and_gallery_hf(uploaded_files)
|
| 381 |
|
| 382 |
else:
|
| 383 |
+
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
| 384 |
handle_image_upload_and_gallery()
|
| 385 |
|
| 386 |
def list_jpg_files(directory_path):
|
|
|
|
| 468 |
clear_image_uploads()
|
| 469 |
st.session_state['uploader_idk'] += 1
|
| 470 |
for file in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']):
|
| 471 |
+
try:
|
| 472 |
+
file_path = save_uploaded_file(os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
| 473 |
+
except:
|
| 474 |
+
file_path = save_uploaded_file_local(os.path.join(st.session_state.dir_home,'demo','demo_images'),os.path.join(st.session_state.dir_home,'demo','demo_images'), file)
|
| 475 |
+
|
| 476 |
st.session_state['input_list'].append(file_path)
|
| 477 |
|
| 478 |
img = Image.open(file_path)
|
| 479 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
| 480 |
+
try:
|
| 481 |
+
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file, img)
|
| 482 |
+
except:
|
| 483 |
+
file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file, img)
|
| 484 |
st.session_state['input_list_small'].append(file_path_small)
|
| 485 |
|
| 486 |
|
|
|
|
| 1674 |
with col_llm_1:
|
| 1675 |
GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
|
| 1676 |
st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
|
| 1677 |
+
st.markdown("""
|
| 1678 |
+
Based on preliminary results, the following models perform the best. We are currently running tests of all possible OCR + LLM + Prompt combinations to create recipes for different workflows.
|
| 1679 |
+
- `Mistral Medium`
|
| 1680 |
+
- `Mistral Small`
|
| 1681 |
+
- `Mistral Tiny`
|
| 1682 |
+
- `PaLM 2 text-bison@001`
|
| 1683 |
+
- `GPT 4 Turbo 1106-preview`
|
| 1684 |
+
- `GPT 3.5 Instruct`
|
| 1685 |
+
- `LOCAL Mixtral 7Bx8 Instruct`
|
| 1686 |
+
- `LOCAL Mixtral 7B Instruct`
|
| 1687 |
+
|
| 1688 |
+
Larger models (e.g., `GPT 4`, `GPT 4 32k`, `Gemini Pro`) do not necessarily perform better for these tasks. MistralAI models exceeded our expectations and perform extremely well. PaLM 2 text-bison@001 also seems to consistently out-perform Gemini Pro.
|
| 1689 |
+
|
| 1690 |
+
The `SLTPvA_short.yaml` prompt also seems to work better with smaller LLMs (e.g., Mistral Tiny). Alternatively, enable double OCR to help the LLM focus on the OCR text given a longer prompt.""")
|
| 1691 |
|
| 1692 |
|
| 1693 |
def content_api_check():
|
|
|
|
| 2206 |
#################################################################################################################################################
|
| 2207 |
# Sidebar #######################################################################################################################################
|
| 2208 |
#################################################################################################################################################
|
|
|
|
| 2209 |
def sidebar_content():
|
| 2210 |
if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
|
| 2211 |
validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
|
run_VoucherVision.py
CHANGED
|
@@ -1,10 +1,26 @@
|
|
| 1 |
import streamlit.web.cli as stcli
|
| 2 |
-
import os, sys
|
| 3 |
|
| 4 |
# pip install protobuf==3.20.0
|
| 5 |
# pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
|
| 6 |
# pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
|
|
|
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def resolve_path(path):
|
|
@@ -15,18 +31,21 @@ def resolve_path(path):
|
|
| 15 |
if __name__ == "__main__":
|
| 16 |
dir_home = os.path.dirname(__file__)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit.web.cli as stcli
|
| 2 |
+
import os, sys, socket
|
| 3 |
|
| 4 |
# pip install protobuf==3.20.0
|
| 5 |
# pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
|
| 6 |
# pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
|
| 7 |
+
# pip install protobuf==3.20.0
|
| 8 |
|
| 9 |
+
def find_available_port(start_port, max_attempts=1000):
    """Return the first port at or above ``start_port`` that binds on 127.0.0.1.

    Ports are probed one at a time by binding a throwaway TCP socket. The
    socket is closed before returning, so the port is only tested, not
    reserved for the caller.

    Args:
        start_port: First port number to try.
        max_attempts: Maximum number of consecutive ports to probe.

    Returns:
        The first port number that could be bound.

    Raises:
        ValueError: If no port could be bound within ``max_attempts`` tries,
            or before running past the highest valid TCP port (65535).
    """
    highest_port = 65535
    # Never probe past 65535: bind() raises OverflowError (not OSError) for
    # out-of-range ports, which would escape as an unexpected exception
    # instead of the ValueError callers handle.
    stop = min(start_port + max_attempts, highest_port + 1)
    for port in range(start_port, stop):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except socket.error:
                # Port is in use (or otherwise unbindable); try the next one.
                continue
            return port
    raise ValueError(f"Could not find an available port within {max_attempts} attempts starting from port {start_port}.")
|
| 24 |
|
| 25 |
|
| 26 |
def resolve_path(path):
|
|
|
|
| 31 |
if __name__ == "__main__":
    # Directory containing this launcher; app.py is resolved relative to it.
    dir_home = os.path.dirname(__file__)

    start_port = 8529
    try:
        # Only the port probe is guarded, so a ValueError raised inside
        # Streamlit itself is not mistaken for "no free port".
        free_port = find_available_port(start_port)
    except ValueError as e:
        print(e)
        sys.exit(1)

    # Streamlit's CLI reads its arguments from sys.argv, so build the
    # equivalent of `streamlit run app.py ...` before invoking it.
    sys.argv = [
        "streamlit",
        "run",
        resolve_path(os.path.join(dir_home, "app.py")),
        # resolve_path(os.path.join(dir_home,"vouchervision", "VoucherVision_GUI.py")),
        "--global.developmentMode=false",
        # Use the port that was just probed rather than a fixed one, so the
        # launcher still starts when another instance holds the usual port.
        f"--server.port={free_port}",
        # Toggle below for HF vs Local
        # "--is_hf=1",
        # "--is_hf=0",
    ]
    sys.exit(stcli.main())
|
vouchervision/general_utils.py
CHANGED
|
@@ -106,6 +106,7 @@ def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, t
|
|
| 106 |
else:
|
| 107 |
return None #TODO add config tests to expense_report
|
| 108 |
|
|
|
|
| 109 |
def summarize_expense_report(path_expense_report):
|
| 110 |
# Initialize counters and sums
|
| 111 |
run_count = 0
|
|
|
|
| 106 |
else:
|
| 107 |
return None #TODO add config tests to expense_report
|
| 108 |
|
| 109 |
+
@st.cache_data
|
| 110 |
def summarize_expense_report(path_expense_report):
|
| 111 |
# Initialize counters and sums
|
| 112 |
run_count = 0
|