Spaces:
Runtime error
Runtime error
| import io | |
| import pandas as pd | |
| import streamlit as st | |
| from transformers import AutoTokenizer | |
| from tapas_visualizer import TapasVisualizer | |
# Page-level settings: browser-tab title, tab icon, and full-width layout.
st.set_page_config(
    page_title="Tapas Tokenizer",
    page_icon="🍽️",
    layout="wide",
)
def set_file_input() -> None:
    """Record in session state that the file uploader was the last input changed."""
    st.session_state["input_stream"] = "file"
def set_text_input() -> None:
    """Record in session state that the text area was the last input changed."""
    st.session_state["input_stream"] = "text"
def _read_table(text, uploaded_file):
    """Parse whichever input currently holds data into a DataFrame.

    The input most recently changed (tracked in ``st.session_state.input_stream``)
    is preferred, but if that input is now empty the other one is used instead.

    Args:
        text: Contents of the CSV text area (may be empty).
        uploaded_file: File-like object from the uploader, or ``None``.

    Returns:
        The parsed ``pd.DataFrame``, or ``None`` when neither input has content.

    Raises:
        pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError:
            Propagated from ``pd.read_csv`` when the input is not valid CSV.
    """
    has_text = bool((text or "").strip())
    has_file = uploaded_file is not None

    source = (
        st.session_state.input_stream
        if "input_stream" in st.session_state
        else "text"
    )
    # The recorded source can point at an input that has since been emptied
    # (file removed, text cleared); fall back to the input that has data.
    if source == "file" and not has_file:
        source = "text"
    elif source == "text" and not has_text and has_file:
        source = "file"

    if source == "file":
        # Rewind in case the buffer was already consumed by an earlier read.
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file)
    if has_text:
        return pd.read_csv(io.StringIO(text), sep=",")
    return None


def main():
    """Render the TAPAS tokenization visualizer page.

    The left column collects the tokenizer choice and the table (pasted CSV or
    an uploaded file); the right column shows the tokenization rendered as HTML.
    Missing or unparsable input is reported in the UI instead of raising.
    """
    models = ["google/tapas-base", "deepset/tapas-large-nq-hn-reader"]

    st.markdown(
        """
        ## TAPAS Tokenization Visualization
        [TAPAS](https://huggingface.co/docs/transformers/model_doc/tapas) models work on Tables.
        The tool below is to help visualize how the table is tokenized and give total (+ row-wise)
        token counts.
        Implementation adapted from `tokenizers.tools.EncodingVisualizer`.
        """
    )

    input_col, output_col = st.columns([1, 2])

    with input_col:
        selected_model = st.selectbox("Select a tokenizer", models, key=1)
        text = st.text_area(
            label="", placeholder="Table to tokenize; csv", on_change=set_text_input
        )
        uploaded_file = st.file_uploader("(Or) Choose a file", on_change=set_file_input)
        button_clicked = st.button("Tokenize")

    with output_col:
        if not (text or uploaded_file or button_clicked):
            return

        try:
            df = _read_table(text, uploaded_file)
        except (
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
            UnicodeDecodeError,
        ) as exc:
            st.error(f"Could not parse the table as CSV: {exc}")
            return

        if df is None:
            # "Tokenize" was pressed with nothing to tokenize.
            st.warning("Enter a CSV table or upload a file to tokenize.")
            return

        # Load the tokenizer only once there is a table to show.
        # NOTE(review): it is still reloaded on every rerun; consider Streamlit's
        # resource caching if the deployed Streamlit version provides it.
        tokenizer = AutoTokenizer.from_pretrained(selected_model)
        visualizer = TapasVisualizer(tokenizer)
        st.components.v1.html(visualizer(df.astype(str)), height=1500)
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()