Spaces:

thaboe01
/

Shona-Spell-Checking

Paused

App Files Files Community

thaboe01 committed on May 20, 2024

Commit

a61e741

verified ·

1 Parent(s): b9544c7

Create app.py

Browse files

Files changed (1) hide show

app.py +51 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import streamlit as st
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+# Load your fine-tuned FLAN-T5 model and tokenizer
+@st.cache_resource
+def load_model():
+    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
+    model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-correctorv2")
+    return tokenizer, model
+# Load the tokenizer and model at module level (load_model is decorated with
+# st.cache_resource); correct_text() reads both as module globals.
+tokenizer, model = load_model()
+# Chunk size in words: correct_text() sends the input to the model in groups
+# of this many whitespace-separated words.
+MAX_PHRASE_LENGTH = 5
+# Instruction text that correct_text() prepends to every chunk before
+# tokenization and removes again from the decoded model output.
+PREFIX = "Please correct the following sentence: "
+# Function to correct text
+def correct_text(text):
+    words = text.split()
+    corrected_phrases = []
+    current_chunk = []
+    for word in words:
+        current_chunk.append(word)
+        # Check if adding the next word would exceed max length (including prefix)
+        if len(current_chunk) + 1 > MAX_PHRASE_LENGTH:
+            input_text = PREFIX + " ".join(current_chunk)
+            input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+            outputs = model.generate(input_ids)
+            corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(PREFIX):]  # Remove the prefix
+            corrected_phrases.append(corrected_phrase)
+            current_chunk = []  # Reset the chunk
+    # Handle the last chunk
+    if current_chunk:
+        input_text = PREFIX + " ".join(current_chunk)
+        input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+        outputs = model.generate(input_ids)
+        corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)[len(PREFIX):]
+        corrected_phrases.append(corrected_phrase)
+    return " ".join(corrected_phrases)  # Join the corrected chunks
+# Streamlit App
+# Page title shown at the top of the app.
+st.title("Shona Text Editor with Real-Time Spelling Correction")
+# Editable text area holding the user's raw input.
+text_input = st.text_area("Start typing here...", height=250)
+# Only invoke the model when the input is non-empty; the corrected result is
+# displayed in a second, disabled (read-only) text area.
+if text_input:
+    corrected_text = correct_text(text_input)
+    st.text_area("Corrected Text", value=corrected_text, height=250, disabled=True)