Spaces:

sashtech
/

aihumanifierandgrmoform

Sleeping

App Files Files Community

sashtech committed on Sep 20, 2024

Commit

7c41997

verified ·

1 Parent(s): 19c632c

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -57

app.py CHANGED Viewed

@@ -7,12 +7,14 @@ import nltk
 from nltk.corpus import wordnet
 from spellchecker import SpellChecker
 import re
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 # Initialize the spell checker
 spell = SpellChecker()
 # Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
@@ -35,7 +37,7 @@ def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
     if synsets:
         lemmas = synsets[0].lemmas()
-        return [lemma.name() for lemma in lemmas]
     return []
 # Function to remove redundant and meaningless words
@@ -68,14 +70,14 @@ def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
             lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
             corrected_text.append(lemma)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to correct singular/plural errors
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
@@ -84,12 +86,12 @@ def correct_singular_plural_errors(text):
         if token.pos_ == "NOUN":
             if token.tag_ == "NN":  # Singular noun
                 if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
-                    corrected_text.append(token.lemma_ + 's')
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
                 if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
-                    corrected_text.append(token.lemma_)
                 else:
                     corrected_text.append(token.text)
         else:
@@ -116,26 +118,23 @@ def correct_article_errors(text):
 # Function to get the correct synonym while maintaining verb form
 def replace_with_synonym(token):
-    pos = None
-    if token.pos_ == "VERB":
-        pos = wordnet.VERB
-    elif token.pos_ == "NOUN":
-        pos = wordnet.NOUN
-    elif token.pos_ == "ADJ":
-        pos = wordnet.ADJ
-    elif token.pos_ == "ADV":
-        pos = wordnet.ADV
     synonyms = get_synonyms_nltk(token.lemma_, pos)
     if synonyms:
         synonym = synonyms[0]
-        if token.tag_ == "VBG":  # Present participle (e.g., running)
-            synonym = synonym + 'ing'
-        elif token.tag_ == "VBD" or token.tag_ == "VBN":  # Past tense or past participle
-            synonym = synonym + 'ed'
         elif token.tag_ == "VBZ":  # Third-person singular present
-            synonym = synonym + 's'
         return synonym
     return token.text
@@ -155,12 +154,12 @@ def ensure_subject_verb_agreement(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
         if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
             if token.tag_ == "NN" and token.head.tag_ != "VBZ":  # Singular noun, should use singular verb
-                corrected_text.append(token.head.lemma_ + "s")
             elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":  # Plural noun, should not use singular verb
-                corrected_text.append(token.head.lemma_)
-        corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Function to correct spelling errors
@@ -193,27 +192,24 @@ def rephrase_with_synonyms(text):
             rephrased_text.append("Earth")
             continue
-        pos_tag = None
-        if token.pos_ == "NOUN":
-            pos_tag = wordnet.NOUN
-        elif token.pos_ == "VERB":
-            pos_tag = wordnet.VERB
-        elif token.pos_ == "ADJ":
-            pos_tag = wordnet.ADJ
-        elif token.pos_ == "ADV":
-            pos_tag = wordnet.ADV
         if pos_tag:
             synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
             if synonyms:
                 synonym = synonyms[0]  # Just using the first synonym for simplicity
                 if token.pos_ == "VERB":
-                    if token.tag_ == "VBG":  # Present participle (e.g., running)
-                        synonym = synonym + 'ing'
-                    elif token.tag_ == "VBD" or token.tag_ == "VBN":  # Past tense or past participle
-                        synonym = synonym + 'ed'
                     elif token.tag_ == "VBZ":  # Third-person singular present
-                        synonym = synonym + 's'
                 rephrased_text.append(synonym)
             else:
                 rephrased_text.append(token.text)
@@ -234,37 +230,46 @@ def paraphrase_and_correct(text):
     paraphrased_text = correct_tense_errors(paraphrased_text)
     paraphrased_text = correct_singular_plural_errors(paraphrased_text)
     paraphrased_text = correct_article_errors(paraphrased_text)
-    paraphrased_text = correct_double_negatives(paraphrased_text)
-    paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
-    # Correct spelling and punctuation
     paraphrased_text = correct_spelling(paraphrased_text)
     paraphrased_text = correct_punctuation(paraphrased_text)
-    paraphrased_text = handle_possessives(paraphrased_text)  # Handle possessives
-    # Rephrase with synonyms
     paraphrased_text = rephrase_with_synonyms(paraphrased_text)
-    # Force capitalization of the first letter of each sentence
-    final_text = capitalize_sentences_and_nouns(paraphrased_text)
-    return final_text
-# Gradio Interface
 def process_text(input_text):
     ai_label, ai_score = predict_en(input_text)
-    corrected_text = paraphrase_and_correct(input_text)
-    return ai_label, ai_score, corrected_text
-# Create Gradio interface
 iface = gr.Interface(
     fn=process_text,
-    inputs="text",
-    outputs=["text", "number", "text"],
-    title="AI Content Detection and Grammar Correction",
-    description="Enter text to detect AI-generated content and correct grammar."
 )
-# Launch the Gradio app
-if __name__ == "__main__":
-    iface.launch()

 from nltk.corpus import wordnet
 from spellchecker import SpellChecker
 import re
+from inflect import engine  # For pluralization
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 # Initialize the spell checker
 spell = SpellChecker()
+inflect_engine = engine()
 # Ensure necessary NLTK data is downloaded
 nltk.download('wordnet')
     synsets = wordnet.synsets(word, pos=pos)
     if synsets:
         lemmas = synsets[0].lemmas()
+        return [lemma.name() for lemma in lemmas if lemma.name() != word]  # Avoid original word
     return []
 # Function to remove redundant and meaningless words
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        if token.pos_ == "VERB":
             lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
             corrected_text.append(lemma)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
+# Function to correct singular/plural errors using inflect
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
         if token.pos_ == "NOUN":
             if token.tag_ == "NN":  # Singular noun
                 if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
+                    corrected_text.append(inflect_engine.plural(token.lemma_))
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
                 if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
+                    corrected_text.append(inflect_engine.singular_noun(token.text) or token.text)
                 else:
                     corrected_text.append(token.text)
         else:
 # Function to get the correct synonym while maintaining verb form
 def replace_with_synonym(token):
+    pos = {
+        "VERB": wordnet.VERB,
+        "NOUN": wordnet.NOUN,
+        "ADJ": wordnet.ADJ,
+        "ADV": wordnet.ADV
+    }.get(token.pos_, None)
     synonyms = get_synonyms_nltk(token.lemma_, pos)
     if synonyms:
         synonym = synonyms[0]
+        if token.tag_ == "VBG":  # Present participle
+            synonym += 'ing'
+        elif token.tag_ in {"VBD", "VBN"}:  # Past tense or past participle
+            synonym += 'ed'
         elif token.tag_ == "VBZ":  # Third-person singular present
+            synonym += 's'
         return synonym
     return token.text
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        corrected_text.append(token.text)
         if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
             if token.tag_ == "NN" and token.head.tag_ != "VBZ":  # Singular noun, should use singular verb
+                corrected_text[-1] = token.head.lemma_ + "s"
             elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":  # Plural noun, should not use singular verb
+                corrected_text[-1] = token.head.lemma_
     return ' '.join(corrected_text)
 # Function to correct spelling errors
             rephrased_text.append("Earth")
             continue
+        pos_tag = {
+            "NOUN": wordnet.NOUN,
+            "VERB": wordnet.VERB,
+            "ADJ": wordnet.ADJ,
+            "ADV": wordnet.ADV
+        }.get(token.pos_, None)
         if pos_tag:
             synonyms = get_synonyms_nltk(token.lemma_, pos_tag)
             if synonyms:
                 synonym = synonyms[0]  # Just using the first synonym for simplicity
                 if token.pos_ == "VERB":
+                    if token.tag_ == "VBG":  # Present participle
+                        synonym += 'ing'
+                    elif token.tag_ in {"VBD", "VBN"}:  # Past tense or past participle
+                        synonym += 'ed'
                     elif token.tag_ == "VBZ":  # Third-person singular present
+                        synonym += 's'
                 rephrased_text.append(synonym)
             else:
                 rephrased_text.append(token.text)
     paraphrased_text = correct_tense_errors(paraphrased_text)
     paraphrased_text = correct_singular_plural_errors(paraphrased_text)
     paraphrased_text = correct_article_errors(paraphrased_text)
+    # Correct spelling errors
     paraphrased_text = correct_spelling(paraphrased_text)
+    # Correct punctuation issues
     paraphrased_text = correct_punctuation(paraphrased_text)
+    # Handle possessives
+    paraphrased_text = handle_possessives(paraphrased_text)
+    # Ensure subject-verb agreement
+    paraphrased_text = ensure_subject_verb_agreement(paraphrased_text)
+    # Replace with synonyms
     paraphrased_text = rephrase_with_synonyms(paraphrased_text)
+    # Correct for double negatives
+    paraphrased_text = correct_double_negatives(paraphrased_text)
+    return paraphrased_text
+# Function to handle the user interface
 def process_text(input_text):
     ai_label, ai_score = predict_en(input_text)
+    ai_result = f"AI Detected: {ai_label} (Score: {ai_score:.2f})"
+    if ai_label == "HUMAN":
+        corrected_text = paraphrase_and_correct(input_text)
+        return corrected_text, ai_result
+    else:
+        return "The text seems to be AI-generated; no correction applied.", ai_result
+# Gradio interface
 iface = gr.Interface(
     fn=process_text,
+    inputs=gr.Textbox(lines=10, placeholder="Enter your text here..."),
+    outputs=[gr.Textbox(label="Corrected Text"), gr.Textbox(label="AI Detection Result")],
+    title="Text Correction and AI Detection",
+    description="This app corrects grammar, spelling, and punctuation while also detecting AI-generated content."
 )
+# Launch the interface
+iface.launch()