Mamadou2727 committed
Commit d9eb428 · 1 Parent(s): 7a90a1a

upload the files

Files changed (4)
  1. app.py +357 -0
  2. glossary.json +0 -0
  3. grammar_rules.json +231 -0
  4. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,357 @@
+ import json
+ import warnings
+ import re
+ import os
+ from google import genai
+ from google.genai import types
+ from langchain.schema import Document
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.prompts import PromptTemplate
+ import gradio as gr
+
+ # Suppress warnings for cleaner output
+ warnings.filterwarnings("ignore")
+
+ class ZarmaLanguageAnalyzer:
+     def __init__(self, grammar_path: str, glossary_path: str):
+         """
+         Initialize the Zarma Language Analyzer with grammar rules and glossary.
+         Optimized for CPU usage on Hugging Face Spaces.
+         """
+         print("Running on CPU for Hugging Face Spaces.")
+
+         self.grammar_rules = self._load_json(grammar_path).get("grammar_rules", [])
+         self.glossary_data = self._load_json(glossary_path)
+
+         self._setup_models()
+         self._setup_vectorstore()
+
+     def _load_json(self, file_path: str) -> dict:
+         """Load and parse a JSON file."""
+         with open(file_path, 'r', encoding='utf-8') as f:
+             return json.load(f)
+
+     def _setup_models(self):
+         """Set up the Gemini 2.0 Flash model via the Google Gen AI SDK."""
+         # Get API key from environment variable
+         api_key = os.getenv("GOOGLE_API_KEY")
+         if not api_key:
+             raise ValueError("GOOGLE_API_KEY environment variable not set.")
+         self.client = genai.Client(api_key=api_key)
+         self.model = "gemini-2.0-flash"
+
+         self.analysis_template = PromptTemplate(
+             input_variables=["sentence", "grammar_check", "glossary_info"],
+             template="""
+ You are a Zarma language expert. Analyze this Zarma sentence: "{sentence}"
+
+ Rely primarily on your expertise in Zarma grammar and meaning. Recognize proper nouns (e.g., names or places) as such unless the glossary explicitly contradicts this with a common Zarma meaning. Use the grammar check and glossary below as supplementary aids only—do not override your knowledge unless they provide clear, contextually relevant insight.
+
+ Grammar check results (optional guide):
+ {grammar_check}
+
+ Glossary information (use it but prioritize your expertise to confirm):
+ {glossary_info}
+
+ Provide a detailed linguistic analysis in this exact format, with no extra text outside the sections:
+
+ 1. WORD BREAKDOWN:
+ - [List each word with its grammatical role and meaning, e.g., "Ay: 1st person singular pronoun, meaning 'I'."]
+
+ 2. LINGUISTIC INSIGHT:
+ - Word Order: [Describe typical Zarma word order (e.g., SOV, SVO) and how this sentence aligns or deviates]
+ - Tense/Aspect Markers: [Explain tense/aspect markers like 'ga', 'goono ga', or none for past, with examples like "Ay ga koy" (I will go)]
+ - Contextual Insight: [Discuss what the sentence might intend to convey and any external influences or errors]
+
+ 3. CORRECTNESS ASSESSMENT:
+ - Is the sentence correct? [Yes/No, with explanation]
+ - Reason for Incorrectness (if applicable): [Detailed reason why it’s wrong, e.g., misplaced particle]
+ - Corrections (depending on intended meaning):
+ - [Option 1: Corrected sentence with explanation]
+ - [Option 2: Corrected sentence with explanation]
+ - [Option 3: Corrected sentence with explanation]
+ """
+         )
+
+     def _setup_vectorstore(self):
+         """Set up FAISS vector store with the glossary for retrieval."""
+         embed_model = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-MiniLM-L6-v2",
+             model_kwargs={"device": "cpu"}  # Force CPU usage
+         )
+
+         documents = []
+         for entry in self.glossary_data:
+             fr_word = entry.get("fr", "")
+             dje_word = entry.get("dje", "")
+             notes = entry.get("notes", "No additional context available")
+
+             content = f"French: {fr_word}\nDjerma: {dje_word}\nNotes: {notes}"
+             metadata = {"fr": fr_word, "dje": dje_word, "notes": notes}
+
+             documents.append(Document(page_content=content, metadata=metadata))
+
+         self.vectorstore = FAISS.from_documents(documents, embed_model)
+
+     def check_grammar(self, sentence: str) -> list:
+         """Check if the sentence violates any grammar rules."""
+         issues = []
+         for rule in self.grammar_rules:
+             rule_id = rule.get("rule_id", "")
+             category = rule.get("category", "")
+             subcategory = rule.get("subcategory", "")
+             description = rule.get("description", "")
+             examples = rule.get("examples", [])
+
+             for example in examples:
+                 wrong_phrase = example.get("zarma", "")
+                 corrected_phrase = example.get("corrected_zarma", "")
+                 english_example = example.get("english", "")
+
+                 if wrong_phrase and wrong_phrase in sentence:
+                     explanation = (
+                         f"This rule applies because '{wrong_phrase}' doesn't follow {category} norms in Zarma. "
+                         f"Specifically, it violates rules related to {subcategory}. "
+                         f"The correct form would be '{corrected_phrase or 'unknown'}'. "
+                         f"In English, this is similar to: '{english_example}'"
+                     )
+                     issues.append({
+                         "rule_id": rule_id,
+                         "category": category,
+                         "subcategory": subcategory,
+                         "description": description,
+                         "wrong_phrase": wrong_phrase,
+                         "corrected_phrase": corrected_phrase,
+                         "english_example": english_example,
+                         "explanation": explanation
+                     })
+         return issues
+
+     def translate_and_explain_words(self, sentence: str) -> dict:
+         """Break the sentence into words and find glossary entries."""
+         words = sentence.split()
+         word_info = {}
+         retrieved_context = []
+
+         for word in words:
+             clean_word = word.strip(".,!?;:()\"'")
+             if not clean_word:
+                 continue
+
+             exact_match = None
+             for entry in self.glossary_data:
+                 if entry.get("dje", "").lower() == clean_word.lower() or entry.get("fr", "").lower() == clean_word.lower():
+                     exact_match = entry
+                     break
+
+             if exact_match:
+                 fr_word = exact_match.get("fr", "")
+                 dje_word = exact_match.get("dje", "")
+                 notes = exact_match.get("notes", "No additional context available")
+
+                 word_info[clean_word] = {
+                     "french": fr_word,
+                     "djerma": dje_word,
+                     "notes": notes,
+                     "match_type": "exact"
+                 }
+
+                 context_entry = f"Word: {clean_word}\nFrench: {fr_word}\nDjerma: {dje_word}\nNotes: {notes}"
+                 if context_entry not in retrieved_context:
+                     retrieved_context.append(context_entry)
+             else:
+                 search_results = self.vectorstore.similarity_search(clean_word, k=1)
+                 if search_results:
+                     result = search_results[0]
+                     metadata = result.metadata
+                     word_info[clean_word] = {
+                         "french": metadata.get("fr", ""),
+                         "djerma": metadata.get("dje", ""),
+                         "notes": metadata.get("notes", "No additional context available"),
+                         "match_type": "semantic"
+                     }
+                     context_entry = f"Word: {clean_word}\nFrench: {metadata.get('fr', '')}\nDjerma: {metadata.get('dje', '')}\nNotes: {metadata.get('notes', 'No additional context available')}"
+                     if context_entry not in retrieved_context:
+                         retrieved_context.append(context_entry)
+
+         sentence_results = self.vectorstore.similarity_search(sentence, k=5)
+         for result in sentence_results:
+             context_entry = result.page_content
+             if context_entry not in retrieved_context:
+                 retrieved_context.append(context_entry)
+
+         top_contexts = retrieved_context[:3]
+         return {"word_info": word_info, "retrieved_context": top_contexts}
+
+     def format_grammar_issues(self, issues: list) -> str:
+         """Format grammar issues for display."""
+         if not issues:
+             return "No grammar issues detected."
+         result = "Grammar Issues Detected:\n\n"
+         for i, issue in enumerate(issues, 1):
+             result += f"Issue {i}:\n"
+             result += f"Rule ID: {issue.get('rule_id', '')}\n"
+             result += f"Category: {issue.get('category', '')}\n"
+             result += f"Subcategory: {issue.get('subcategory', '')}\n"
+             result += f"Description: {issue.get('description', '')}\n"
+             result += f"Wrong phrase: '{issue.get('wrong_phrase', '')}'\n"
+             result += f"Corrected phrase: '{issue.get('corrected_phrase', '')}'\n"
+             result += f"English example: {issue.get('english_example', '')}\n"
+             result += f"Explanation: {issue.get('explanation', '')}\n\n"
+         return result
+
+     def format_glossary_info(self, glossary_results: dict) -> str:
+         """Format glossary information for model input."""
+         word_info = glossary_results.get("word_info", {})
+         if not word_info:
+             return "No glossary matches found for words in the sentence."
+         result = "Glossary information:\n\n"
+         for word, info in word_info.items():
+             result += f"Word: {word}\n"
+             result += f"French: {info.get('french', '')}\n"
+             result += f"Djerma: {info.get('djerma', '')}\n"
+             result += f"Notes: {info.get('notes', '')}\n\n"
+         return result
+
+     def filter_reliable_context(self, glossary_results: dict, analysis_result: str) -> list:
+         """Filter glossary context to only show entries reliable in the context of Gemini's analysis."""
+         retrieved_context = glossary_results.get("retrieved_context", [])
+         analysis_lower = analysis_result.lower()
+         reliable_context = []
+
+         for context in retrieved_context:
+             lines = context.split("\n")
+             word_line = lines[0]
+             word = word_line.split(": ")[1].lower()
+
+             if word in analysis_lower:
+                 reliable_context.append(context)
+
+         return reliable_context[:3]
+
+     def extract_analysis(self, raw_output: str) -> str:
+         """Extract the detailed analysis sections."""
+         pattern = (
+             r"(1\. WORD BREAKDOWN:\s*-\s*.+?)" +
+             r"(2\. LINGUISTIC INSIGHT:\s*-\s*Word Order:\s*.+?)" +
+             r"(3\. CORRECTNESS ASSESSMENT:\s*-\s*Is the sentence correct\?.+?)(?=\n\n|$)"
+         )
+         match = re.search(pattern, raw_output, re.DOTALL)
+
+         if match:
+             return match.group(1) + "\n" + match.group(2) + "\n" + match.group(3)
+
+         return (
+             "1. WORD BREAKDOWN:\n"
+             " - Analysis incomplete due to model limitations.\n\n"
+             "2. LINGUISTIC INSIGHT:\n"
+             " - Word Order: Analysis incomplete.\n"
+             " - Tense/Aspect Markers: Analysis incomplete.\n"
+             " - Contextual Insight: Analysis incomplete.\n\n"
+             "3. CORRECTNESS ASSESSMENT:\n"
+             " - Is the sentence correct? Unknown due to model limitations.\n"
+             " - Reason for Incorrectness (if applicable): Unknown.\n"
+             " - Corrections: None provided."
+         )
+
+     def analyze_sentence(self, sentence: str) -> dict:
+         """Full analysis pipeline for a Zarma sentence using Gemini 2.0 Flash."""
+         grammar_issues = self.check_grammar(sentence)
+         formatted_grammar = self.format_grammar_issues(grammar_issues)
+         glossary_results = self.translate_and_explain_words(sentence)
+         formatted_glossary = self.format_glossary_info(glossary_results)
+
+         prompt = self.analysis_template.format(
+             sentence=sentence,
+             grammar_check=formatted_grammar,
+             glossary_info=formatted_glossary
+         )
+
+         contents = [
+             types.Content(
+                 role="user",
+                 parts=[types.Part.from_text(text=prompt)],
+             ),
+         ]
+         generate_content_config = types.GenerateContentConfig(
+             response_mime_type="text/plain",
+         )
+
+         raw_analysis = ""
+         try:
+             for chunk in self.client.models.generate_content_stream(
+                 model=self.model,
+                 contents=contents,
+                 config=generate_content_config,
+             ):
+                 raw_analysis += chunk.text
+         except Exception as e:
+             raw_analysis = f"Error in analysis generation: {str(e)}"
+
+         analysis_result = self.extract_analysis(raw_analysis)
+         reliable_context = self.filter_reliable_context(glossary_results, analysis_result)
+
+         return {
+             "sentence": sentence,
+             "grammar_issues": grammar_issues,
+             "formatted_grammar": formatted_grammar,
+             "analysis_result": analysis_result,
+             "retrieved_context": reliable_context
+         }
+
+     def format_output(self, results: dict) -> str:
+         """Format the analysis results for Gradio output."""
+         output = "=" * 80 + "\n"
+         output += "ZARMA LANGUAGE ANALYZER\n"
+         output += "=" * 80 + "\n\n"
+
+         output += "SENTENCE ANALYZED:\n"
+         output += f" \"{results['sentence']}\"\n\n"
+
+         output += "GRAMMAR STATUS:\n"
+         output += f" {'Issues detected' if results['grammar_issues'] else 'Correct'}\n"
+         output += "-" * 80 + "\n"
+
+         output += "DETAILED ANALYSIS:\n"
+         output += results["analysis_result"] + "\n"
+         output += "-" * 80 + "\n"
+
+         output += "RELIABLE CONTEXT SOURCES:\n"
+         if results["retrieved_context"]:
+             for i, context in enumerate(results["retrieved_context"], 1):
+                 output += f"Source {i}:\n"
+                 output += context + "\n\n"
+         else:
+             output += " No reliable context sources retrieved based on the analysis.\n"
+         output += "=" * 80
+
+         return output
+
+ # Initialize the analyzer (adjust paths to match your Hugging Face Space structure)
+ analyzer = ZarmaLanguageAnalyzer("grammar_rules.json", "glossary.json")
+
+ # Gradio interface
+ def analyze_zarma_sentence(sentence):
+     if not sentence.strip():
+         return "Please enter a valid Zarma sentence."
+     results = analyzer.analyze_sentence(sentence)
+     return analyzer.format_output(results)
+
+ # Define the Gradio UI
+ with gr.Blocks(title="Zarma Language Analyzer") as demo:
+     gr.Markdown("# Zarma Language Analyzer")
+     gr.Markdown("Enter a Zarma sentence below to analyze its grammar and meaning.")
+
+     sentence_input = gr.Textbox(label="Zarma Sentence", placeholder="e.g., Ay ga koy.")
+     analyze_button = gr.Button("Analyze")
+     output_text = gr.Textbox(label="Analysis Result", lines=20)
+
+     analyze_button.click(
+         fn=analyze_zarma_sentence,
+         inputs=sentence_input,
+         outputs=output_text
+     )
+
+ # Launch the app
+ demo.launch()
glossary.json ADDED
The diff for this file is too large to render. See raw diff
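Since the glossary content is not rendered above, the sketch below shows the entry shape that app.py assumes: the file is iterated as a list, and each entry is read through the keys "fr", "dje", and "notes". The concrete values are illustrative placeholders, not rows from the actual glossary.

# Inferred shape of one glossary.json entry, written as a Python literal.
# Values are placeholders; "notes" is optional and app.py falls back to a default when it is absent.
glossary_entry = {
    "fr": "aller",                # French headword
    "dje": "koy",                 # Zarma/Djerma equivalent
    "notes": "basic motion verb"  # free-text context used for retrieval
}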
 
grammar_rules.json ADDED
@@ -0,0 +1,231 @@
+ {
+   "grammar_rules": [
+     {
+       "rule_id": 1,
+       "category": "Pronouns",
+       "subcategory": "Personal Pronouns",
+       "description": "Personal pronouns in Zarma are invariable across nominative, objective, and possessive cases.",
+       "examples": [
+         {"zarma": "ay", "english": "I, me, my"},
+         {"zarma": "ni", "english": "you, your (singular)"},
+         {"zarma": "a (nga)", "english": "he, she, it; his, her, its"},
+         {"zarma": "iri (ir)", "english": "we, us, our"},
+         {"zarma": "araŋ", "english": "you (plural), your"},
+         {"zarma": "i (ngey, ey)", "english": "they, them, their"}
+       ]
+     },
+     {
+       "rule_id": 2,
+       "category": "Pronouns",
+       "subcategory": "Demonstrative Pronouns",
+       "description": "Demonstrative pronouns indicate specific items; 'din' suffix can be added to nouns for specificity.",
+       "examples": [
+         {"zarma": "wo", "english": "this, that"},
+         {"zarma": "wey", "english": "these, those"}
+       ]
+     },
+     {
+       "rule_id": 3,
+       "category": "Pronouns",
+       "subcategory": "Indefinite Pronouns",
+       "description": "Indefinite pronouns refer to non-specific entities.",
+       "examples": [
+         {"zarma": "boro", "english": "someone, one (person)"},
+         {"zarma": "hay kulu", "english": "everything"},
+         {"zarma": "hay fo", "english": "something"}
+       ]
+     },
+     {
+       "rule_id": 4,
+       "category": "Nouns",
+       "subcategory": "Definite Article",
+       "description": "Definite articles are expressed by adding 'a' or 'o' to the noun based on its ending.",
+       "patterns": [
+         {"ending": "a", "rule": "add 'a' (e.g., zanka → zankaa)", "exceptions": "Pre-1999 texts may not change"},
+         {"ending": "o", "rule": "change to 'a' or add 'a' (e.g., wayboro → waybora)"},
+         {"ending": "ko", "rule": "change to 'kwa' (e.g., darbayko → darbaykwa)"},
+         {"ending": "e, i, u, consonant", "rule": "change to 'o' or add 'o' (e.g., wande → wando)"},
+         {"ending": "ay", "rule": "change 'ay' to 'a' or add 'o' (e.g., farkay → farka or farkayo)"}
+       ],
+       "examples": [
+         {"zarma": "zanka → zankaa", "english": "a child → the child"},
+         {"zarma": "wayboro → waybora", "english": "a woman → the woman"},
+         {"zarma": "darbayko → darbaykwa", "english": "a fisherman → the fisherman"},
+         {"zarma": "hansi → hanso", "english": "a dog → the dog"},
+         {"zarma": "farkay → farka", "english": "a donkey → the donkey"}
+       ]
+     },
+     {
+       "rule_id": 5,
+       "category": "Nouns",
+       "subcategory": "Definite Plural",
+       "description": "Definite plural is formed by replacing the definite singular vowel with 'ey'.",
+       "pattern": "Replace final vowel with 'ey' (e.g., zankaa → zankey)",
+       "examples": [
+         {"zarma": "zankaa → zankey", "english": "the child → the children"},
+         {"zarma": "hanso → hansey", "english": "the dog → the dogs"},
+         {"zarma": "farka → farkey", "english": "the donkey → the donkeys"}
+       ]
+     },
+     {
+       "rule_id": 6,
+       "category": "Nouns",
+       "subcategory": "Indefinite Article",
+       "description": "No explicit indefinite article; 'fo' (one) is used to specify 'a certain' or 'one'.",
+       "pattern": "Add 'fo' after noun for specificity (e.g., musu → musu fo)",
+       "examples": [
+         {"zarma": "musu", "english": "a cat"},
+         {"zarma": "musu fo", "english": "a (certain) cat, one cat"}
+       ]
+     },
+     {
+       "rule_id": 7,
+       "category": "Nouns",
+       "subcategory": "Gender",
+       "description": "No grammatical gender; specific words indicate male/female for living beings.",
+       "examples": [
+         {"zarma": "alboro", "english": "man"},
+         {"zarma": "wayboro", "english": "woman"}
+       ]
+     },
+     {
+       "rule_id": 8,
+       "category": "Verbs",
+       "subcategory": "Completed Action (Past Tense)",
+       "description": "Verbs without auxiliaries indicate completed actions (past tense).",
+       "pattern": "Subject + Verb (e.g., ay neera)",
+       "examples": [
+         {"zarma": "ay neera", "english": "I sold"},
+         {"zarma": "a neera", "english": "he/she sold"},
+         {"zarma": "zankaa kani", "english": "the child went to bed"}
+       ]
+     },
+     {
+       "rule_id": 9,
+       "category": "Verbs",
+       "subcategory": "Uncompleted Action (Future Tense)",
+       "description": "Future tense uses auxiliary 'ga' before the verb.",
+       "pattern": "Subject + ga + Verb (e.g., ay ga neera)",
+       "examples": [
+         {"zarma": "ay ga neera", "english": "I will sell"},
+         {"zarma": "i ga neera", "english": "they will sell"}
+       ]
+     },
+     {
+       "rule_id": 10,
+       "category": "Verbs",
+       "subcategory": "Continuous Aspect",
+       "description": "Continuous aspect uses 'go no ga' before the verb for ongoing actions.",
+       "pattern": "Subject + go no ga + Verb (e.g., ay go no ga neera)",
+       "examples": [
+         {"zarma": "ay go no ga neera", "english": "I am selling"},
+         {"zarma": "a go no ga neera", "english": "he/she is selling"}
+       ]
+     },
+     {
+       "rule_id": 11,
+       "category": "Verbs",
+       "subcategory": "Subjunctive",
+       "description": "Subjunctive uses 'ma' to indicate possible actions.",
+       "pattern": "Subject + ma + Verb (e.g., ay ma neera)",
+       "examples": [
+         {"zarma": "ay ma neera", "english": "I should sell"},
+         {"zarma": "ni ma neera", "english": "you should sell"}
+       ]
+     },
+     {
+       "rule_id": 12,
+       "category": "Verbs",
+       "subcategory": "Imperative",
+       "description": "Imperative uses 'ma' or 'wa' before the verb, or just the verb alone.",
+       "pattern": "[Ma/Wa] + Verb or Verb alone (e.g., Ma haŋ or Haŋ)",
+       "examples": [
+         {"zarma": "Haŋ!", "english": "Drink!"},
+         {"zarma": "Ma haŋ!", "english": "Drink!"},
+         {"zarma": "Araŋ ma di!", "english": "You (plural) see!"}
+       ]
+     },
+     {
+       "rule_id": 13,
+       "category": "Verbs",
+       "subcategory": "To Be",
+       "description": "The verb 'to be' varies by context: 'go', 'ya...no', or 'ga ti'.",
+       "examples": [
+         {"zarma": "A go fu", "english": "He/she is at home"},
+         {"zarma": "Ay ya alfa no", "english": "I am a teacher"},
+         {"zarma": "Nga ga ti wayboro", "english": "She is a woman"}
+       ]
+     },
+     {
+       "rule_id": 14,
+       "category": "Verbs",
+       "subcategory": "Irregular Verbs",
+       "description": "Some verbs place objects unusually (e.g., direct object before verb without 'na').",
+       "examples": [
+         {"zarma": "Ay di a", "english": "I saw him/her"},
+         {"zarma": "A ne ay se", "english": "He/she said to me"}
+       ]
+     },
+     {
+       "rule_id": 15,
+       "category": "Adjectives",
+       "subcategory": "Qualifying Adjectives",
+       "description": "Adjectives follow the noun they modify.",
+       "pattern": "Noun + Adjective (e.g., fu beeri)",
+       "examples": [
+         {"zarma": "fu beeri", "english": "a big house"},
+         {"zarma": "hansi kayna", "english": "a small dog"}
+       ]
+     },
+     {
+       "rule_id": 16,
+       "category": "Sentence Structure",
+       "subcategory": "Basic Order",
+       "description": "Basic sentence order is Subject-Verb-Object (SVO).",
+       "pattern": "S + V + O (e.g., Ay neera bari)",
+       "examples": [
+         {"zarma": "Ay neera bari", "english": "I sold a horse"}
+       ]
+     },
+     {
+       "rule_id": 17,
+       "category": "Sentence Structure",
+       "subcategory": "Direct Object",
+       "description": "Direct object before verb requires 'na' in past positive.",
+       "pattern": "S + na + O + V (e.g., Ay na bari neera)",
+       "examples": [
+         {"zarma": "Ay na bari neera", "english": "I sold a horse"}
+       ]
+     },
+     {
+       "rule_id": 18,
+       "category": "Sentence Structure",
+       "subcategory": "Indirect Object",
+       "description": "Indirect object is marked with 'se' after the object.",
+       "pattern": "S + V + O + IO + se (e.g., Ay no bari wayboro se)",
+       "examples": [
+         {"zarma": "Ay no bari wayboro se", "english": "I gave a horse to the woman"}
+       ]
+     },
+     {
+       "rule_id": 19,
+       "category": "Negation",
+       "subcategory": "Past Negative",
+       "description": "Past negative uses 'mana' after the subject.",
+       "pattern": "S + mana + V (e.g., Ay mana neera)",
+       "examples": [
+         {"zarma": "Ay mana neera", "english": "I did not sell"}
+       ]
+     },
+     {
+       "rule_id": 20,
+       "category": "Negation",
+       "subcategory": "Present/Future Negative",
+       "description": "Present/future negative uses 'si' instead of 'ga'.",
+       "pattern": "S + si + V (e.g., Ay si neera)",
+       "examples": [
+         {"zarma": "Ay si neera", "english": "I do not/will not sell"}
+       ]
+     }
+   ]
+ }
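
For reference, check_grammar in app.py flags a sentence whenever an example's "zarma" string occurs in it and takes the suggested fix from an optional "corrected_zarma" key. The rules above list correct forms and omit "corrected_zarma", so any match is reported with the fallback "The correct form would be 'unknown'" and passed to the Gemini prompt as supplementary context. An entry meant to drive a concrete correction would pair a wrong form with its fix, roughly as in this hypothetical sketch (the Zarma strings are illustrative, not taken from the repo):

# Hypothetical rule example that check_grammar could act on directly:
# "zarma" is the wrong phrase to detect, "corrected_zarma" the replacement to suggest.
example_entry = {
    "zarma": "ay neera ga",            # placeholder wrong phrase (misplaced future marker)
    "corrected_zarma": "ay ga neera",  # placeholder fix: 'ga' precedes the verb (rule 9)
    "english": "I will sell"
}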
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio==4.44.0
+ google-genai
+ langchain==0.3.0
+ langchain-community==0.3.0
+ faiss-cpu==1.8.0
+ sentence-transformers==3.1.1
+ torch==2.4.1