Spaces:

Remostart
/

MILESTONE_ONE_FARMLINGUA_AI

Runtime error

App Files Files Community

Remostart committed on Sep 15

Commit

2891d1d

verified ·

1 Parent(s): bc078c8

milestone one and two AI BRIAN AND MULTILINGUA commit

Browse files

Files changed (40) hide show

.dockerignore +0 -0
.dockerigore +0 -0
.gitattributes +4 -0
Dockerfile +57 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-311.pyc +0 -0
app/__pycache__/__init__.cpython-312.pyc +0 -0
app/__pycache__/main.cpython-311.pyc +0 -0
app/__pycache__/main.cpython-312.pyc +0 -0
app/agents/__init__.py +0 -0
app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
app/agents/__pycache__/__init__.cpython-312.pyc +0 -0
app/agents/__pycache__/crew_pipeline.cpython-311.pyc +0 -0
app/agents/__pycache__/crew_pipeline.cpython-312.pyc +0 -0
app/agents/crew_pipeline.py +245 -0
app/main.py +86 -0
app/models/__init__.py +0 -0
app/models/intent_classifier_v2.joblib +3 -0
app/tasks/__init__.py +0 -0
app/tasks/__pycache__/__init__.cpython-311.pyc +0 -0
app/tasks/__pycache__/__init__.cpython-312.pyc +0 -0
app/tasks/__pycache__/rag_updater.cpython-311.pyc +0 -0
app/tasks/__pycache__/rag_updater.cpython-312.pyc +0 -0
app/tasks/rag_updater.py +141 -0
app/utils/__init__.py +0 -0
app/utils/__pycache__/__init__.cpython-311.pyc +0 -0
app/utils/__pycache__/__init__.cpython-312.pyc +0 -0
app/utils/__pycache__/config.cpython-311.pyc +0 -0
app/utils/__pycache__/config.cpython-312.pyc +0 -0
app/utils/config.py +54 -0
app/vectorstore/__init__.py +0 -0
app/vectorstore/faiss_index/index.faiss +3 -0
app/vectorstore/faiss_index/index.pkl +3 -0
app/vectorstore/live_rag_index/index.faiss +0 -0
app/vectorstore/live_rag_index/index.pkl +3 -0
app/venv/bin/python +3 -0
app/venv/bin/python3 +3 -0
app/venv/bin/python3.11 +3 -0
app/venv/pyvenv.cfg +5 -0
requirements.txt +21 -0

.dockerignore ADDED Viewed

File without changes

.dockerigore ADDED Viewed

File without changes

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+app/vectorstore/faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python3 filter=lfs diff=lfs merge=lfs -text
+app/venv/bin/python3.11 filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,57 @@

+FROM python:3.10-slim
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+WORKDIR /code
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    curl \
+    libopenblas-dev \
+    libomp-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Copy only requirements.txt first so the dependency layers stay cached when app code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir huggingface-hub sentencepiece accelerate fasttext
+# All Hugging Face caches point at one directory so build-time downloads are found at runtime
+ENV HF_HOME=/models/huggingface
+ENV TRANSFORMERS_CACHE=/models/huggingface
+ENV HUGGINGFACE_HUB_CACHE=/models/huggingface
+ENV HF_HUB_CACHE=/models/huggingface
+# Create cache dir
+RUN mkdir -p /models/huggingface
+# Pre-download models at build time (Qwen + SentenceTransformer + FastText + NLLB finetuned)
+RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='Qwen/Qwen3-4B-Instruct-2507')" \
+ && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')" \
+ && python -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')" \
+ && python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='drrobot9/nllb-ig-yo-ha-finetuned')"
+# Preload tokenizers (avoid runtime delays)
+RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-4B-Instruct-2507', use_fast=True)" \
+ && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', use_fast=True)" \
+ && python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('drrobot9/nllb-ig-yo-ha-finetuned', use_fast=True)"
+# NOTE(review): the .dockerignore committed with this change is empty, so this COPY
+# also brings app/venv and the __pycache__ directories into the image.
+COPY . .
+EXPOSE 7860
+# Run FastAPI app with uvicorn using a single worker. Every worker process imports
+# app.main, which loads the Qwen, NLLB, FastText and embedding models at import time
+# and starts its own background scheduler on startup; extra workers therefore
+# duplicate the models in memory and run several schedulers against the same
+# live_rag_index directory.
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (166 Bytes). View file

app/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (154 Bytes). View file

app/__pycache__/main.cpython-311.pyc ADDED Viewed

Binary file (3.31 kB). View file

app/__pycache__/main.cpython-312.pyc ADDED Viewed

Binary file (3.41 kB). View file

app/agents/__init__.py ADDED Viewed

File without changes

app/agents/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (173 Bytes). View file

app/agents/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (161 Bytes). View file

app/agents/__pycache__/crew_pipeline.cpython-311.pyc ADDED Viewed

Binary file (8.73 kB). View file

app/agents/__pycache__/crew_pipeline.cpython-312.pyc ADDED Viewed

Binary file (11 kB). View file

app/agents/crew_pipeline.py ADDED Viewed

	@@ -0,0 +1,245 @@

+# farmlingua/app/agents/crew_pipeline.pyversion3multilingua
+import os
+import sys
+import requests
+import joblib
+import faiss
+import numpy as np
+import torch
+import fasttext
+from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from sentence_transformers import SentenceTransformer
+from app.utils import config
+import re
+hf_cache = "/models/huggingface"
+os.environ["HF_HOME"] = hf_cache
+os.environ["TRANSFORMERS_CACHE"] = hf_cache
+os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
+os.makedirs(hf_cache, exist_ok=True)
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+try:
+    classifier = joblib.load(config.CLASSIFIER_PATH)
+except Exception:
+    classifier = None
+print(f"Loading Qwen expert model ({config.EXPERT_MODEL_NAME})...")
+tokenizer = AutoTokenizer.from_pretrained(config.EXPERT_MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    config.EXPERT_MODEL_NAME,
+    torch_dtype="auto",
+    device_map="auto"
+)
+embedder = SentenceTransformer(config.EMBEDDING_MODEL)
+print(f"Loading FastText language identifier ({config.LANG_ID_MODEL_REPO})...")
+lang_model_path = hf_hub_download(
+    repo_id=config.LANG_ID_MODEL_REPO,
+    filename=config.LANG_ID_MODEL_FILE
+)
+lang_identifier = fasttext.load_model(lang_model_path)
+def detect_language(text, top_k=1):
+    """Detect language with FastText, ensuring no newlines."""
+    clean_text = text.replace("\n", " ").strip()
+    labels, probs = lang_identifier.predict(clean_text, k=top_k)
+    return [(l.replace("__label__", ""), float(p)) for l, p in zip(labels, probs)]
+print(f"Loading NLLB translation model ({config.TRANSLATION_MODEL_NAME})...")
+# Translation pipeline, created once at import time and shared by translate_text().
+# device=0 is passed when DEVICE is "cuda", -1 otherwise.
+translation_pipeline = pipeline(
+    "translation",
+    model=config.TRANSLATION_MODEL_NAME,
+    device=0 if DEVICE == "cuda" else -1,
+    max_new_tokens=512
+)
+# Language codes handled by run_pipeline(), mapped to the display name returned to
+# the caller; run_pipeline() treats any detected code not listed here as "eng_Latn".
+# NOTE(review): whether the configured translation model covers every code listed
+# here (e.g. Swahili, Amharic) cannot be confirmed from this file — verify.
+SUPPORTED_LANGS = {
+    "eng_Latn": "English",
+    "ibo_Latn": "Igbo",
+    "yor_Latn": "Yoruba",
+    "hau_Latn": "Hausa",
+    "swh_Latn": "Swahili",
+    "amh_Ethi": "Amharic"
+}
+def chunk_text(text, max_len=400):
+    """Split text into chunks without cutting sentences."""
+    sentences = re.split(r'(?<=[.!?]) +', text)
+    chunks, current = [], ""
+    for sent in sentences:
+        if len(current) + len(sent) < max_len:
+            current += " " + sent
+        else:
+            chunks.append(current.strip())
+            current = sent
+    if current:
+        chunks.append(current.strip())
+    return chunks
+def translate_text(text, src_lang, tgt_lang):
+    """Translate with chunking and stitch results together."""
+    if not text.strip():
+        return text
+    chunks = chunk_text(text)
+    results = []
+    for chunk in chunks:
+        out = translation_pipeline(chunk, src_lang=src_lang, tgt_lang=tgt_lang)
+        results.append(out[0]['translation_text'])
+    return " ".join(results)
+def retrieve_docs(query, vs_path):
+    if not vs_path or not os.path.exists(vs_path):
+        return None
+    try:
+        index = faiss.read_index(vs_path)
+    except Exception:
+        return None
+    query_vec = np.array([embedder.encode(query)], dtype=np.float32)
+    D, I = index.search(query_vec, k=3)
+    if D[0][0] == 0:
+        return None
+    meta_path = vs_path + "_meta.npy"
+    if os.path.exists(meta_path):
+        metadata = np.load(meta_path, allow_pickle=True).item()
+        docs = [metadata.get(str(idx), "") for idx in I[0] if str(idx) in metadata]
+        docs = [doc for doc in docs if doc]
+        return "\n\n".join(docs) if docs else None
+    return None
+def get_weather(state_name):
+    url = "http://api.weatherapi.com/v1/current.json"
+    params = {
+        "key": config.WEATHER_API_KEY,
+        "q": f"{state_name}, Nigeria",
+        "aqi": "no"
+    }
+    r = requests.get(url, params=params)
+    if r.status_code != 200:
+        return f"Unable to retrieve weather for {state_name}."
+    data = r.json()
+    return (
+        f"Weather in {state_name}:\n"
+        f"- Condition: {data['current']['condition']['text']}\n"
+        f"- Temperature: {data['current']['temp_c']}°C\n"
+        f"- Humidity: {data['current']['humidity']}%\n"
+        f"- Wind: {data['current']['wind_kph']} kph"
+    )
+def detect_intent(query):
+    q_lower = query.lower()
+    if any(word in q_lower for word in ["weather condition", "forecast"]):
+        for state in config.STATES:
+            if state.lower() in q_lower:
+                return "weather", state
+        return "weather", None
+    if any(word in q_lower for word in ["update", "breaking", "news", "current"]):
+        return "live_update", None
+    if hasattr(classifier, "predict") and hasattr(classifier, "predict_proba"):
+        predicted_intent = classifier.predict([query])[0]
+        confidence = max(classifier.predict_proba([query])[0])
+        if confidence < config.CLASSIFIER_CONFIDENCE_THRESHOLD:
+            return "low_confidence", None
+        return predicted_intent, None
+    return "normal", None
+def run_qwen(messages, max_new_tokens=1000):
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(
+        **inputs,
+        max_new_tokens=max_new_tokens,
+        temperature=0.4,
+        repetition_penalty=1.1
+    )
+    output_ids = generated_ids[0][len(inputs.input_ids[0]):].tolist()
+    return tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+def run_pipeline(user_query: str):
+    lang, prob = detect_language(user_query, top_k=1)[0]
+    if lang not in SUPPORTED_LANGS:
+        lang = "eng_Latn"
+    # Translate to English if needed
+    translated_query = user_query
+    if lang != "eng_Latn":
+        translated_query = translate_text(user_query, src_lang=lang, tgt_lang="eng_Latn")
+    # Detect intent
+    intent, extra = detect_intent(translated_query)
+    if intent == "weather" and extra:
+        weather_text = get_weather(extra)
+        messages = [
+            {"role": "system", "content": "You are FarmLingua, an AI assistant for Nigerian farmers."},
+            {"role": "user", "content": f"Rewrite this weather update simply:\n{weather_text}"}
+        ]
+        english_answer = run_qwen(messages, max_new_tokens=256)
+    else:
+        if intent == "live_update":
+            context = retrieve_docs(translated_query, config.LIVE_VS_PATH)
+            if context:
+                translated_query += f"\n\nLatest agricultural updates:\n{context}"
+        if intent == "low_confidence":
+            context = retrieve_docs(translated_query, config.STATIC_VS_PATH)
+            if context:
+                translated_query += f"\n\nReference information:\n{context}"
+        messages = [
+            {"role": "system", "content": (
+                "You are FarmLingua, an AI assistant for Nigerian farmers. "
+                "Answer directly without repeating the question. "
+                "Use short, clear farmer-friendly English. "
+                "Avoid scientific jargon, focus on practical farming advice."
+                "When a user ask you who built you or created you say it jackson kelvin a sophisticated AI engineer from Remostart AI company, he built my core brain"
+            )},
+            {"role": "user", "content": translated_query}
+        ]
+        english_answer = run_qwen(messages, max_new_tokens=700)
+    # Translate back to original language
+    if lang != "eng_Latn":
+        final_answer = translate_text(english_answer, src_lang="eng_Latn", tgt_lang=lang)
+    else:
+        final_answer = english_answer
+    return {
+        "detected_language": SUPPORTED_LANGS.get(lang, "Unknown"),
+        "answer": final_answer
+    }

app/main.py ADDED Viewed

	@@ -0,0 +1,86 @@

+# farmlingua_backend/app/main.py
+import os
+import sys
+import logging
+from fastapi import FastAPI, Body
+from fastapi.middleware.cors import CORSMiddleware
+import uvicorn
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+from app.tasks.rag_updater import schedule_updates
+from app.utils import config
+from app.agents.crew_pipeline import run_pipeline
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    level=logging.INFO
+)
+app = FastAPI(
+    title="FarmLingua Backend",
+    description="Backend service for FarmLingua with RAG updates, multilingual support, and expert AI pipeline",
+    version="1.1.0"
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=getattr(config, "ALLOWED_ORIGINS", ["*"]),
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.on_event("startup")
+def startup_event():
+    """Log that the backend is starting, then start the RAG update scheduler."""
+    logging.info("Starting FarmLingua backend...")
+    # The scheduler object returned by schedule_updates() is not kept here.
+    schedule_updates()
+@app.get("/")
+def home():
+    """Health check endpoint."""
+    return {
+        "status": "FarmLingua backend running",
+        "version": "1.1.0",
+        "vectorstore_path": config.VECTORSTORE_PATH
+    }
+@app.post("/ask")
+def ask_farmbot(query: str = Body(..., embed=True)):
+    """
+    Ask FarmLingua a farming-related question.
+    Supports Hausa, Igbo, Yoruba, and English.
+    Automatically detects user language, translates if needed,
+    and returns response in the same language.
+    """
+    logging.info(f"Received query: {query}")
+    answer_data = run_pipeline(query)
+    detected_lang = answer_data.get("detected_language", "Unknown")
+    confidence = answer_data.get("confidence", None)
+    logging.info(
+        f"Detected language: {detected_lang}"
+        + (f" (confidence={confidence:.2f})" if confidence else "")
+    )
+    return {
+        "query": query,
+        #"detected_language": detected_lang,
+        "answer": answer_data.get("answer")
+    }
+if __name__ == "__main__":
+    uvicorn.run(
+        "app.main:app",
+        host="0.0.0.0",
+        port=getattr(config, "PORT", 7860),
+        reload=bool(getattr(config, "DEBUG", False))
+    )

app/models/__init__.py ADDED Viewed

File without changes

app/models/intent_classifier_v2.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffeda9eeb604a1a24ef64e774eb6b503ead5eae6ad3b043401033040a4309405
+size 39296294

app/tasks/__init__.py ADDED Viewed

File without changes

app/tasks/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (172 Bytes). View file

app/tasks/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (160 Bytes). View file

app/tasks/__pycache__/rag_updater.cpython-311.pyc ADDED Viewed

Binary file (8.43 kB). View file

app/tasks/__pycache__/rag_updater.cpython-312.pyc ADDED Viewed

Binary file (7.42 kB). View file

app/tasks/rag_updater.py ADDED Viewed

	@@ -0,0 +1,141 @@

+# farmlingua_backend/app/tasks/rag_updater.py
+import os
+import sys
+from datetime import datetime, date
+import logging
+import requests
+from bs4 import BeautifulSoup
+from apscheduler.schedulers.background import BackgroundScheduler
+from langchain.vectorstores import FAISS
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from app.utils import config
+# Directory two levels above this file, added to sys.path if missing. This runs
+# after the `from app.utils import config` import above, so it cannot affect it.
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if BASE_DIR not in sys.path:
+    sys.path.insert(0, BASE_DIR)
+# Root logger configuration used by the fetch and update functions below.
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    level=logging.INFO
+)
+# One shared HTTP session for the weather and news requests.
+session = requests.Session()
+def fetch_weather_now():
+    """Fetch current weather for all configured states."""
+    docs = []
+    for state in config.STATES:
+        try:
+            url = "http://api.weatherapi.com/v1/current.json"
+            params = {
+                "key": config.WEATHER_API_KEY,
+                "q": f"{state}, Nigeria",
+                "aqi": "no"
+            }
+            res = session.get(url, params=params, timeout=10)
+            res.raise_for_status()
+            data = res.json()
+            if "current" in data:
+                condition = data['current']['condition']['text']
+                temp_c = data['current']['temp_c']
+                humidity = data['current']['humidity']
+                text = (
+                    f"Weather in {state}: {condition}, "
+                    f"Temperature: {temp_c}°C, Humidity: {humidity}%"
+                )
+                docs.append(Document(
+                    page_content=text,
+                    metadata={
+                        "source": "WeatherAPI",
+                        "location": state,
+                        "timestamp": datetime.utcnow().isoformat()
+                    }
+                ))
+        except Exception as e:
+            logging.error(f"Weather fetch failed for {state}: {e}")
+    return docs
+def fetch_harvestplus_articles():
+    """Fetch ALL today's articles from HarvestPlus site."""
+    try:
+        res = session.get(config.DATA_SOURCES["harvestplus"], timeout=10)
+        res.raise_for_status()
+        soup = BeautifulSoup(res.text, "html.parser")
+        articles = soup.find_all("article")
+        docs = []
+        today_str = date.today().strftime("%Y-%m-%d")
+        for a in articles:
+            content = a.get_text(strip=True)
+            if content and len(content) > 100:
+                if today_str in a.text or True:
+                    docs.append(Document(
+                        page_content=content,
+                        metadata={
+                            "source": "HarvestPlus",
+                            "timestamp": datetime.utcnow().isoformat()
+                        }
+                    ))
+        return docs
+    except Exception as e:
+        logging.error(f"HarvestPlus fetch failed: {e}")
+        return []
+def build_rag_vectorstore(reset=False):
+    job_type = "FULL REBUILD" if reset else "INCREMENTAL UPDATE"
+    logging.info(f"RAG update started — {job_type}")
+    all_docs = fetch_weather_now() + fetch_harvestplus_articles()
+    logging.info(f"Weather docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'WeatherAPI'])}")
+    logging.info(f"News docs fetched: {len([d for d in all_docs if d.metadata['source'] == 'HarvestPlus'])}")
+    if not all_docs:
+        logging.warning("No documents fetched, skipping update")
+        return
+    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
+    chunks = splitter.split_documents(all_docs)
+    embedder = SentenceTransformerEmbeddings(model_name=config.EMBEDDING_MODEL)
+    vectorstore_path = config.LIVE_VS_PATH
+    if reset and os.path.exists(vectorstore_path):
+        for file in os.listdir(vectorstore_path):
+            file_path = os.path.join(vectorstore_path, file)
+            try:
+                os.remove(file_path)
+                logging.info(f"Deleted old file: {file_path}")
+            except Exception as e:
+                logging.error(f"Failed to delete {file_path}: {e}")
+    if os.path.exists(vectorstore_path) and not reset:
+        vs = FAISS.load_local(
+            vectorstore_path,
+            embedder,
+            allow_dangerous_deserialization=True
+        )
+        vs.add_documents(chunks)
+    else:
+        vs = FAISS.from_documents(chunks, embedder)
+    os.makedirs(vectorstore_path, exist_ok=True)
+    vs.save_local(vectorstore_path)
+    logging.info(f"Vectorstore updated at {vectorstore_path}")
+def schedule_updates():
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(build_rag_vectorstore, 'interval', hours=12, kwargs={"reset": False})
+    scheduler.add_job(build_rag_vectorstore, 'interval', days=7, kwargs={"reset": True})
+    scheduler.start()
+    logging.info("Scheduler started — 12-hour incremental updates + weekly full rebuild")
+    return scheduler

app/utils/__init__.py ADDED Viewed

File without changes

app/utils/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (172 Bytes). View file

app/utils/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (160 Bytes). View file

app/utils/__pycache__/config.cpython-311.pyc ADDED Viewed

Binary file (1.85 kB). View file

app/utils/__pycache__/config.cpython-312.pyc ADDED Viewed

Binary file (2.33 kB). View file

app/utils/config.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# farmlingua_backend/app/utils/config.py
+from pathlib import Path
+import os
+import sys
+BASE_DIR = Path(__file__).resolve().parents[2]
+if str(BASE_DIR) not in sys.path:
+    sys.path.insert(0, str(BASE_DIR))
+EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+STATIC_VS_PATH = BASE_DIR / "app" / "vectorstore" / "faiss_index"
+LIVE_VS_PATH = BASE_DIR / "app" / "vectorstore" / "live_rag_index"
+VECTORSTORE_PATH = LIVE_VS_PATH
+WEATHER_API_KEY = os.getenv("WEATHER_API_KEY", "1eefcad138134d62a1e220003252608")
+CLASSIFIER_PATH = BASE_DIR / "app" / "models" / "intent_classifier_v2.joblib"
+CLASSIFIER_CONFIDENCE_THRESHOLD = float(os.getenv("CLASSIFIER_CONFIDENCE_THRESHOLD", "0.6"))
+EXPERT_MODEL_NAME = os.getenv("EXPERT_MODEL_NAME", "Qwen/Qwen3-4B-Instruct-2507")
+#FORMATTER_MODEL_NAME = os.getenv("FORMATTER_MODEL_NAME", "google/flan-t5-large")
+LANG_ID_MODEL_REPO = os.getenv("LANG_ID_MODEL_REPO", "facebook/fasttext-language-identification")
+LANG_ID_MODEL_FILE = os.getenv("LANG_ID_MODEL_FILE", "model.bin")
+TRANSLATION_MODEL_NAME = os.getenv("TRANSLATION_MODEL_NAME", "drrobot9/nllb-ig-yo-ha-finetuned")
+DATA_SOURCES = {
+    "harvestplus": "https://agronigeria.ng/category/news/",
+}
+STATES = [
+    "Abuja", "Lagos", "Kano", "Kaduna", "Rivers", "Enugu", "Anambra", "Ogun",
+    "Oyo", "Delta", "Edo", "Katsina", "Borno", "Benue", "Niger", "Plateau",
+    "Bauchi", "Adamawa", "Cross River", "Akwa Ibom", "Ekiti", "Osun", "Ondo",
+    "Imo", "Abia", "Ebonyi", "Taraba", "Kebbi", "Zamfara", "Yobe", "Gombe",
+    "Sokoto", "Kogi", "Bayelsa", "Nasarawa", "Jigawa"
+]
+hf_cache = "/models/huggingface"
+os.environ["HF_HOME"] = hf_cache
+os.environ["TRANSFORMERS_CACHE"] = hf_cache
+os.environ["HUGGINGFACE_HUB_CACHE"] = hf_cache
+os.makedirs(hf_cache, exist_ok=True)

app/vectorstore/__init__.py ADDED Viewed

File without changes

app/vectorstore/faiss_index/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4faefcc68ae5a575b18f559e04cd2c68e166a73c4c89c9550e1794ccbf90695
+size 19648557

app/vectorstore/faiss_index/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1c75f31eab757e90e9c9771b62368c2de5dc11ed776629521fb007d8d47b84a
+size 5863908

app/vectorstore/live_rag_index/index.faiss ADDED Viewed

Binary file (70.7 kB). View file

app/vectorstore/live_rag_index/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:466653741f0cbbcbb51c817af910e5ca03c769e9009b3e3bf0f6fdcad71393b1
+size 12074

app/venv/bin/python ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
+size 7901928

app/venv/bin/python3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
+size 7901928

app/venv/bin/python3.11 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddfaecd2bd157a57e1211cde4fce9bf8107d4993a131bbf4b890ae53b76554bd
+size 7901928

app/venv/pyvenv.cfg ADDED Viewed

	@@ -0,0 +1,5 @@

+home = /usr/bin
+include-system-site-packages = false
+version = 3.11.13
+executable = /usr/bin/python3.11
+command = /usr/bin/python3 -m venv /content/drive/MyDrive/farmlingua_backend/app/venv

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+crewai
+langchain
+langchain-community
+faiss-cpu
+transformers
+sentence-transformers
+pydantic
+joblib
+pyyaml
+torch
+fastapi
+uvicorn
+apscheduler
+numpy<2
+requests
+beautifulsoup4
+huggingface-hub
+python-dotenv
+blobfile
+sentencepiece
+fasttext