restapitrial_vectordb

Sleeping

Redmind committed on Feb 13

Commit

6e9858c

verified ·

1 Parent(s): 16af574

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,11 +15,26 @@ client = chromadb.PersistentClient(path="/data/chroma_db")
 collection = client.get_or_create_collection(name="knowledge_base")
 pdf_file="Sutures and Suturing techniques.pdf"
 pptx_file="impalnt 1.pptx"
-process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
 collections = client.list_collections()
 print("Existing Collections:", [c.name for c in collections])
 collection = client.get_collection(name="knowledge_base")
 # Initialize models
 text_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -30,7 +45,7 @@ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 IMAGE_FOLDER = "/data/extracted_images"
 os.makedirs(IMAGE_FOLDER, exist_ok=True)
 @app.get("/")
 def greet_json():
@@ -134,18 +149,3 @@ def store_data(texts, image_paths):
     print("Data stored successfully!")
-### Step 8: Process and Store from Files ###
-def process_and_store(pdf_path=None, pptx_path=None):
-    texts, images = [], []
-    if pdf_path:
-        print(f"Processing PDF: {pdf_path}")
-        texts.append(extract_text_from_pdf(pdf_path))
-        images.extend(extract_images_from_pdf(pdf_path))
-    if pptx_path:
-        print(f"Processing PPTX: {pptx_path}")
-        texts.append(extract_text_from_pptx(pptx_path))
-        images.extend(extract_images_from_pptx(pptx_path))
-    store_data(texts, images)

 collection = client.get_or_create_collection(name="knowledge_base")
 pdf_file="Sutures and Suturing techniques.pdf"
 pptx_file="impalnt 1.pptx"
 collections = client.list_collections()
 print("Existing Collections:", [c.name for c in collections])
 collection = client.get_collection(name="knowledge_base")
+### Step 8: Process and Store from Files ###
+def process_and_store(pdf_path=None, pptx_path=None):
+    texts, images = [], []
+    if pdf_path:
+        print(f"Processing PDF: {pdf_path}")
+        texts.append(extract_text_from_pdf(pdf_path))
+        images.extend(extract_images_from_pdf(pdf_path))
+    if pptx_path:
+        print(f"Processing PPTX: {pptx_path}")
+        texts.append(extract_text_from_pptx(pptx_path))
+        images.extend(extract_images_from_pptx(pptx_path))
+    store_data(texts, images)
 # Initialize models
 text_model = SentenceTransformer('all-MiniLM-L6-v2')
 IMAGE_FOLDER = "/data/extracted_images"
 os.makedirs(IMAGE_FOLDER, exist_ok=True)
+process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
 @app.get("/")
 def greet_json():
     print("Data stored successfully!")