Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,11 +15,26 @@ client = chromadb.PersistentClient(path="/data/chroma_db")
|
|
| 15 |
collection = client.get_or_create_collection(name="knowledge_base")
|
| 16 |
pdf_file="Sutures and Suturing techniques.pdf"
|
| 17 |
pptx_file="impalnt 1.pptx"
|
| 18 |
-
|
| 19 |
collections = client.list_collections()
|
| 20 |
|
| 21 |
print("Existing Collections:", [c.name for c in collections])
|
| 22 |
collection = client.get_collection(name="knowledge_base")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Initialize models
|
| 25 |
text_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
@@ -30,7 +45,7 @@ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
|
| 30 |
IMAGE_FOLDER = "/data/extracted_images"
|
| 31 |
os.makedirs(IMAGE_FOLDER, exist_ok=True)
|
| 32 |
|
| 33 |
-
|
| 34 |
@app.get("/")
|
| 35 |
def greet_json():
|
| 36 |
|
|
@@ -134,18 +149,3 @@ def store_data(texts, image_paths):
|
|
| 134 |
print("Data stored successfully!")
|
| 135 |
|
| 136 |
|
| 137 |
-
### Step 8: Process and Store from Files ###
|
| 138 |
-
def process_and_store(pdf_path=None, pptx_path=None):
|
| 139 |
-
texts, images = [], []
|
| 140 |
-
|
| 141 |
-
if pdf_path:
|
| 142 |
-
print(f"Processing PDF: {pdf_path}")
|
| 143 |
-
texts.append(extract_text_from_pdf(pdf_path))
|
| 144 |
-
images.extend(extract_images_from_pdf(pdf_path))
|
| 145 |
-
|
| 146 |
-
if pptx_path:
|
| 147 |
-
print(f"Processing PPTX: {pptx_path}")
|
| 148 |
-
texts.append(extract_text_from_pptx(pptx_path))
|
| 149 |
-
images.extend(extract_images_from_pptx(pptx_path))
|
| 150 |
-
|
| 151 |
-
store_data(texts, images)
|
|
|
|
| 15 |
# Open the knowledge-base collection, creating it on first run.
# get_or_create_collection() already returns a ready-to-use handle, so the
# follow-up get_collection() lookup of the same name that used to sit at the
# end of this section was a redundant second lookup and has been dropped.
collection = client.get_or_create_collection(name="knowledge_base")

# File names of the source documents handed to process_and_store().
pdf_file = "Sutures and Suturing techniques.pdf"
pptx_file = "impalnt 1.pptx"

# Print the collections currently known to the client as a startup diagnostic.
collections = client.list_collections()
print("Existing Collections:", [c.name for c in collections])
|
| 23 |
+
### Step 8: Process and Store from Files ###
def process_and_store(pdf_path=None, pptx_path=None):
    """Extract text and images from the given documents and store them.

    Each path is optional; a falsy path is skipped. The PDF is handled
    before the PPTX. For every document that is processed, its extracted
    text is appended as one entry and its extracted images are added to a
    shared list. Both lists are then passed to ``store_data`` — also when
    neither path was supplied, in which case both lists are empty.

    Args:
        pdf_path: Path of a PDF file to ingest, or None to skip it.
        pptx_path: Path of a PPTX file to ingest, or None to skip it.
    """
    gathered_texts = []
    gathered_images = []

    if pdf_path:
        print(f"Processing PDF: {pdf_path}")
        pdf_text = extract_text_from_pdf(pdf_path)
        gathered_texts.append(pdf_text)
        pdf_images = extract_images_from_pdf(pdf_path)
        gathered_images.extend(pdf_images)

    if pptx_path:
        print(f"Processing PPTX: {pptx_path}")
        pptx_text = extract_text_from_pptx(pptx_path)
        gathered_texts.append(pptx_text)
        pptx_images = extract_images_from_pptx(pptx_path)
        gathered_images.extend(pptx_images)

    store_data(gathered_texts, gathered_images)
|
| 38 |
|
| 39 |
# Initialize models
|
| 40 |
text_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 45 |
IMAGE_FOLDER = "/data/extracted_images"
|
| 46 |
os.makedirs(IMAGE_FOLDER, exist_ok=True)
|
| 47 |
|
| 48 |
+
# Ingest the source documents when the application starts instead of at
# import time. At this point in the module the helpers that
# process_and_store() depends on (store_data is defined further down) do not
# exist yet, so calling it directly here raises NameError.
# NOTE(review): assumes `app` is a FastAPI instance (it is used with
# @app.get right after this) — confirm before relying on on_event.
@app.on_event("startup")
def _ingest_source_documents():
    """Run the document ingestion once every module-level helper is defined."""
    process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
|
| 49 |
@app.get("/")
|
| 50 |
def greet_json():
|
| 51 |
|
|
|
|
| 149 |
print("Data stored successfully!")
|
| 150 |
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|