| from llama_parse import LlamaParse | |
| from dotenv import load_dotenv | |
| import os | |
| import streamlit as st | |
# Pull variables from a local .env file into the process environment
# before the API key is looked up.
load_dotenv()

LLAMA_PARSE = os.environ.get('LLAMA_PARSE')

# Module-level LlamaParse client used by extract_text().
parser = LlamaParse(
    api_key=LLAMA_PARSE,
    result_type="text",  # "markdown" and "text" are available
    num_workers=4,  # if multiple files are passed, split in `num_workers` API calls
    verbose=True,
    language="en",  # optional; the default is "en"
)
def extract_text(pdf_path):
    """Parse a file with the module-level ``parser`` and return its text.

    Args:
        pdf_path: Forwarded unchanged to ``parser.load_data``.

    Returns:
        The ``text`` attribute of every returned document, joined with
        newlines, with leading and trailing whitespace stripped. Returns an
        empty string when no documents come back.
    """
    documents = parser.load_data(pdf_path)
    # str.join builds the result in a single pass; repeated `+=` would copy
    # the accumulated string on every iteration.
    return "\n".join(document.text for document in documents).strip()
# Example usage:
# combined_text = extract_text("/app/Non_form_pdfs/chapter-17-web-designing2.pdf")
# print(combined_text)