Spaces:

calm-ai
/

DocQA

Sleeping

DocQA / non_form_llama_parse.py

new rag approach

c523dd3 over 1 year ago

785 Bytes

	from llama_parse import LlamaParse
	from dotenv import load_dotenv
	import os
	import streamlit as st

	# Load variables from a local .env file (if present) into the environment,
	# then read the LlamaParse API key; the value is None when LLAMA_PARSE is unset.
	load_dotenv()
	LLAMA_PARSE = os.environ.get('LLAMA_PARSE')

	# Module-level parser instance used by extract_text below.
	parser = LlamaParse(
	    api_key=LLAMA_PARSE,
	    # Output format: "markdown" and "text" are available.
	    result_type="text",
	    # If multiple files are passed, split the work into `num_workers` API calls.
	    num_workers=4,
	    verbose=True,
	    # Optionally you can define a language; the default is "en".
	    language="en",
	)

	@st.cache_data
	def extract_text(pdf_path) -> str:
	    """Parse a file with LlamaParse and return its text as a single string.

	    The ``text`` of every document returned by ``parser.load_data`` is joined
	    with newlines, and leading/trailing whitespace is stripped from the
	    combined result.

	    The function is wrapped in ``st.cache_data``.
	    NOTE(review): the cache key is presumably derived from ``pdf_path``, so a
	    file replaced at the same path may return stale text -- confirm.

	    Args:
	        pdf_path: Location of the file to parse; forwarded unchanged to
	            ``parser.load_data``.

	    Returns:
	        The combined text, or an empty string when no documents are returned.
	    """
	    documents = parser.load_data(pdf_path)
	    # Join once instead of growing a string with ``+=`` inside a loop.
	    # ``strip()`` trims whitespace at both ends, not only a trailing newline.
	    return "\n".join(document.text for document in documents).strip()

	# Example usage:
	# combined_text = extract_text("/app/Non_form_pdfs/chapter-17-web-designing2.pdf")
	# print(combined_text)