Spaces:
Sleeping
Sleeping
# %%
# Load variables from a .env file into the process environment
# (python-dotenv) before the remaining cells run.
from dotenv import load_dotenv

load_dotenv()
# One-time data preparation, kept for reference but disabled by wrapping it in
# a bare string literal (the string is evaluated and discarded, so none of the
# code below runs). Remove the enclosing triple quotes to execute these cells;
# they write ./data/corpus.parquet and ./data/qa.parquet, which the benchmark
# cell reads.
"""
# %%
import pandas as pd
df = pd.read_parquet(
    "../raw_data/dale_carnegie/how_to_win_friends_and_influence_people.parquet"
)
df.head()
# %%
from langchain.schema import Document
documents = []
for index, row in df.iterrows():
    doc = Document(page_content=row["text"])
    documents.append(doc)
documents
# %%
from autorag.utils import cast_corpus_dataset
from autorag.data.corpus import langchain_documents_to_parquet
corpus_df = langchain_documents_to_parquet(documents)
corpus_df = cast_corpus_dataset(corpus_df)
corpus_df.to_parquet("./data/corpus.parquet")
# %%
import nest_asyncio
nest_asyncio.apply()
import os
from llama_index.llms.openai import OpenAI
from autorag.data.qacreation import generate_qa_llama_index, make_single_content_qa
llm = OpenAI(
    api_base=os.getenv("OPENAI_BASE_URL"),
    model="gpt-4o",
)
qa_df = make_single_content_qa(
    corpus_df,
    content_size=49,
    qa_creation_func=generate_qa_llama_index,
    llm=llm,
    question_num_per_content=1,
)
qa_df.to_parquet("./data/qa.parquet")
"""
# %%
# Benchmark cell: register the candidate Hugging Face embedding models with
# AutoRAG, then start one evaluation trial over the QA / corpus parquet files.
import nest_asyncio

# Applied before the AutoRAG imports and the trial, as in the original cell
# order. NOTE(review): presumably needed because the cell runs inside an
# already-running event loop (notebook kernel) -- confirm.
nest_asyncio.apply()

import autorag as ag
from autorag.evaluator import Evaluator
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# AutoRAG registry key -> Hugging Face model id. Insertion order is the
# order in which the models are instantiated and registered.
_HF_EMBEDDERS = {
    "huggingface_baai_llm_embedder": "BAAI/llm-embedder",
    "huggingface_baai_bge_large_en": "BAAI/bge-large-en-v1.5",
    "huggingface_baai_bge_base_en": "BAAI/bge-base-en-v1.5",
    "huggingface_baai_bge_small_en": "BAAI/bge-small-en-v1.5",
    "huggingface_baai_bge_m3": "BAAI/bge-m3",
}
for registry_key, model_id in _HF_EMBEDDERS.items():
    ag.embedding_models[registry_key] = HuggingFaceEmbedding(model_id)

# Inputs and output directory for the evaluation run.
_evaluator_settings = {
    "qa_data_path": "./data/qa.parquet",
    "corpus_data_path": "./data/corpus.parquet",
    "project_dir": "./benchmark",
}
evaluator = Evaluator(**_evaluator_settings)
evaluator.start_trial("./config/config_small.yaml")
# %%