Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| import os | |
| import pandas as pd | |
| from io import StringIO | |
| import os | |
| import base64 | |
| app = FastAPI() | |
def get_download_link_dify(df, *, timeout=60):
    """Upload *df* to the Dify dataset as an Excel file and return its link.

    The frame is serialized with ``dataframe_to_binary``, posted to the
    dataset's ``document/create-by-file`` endpoint, and the created
    document's ``upload-file`` endpoint is then queried for its
    ``download_url``.

    Args:
        df: Object accepted by ``dataframe_to_binary`` (the visible caller
            passes a pandas DataFrame).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The ``download_url`` reported by the server, with every
        ``"download/"`` substring removed.

    Raises:
        requests.RequestException: If a request times out, cannot connect,
            or the server answers with an error status.
        KeyError: If the upload response lacks ``document.id``.
        RuntimeError: If the server reports no ``download_url``.
    """
    import json
    import logging

    import requests

    log = logging.getLogger(__name__)

    # NOTE(security): these values were hard-coded in the source, including
    # the API key. Environment variables now take precedence; the literals
    # remain only as backward-compatible fallbacks. Rotate the key and drop
    # the fallback once the environment is configured.
    base_url = os.environ.get(
        "DIFY_BASE_URL", "http://redmindgpt.redmindtechnologies.com:81/v1"
    )
    dataset_id = os.environ.get(
        "DIFY_DATASET_ID", "084ae979-d101-414b-8854-9bbf5d3a442e"
    )
    api_key = os.environ.get("DIFY_API_KEY", "dataset-feqz5KrqHkFRdWbh2DInt58L")

    headers = {"Authorization": f"Bearer {api_key}"}

    # Indexing options travel as a JSON string in the "data" form field.
    # Building them with json.dumps guarantees well-formed JSON.
    processing_options = {
        "indexing_technique": "high_quality",
        "process_rule": {
            "rules": {
                "pre_processing_rules": [
                    {"id": "remove_extra_spaces", "enabled": True},
                    {"id": "remove_urls_emails", "enabled": True},
                ],
                "segmentation": {
                    "separator": "###",
                    "max_tokens": 500,
                },
            },
            "mode": "custom",
        },
    }
    form_fields = {"data": json.dumps(processing_options)}

    files = {
        "file": (
            "output.xlsx",
            dataframe_to_binary(df),
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ),
    }

    upload_url = f"{base_url}/datasets/{dataset_id}/document/create-by-file"
    log.debug("Uploading document to %s", upload_url)
    upload = requests.post(
        upload_url,
        headers=headers,
        data=form_fields,
        files=files,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of tripping over the JSON body below.
    upload.raise_for_status()
    document_id = upload.json()["document"]["id"]

    # Second round trip: ask the server where the stored file can be fetched.
    file_info_url = (
        f"{base_url}/datasets/{dataset_id}/documents/{document_id}/upload-file"
    )
    file_info = requests.get(file_info_url, headers=headers, timeout=timeout)
    file_info.raise_for_status()

    download_url = file_info.json().get("download_url")
    if not download_url:
        raise RuntimeError(
            f"Dify returned no download_url for document {document_id}"
        )

    # NOTE(review): the "download/" segment is stripped exactly as before;
    # presumably this turns the link into a directly viewable one - confirm.
    return download_url.replace("download/", "")
def dataframe_to_binary(df):
    """Serialize *df* into an in-memory Excel workbook.

    Args:
        df: Object exposing ``to_excel`` (a pandas DataFrame in the visible
            caller).

    Returns:
        A ``BytesIO`` holding the workbook written by ``df.to_excel`` with
        the openpyxl engine and no index column, rewound to position 0 so
        it can be read immediately.
    """
    from io import BytesIO

    workbook = BytesIO()
    df.to_excel(workbook, index=False, engine="openpyxl")
    # Rewind: writing leaves the cursor at the end of the stream.
    workbook.seek(0)
    return workbook
| # FastAPI Endpoints | |
def greet_json():
    """Return the fixed status payload ``{"Document store": "created!"}``.

    NOTE(review): no route decorator is visible on this function in this
    view of the file - confirm how it is registered with the app.
    """
    # The data-processing step (a process_and_store call on PDF/PPTX paths)
    # is currently disabled, so this handler only reports a constant status.
    status = {"Document store": "created!"}
    return status
def _normalize_csv_text(csv_data: str) -> str:
    """Return *csv_data* re-serialized so every row has the same field count.

    Rows are parsed with the ``csv`` module, so quoted fields containing
    commas or newlines count as a single field. Blank and whitespace-only
    lines are dropped instead of being padded into all-empty data rows.
    Short rows are right-padded with empty fields to the widest row.

    Raises:
        ValueError: If *csv_data* contains no non-blank rows.
    """
    import csv  # local import: the module header does not import csv

    # newline="" lets the csv reader see the original line endings, so
    # "\r\n" input is handled without stray "\r" characters.
    parsed = csv.reader(StringIO(csv_data, newline=""))
    rows = [
        row
        for row in parsed
        if len(row) > 1 or any(cell.strip() for cell in row)
    ]
    if not rows:
        raise ValueError("csv_data contains no rows")

    width = max(len(row) for row in rows)

    out = StringIO()
    writer = csv.writer(out, lineterminator="\n")
    writer.writerows(row + [""] * (width - len(row)) for row in rows)
    return out.getvalue()


def save_file_dify(csv_data: str):
    """Parse ragged CSV text, upload it to Dify and return the download link.

    Rows with fewer fields than the widest row are padded with empty
    fields so that pandas can read the text as a rectangular table.

    NOTE(review): no route decorator is visible on this function in this
    view of the file - confirm how it is registered with the app.

    Args:
        csv_data: CSV text; rows may have differing numbers of fields.

    Returns:
        Whatever ``get_download_link_dify`` returns for the parsed frame.

    Raises:
        ValueError: If *csv_data* contains no non-blank rows.
    """
    df = pd.read_csv(StringIO(_normalize_csv_text(csv_data)))
    # Store the frame in the Dify dataset and hand back its download link.
    return get_download_link_dify(df)