Spaces:
Runtime error
Runtime error
| import json | |
| import uuid | |
| from langchain.text_splitter import CharacterTextSplitter | |
# Module-level splitter instance: configured to split on newline characters
# with a chunk size of 3000 and no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=0,
    separator="\n",
)
def generate_uuid():
    """Return a newly generated random (version 4) UUID in string form."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
def check_id_extis_in_json(file_id):
    """Report whether *file_id* is recorded in ``file_ids.json``.

    The file ``file_ids.json`` is opened relative to the current working
    directory and parsed on every call.

    Args:
        file_id: Identifier to look up. The lookup uses ``in`` on the parsed
            document, so a JSON array is searched by element and a JSON
            object by key.

    Returns:
        ``True`` if *file_id* is present in the parsed document, otherwise
        ``False``.

    Raises:
        FileNotFoundError: If ``file_ids.json`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; decode explicitly instead of
    # depending on the platform's locale default.
    with open('file_ids.json', 'r', encoding='utf-8') as f:
        file_ids = json.load(f)
    return file_id in file_ids
def compare_paper_ids(data, paper_ids):
    """Return the entries of *paper_ids* that have no matching record in *data*.

    Args:
        data: Iterable of mappings; each one is indexed with ``'doi_no'``.
        paper_ids: Iterable of identifiers to check against those values.

    Returns:
        A list of the identifiers from *paper_ids* whose value does not
        appear as any record's ``'doi_no'``, in their original order
        (repeated identifiers are kept).

    Raises:
        KeyError: If a record in *data* has no ``'doi_no'`` key.
    """
    known = set()
    for record in data:
        known.add(record['doi_no'])

    absent = []
    for candidate in paper_ids:
        if candidate not in known:
            absent.append(candidate)
    return absent
def extract_json_from_text(text):
    """Pull the ``data`` entry out of a JSON object embedded in *text*.

    The input is converted with ``str()``; the span from the first ``{`` to
    the last ``}`` is lower-cased, parsed as JSON, and the value stored under
    the ``"data"`` key is returned. The parsed object and its type are also
    printed to stdout.

    NOTE(review): the whole payload is lower-cased before parsing, so string
    values (not only keys) lose their original case. This is kept unchanged
    because callers may rely on it -- confirm whether it is intended.

    Args:
        text: Any object; its ``str()`` form is searched for a JSON object.

    Returns:
        The value under ``"data"`` in the parsed object, or ``[]`` when that
        key is absent. If extraction or parsing fails, a red diagnostic is
        printed and the stringified input is returned unchanged, so callers
        receive a ``str`` in the failure case.
    """
    text = str(text)
    try:
        # When the braces are missing, the slice below is not valid JSON;
        # json.loads rejects it and the handler returns the fallback.
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        payload = json.loads(text[start_index:end_index].lower())
        print("json", type(payload))
        print(payload)
        return payload.get('data', [])
    except Exception as e:
        # Deliberately broad: this is a best-effort extraction that must
        # never raise. The reset sequence needs the ESC prefix, otherwise
        # the terminal stays red and a literal "[0m" is printed.
        print(f"\033[31m Exception occurred while loading JSON: {e} \033[0m")
        return text