Spaces:
Sleeping
Sleeping
| import os | |
| import feedparser | |
| import urllib.parse | |
def get_arxiv_abstract_by_title(title):
    """Return the abstract of the arXiv paper whose title matches *title*.

    Sends an exact-phrase title query to the arXiv Atom API and returns the
    first entry's summary. Returns the fallback string "No abstract found"
    when the search yields no entries or the entry has no summary field.

    Parameters
    ----------
    title : str
        Paper title to search for (searched as a quoted exact phrase).
    """
    # Wrap the title in double quotes for an exact-phrase match; quote()
    # percent-escapes spaces and other URL-unsafe characters.
    query_title = urllib.parse.quote(f'"{title}"')
    # HTTPS avoids the redirect arXiv issues for plain-HTTP API requests.
    url = (
        "https://export.arxiv.org/api/query"
        f"?search_query=ti:{query_title}&max_results=1"
    )
    feed = feedparser.parse(url)
    if feed.entries:
        # A malformed entry may lack `summary`; fall back instead of
        # raising AttributeError.
        return getattr(feed.entries[0], "summary", "No abstract found")
    return "No abstract found"
def main():
    """Build one Markdown file of abstracts per folder of downloaded PDFs.

    Scans <script dir>/arxiv_downloads for sub-folders; for each folder,
    treats every PDF's file name (minus extension) as a paper title, looks
    up its abstract on arXiv, and writes a numbered list of abstracts to
    <script dir>/abstract_survey/<folder>.md (one header + one section per
    PDF). Existing output files are overwritten.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    input_dir = os.path.join(base_dir, 'arxiv_downloads')
    output_dir = os.path.join(base_dir, 'abstract_survey')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    # sorted() makes the processing order deterministic across platforms
    # (os.listdir order is filesystem-dependent).
    for folder_name in sorted(os.listdir(input_dir)):
        folder_path = os.path.join(input_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        md_path = os.path.join(output_dir, f"{folder_name}.md")
        pdfs = sorted(
            name for name in os.listdir(folder_path)
            if name.lower().endswith('.pdf')
        )

        sections = []
        for i, pdf in enumerate(pdfs, 1):
            # Convention: the PDF file name (without .pdf) is the paper title.
            title = os.path.splitext(pdf)[0]
            abstract = get_arxiv_abstract_by_title(title)
            sections.append(f"{i}: {pdf}\n{abstract}\n")

        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(f"# {folder_name} Abstracts\n\n")
            # Batch the per-section writes instead of many tiny write() calls.
            f.writelines(section + "\n" for section in sections)
| if __name__ == "__main__": | |
| main() | |