Space: trend
arxiv_agent.py (CHANGED, +84 -65)
@@ -110,8 +110,21 @@ def dailyDownload(agent_ls):
         agent.paper_embedding = update_paper_file
         print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))

-
+def dailySave(agent_ls):
+    agent = agent_ls[0]
+    while True:
+        time.sleep(DAY_TIME)
+        with open(agent.trend_idea_path, "w") as f_:
+            json.dump(agent.trend_idea, f_)
+
+        with open(agent.thought_path, "w") as f_:
+            json.dump(agent.thought, f_)
+
+        with open(agent.thought_embedding_path, "wb") as f:
+            pickle.dump(agent.thought_embedding, f)
+
+        with open(agent.profile_path,"w") as f:
+            json.dump(agent.profile,f)


 class ArxivAgent:
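Note: the new dailySave worker is a periodic-flush loop: sleep for DAY_TIME, then serialize each in-memory cache to disk. A minimal self-contained sketch of the same pattern, using plain threading instead of the repo's thread6.run_threaded helper (all names below are illustrative, not from the repo):

    import json
    import threading
    import time

    SAVE_INTERVAL = 24 * 60 * 60  # seconds; stands in for DAY_TIME

    def periodic_save(cache: dict, path: str, interval: float = SAVE_INTERVAL) -> None:
        # Flush `cache` to `path` as JSON once per interval, forever.
        while True:
            time.sleep(interval)
            with open(path, "w") as f:
                json.dump(cache, f)

    # Daemon thread so the saver never blocks interpreter shutdown.
    cache = {"some author": "some profile"}
    threading.Thread(target=periodic_save, args=(cache, "profile.json"), daemon=True).start()

One caveat with writing in place as above (and in the hunk): a crash mid-dump can leave a truncated file; dumping to a temporary file and swapping it in with os.replace would make each flush atomic.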
@@ -119,6 +132,8 @@ class ArxivAgent:

         self.dataset_path = "./dataset/paper.json"
         self.thought_path = "./dataset/thought.json"
+        self.trend_idea_path = "./dataset/trend_idea.json"
+        self.profile_path = "./dataset/profile.json"

         self.embedding_path = "./dataset/paper_embedding.pkl"
         self.thought_embedding_path = './dataset/thought_embedding.pkl'
@@ -127,30 +142,24 @@
         self.today = datetime.datetime.now().strftime("%m/%d/%Y")

         self.newest_day = ""
-        self.
-
+        self.load_cache()
+
         self.download()
         try:
            thread6.run_threaded(dailyDownload, [self])
-
+           thread6.run_threaded(dailySave, [self])
        except:
           print("Error: unable to start thread")
-
-        # self.paper_by_date = self.paper
+
     def edit_profile(self, profile, author_name):
-
-
-
-        data = json.load(file)
-        data[author_name]=profile
-        with open(filename, "w") as f:
-            json.dump(data, f)
+
+        self.profile[author_name]=profile
+
        return "Successfully edit profile!"

     def get_profile(self, author_name):
        if author_name == "": return None
-
-        # pdb.set_trace()
+
        profile = self.get_arxiv_data_by_author(author_name)
        return profile
     def select_date(self, method, profile_input):
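Note: together with dailySave above, this hunk completes a load-mutate-flush lifecycle: load_cache() populates self.profile and self.trend_idea at startup, edit_profile now only mutates the in-memory dict, and the background thread persists everything later. A hypothetical driver (assuming the constructor takes no arguments):

    agent = ArxivAgent()                                   # __init__ runs self.load_cache()
    agent.edit_profile("works on RL theory", "Jane Doe")   # in-memory update only
    # up to DAY_TIME later, dailySave flushes agent.profile to ./dataset/profile.json

The trade-off is durability: edits made less than one save interval before a shutdown are lost.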
@@ -186,16 +195,40 @@
         data_chunk_embedding=chunk_embedding_date
         profile = profile_input

-
-
+        key_update = list(self.paper.keys())[-1]
+        isQuery = False
+        if profile in self.trend_idea:
+            if key_update in self.trend_idea[profile]:
+                if method in self.trend_idea[profile][key_update]:
+                    trend = self.trend_idea[profile][key_update][method]["trend"]
+                    reference = self.trend_idea[profile][key_update][method]["reference"]
+                    idea = self.trend_idea[profile][key_update][method]["idea"]
+                    isQuery = True

         # import pdb
         # pdb.set_trace()
-
-
-
+        if not(isQuery):
+            trend, paper_link = summarize_research_field(profile, "Machine Learning", dataset, data_chunk_embedding)  # trend
+            reference = papertitleAndLink(paper_link)
+            idea = generate_ideas(trend)  # idea
+            if profile in self.trend_idea:
+                if key_update in self.trend_idea[profile]:
+                    if not(method in self.trend_idea[profile][key_update]):
+                        self.trend_idea[profile][key_update][method] = {}
+                else:
+                    self.trend_idea[profile][key_update] = {}
+                    self.trend_idea[profile][key_update][method] = {}
+            else:
+                self.trend_idea[profile] = {}
+                self.trend_idea[profile][key_update] = {}
+                self.trend_idea[profile][key_update][method] = {}

-
+            self.trend_idea[profile][key_update][method]["trend"] = trend
+            self.trend_idea[profile][key_update][method]["reference"] = reference
+            self.trend_idea[profile][key_update][method]["idea"] = idea
+
+
+
         if key_update not in self.thought:
             self.thought[key_update] = []
         if key_update not in self.thought_embedding:
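Note: the three-level if/else ladder that creates self.trend_idea[profile][key_update][method] can be collapsed with dict.setdefault, which returns the existing inner dict or inserts and returns a new one. A behavior-equivalent sketch (function name hypothetical):

    def cache_trend_idea(trend_idea, profile, key_update, method, trend, reference, idea):
        # setdefault walks/creates the three-level path in one pass.
        entry = (trend_idea.setdefault(profile, {})
                           .setdefault(key_update, {})
                           .setdefault(method, {}))
        entry.update(trend=trend, reference=reference, idea=idea)

    cache = {}
    cache_trend_idea(cache, "profile text", "03/01/2024", "IP", "trend...", "refs...", "idea...")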
@@ -205,22 +238,11 @@
         self.thought_embedding[key_update].append(get_bert_embedding([trend])[0])
         self.thought[key_update].append(idea[0])
         self.thought_embedding[key_update].append(get_bert_embedding([idea])[0])
-        # with open(self.dataset_path, "w") as f_:
-        #     json.dump(self.paper, f_)
-
-        with open(self.thought_path, "w") as f_:
-            json.dump(self.thought, f_)
-
-        with open(self.thought_embedding_path, "wb") as f:
-            pickle.dump(self.thought_embedding, f)
-

         return trend, reference, idea

     def response(self, data, profile_input):
-        # dataset = self.paper_by_date

-        # dataset = self.paper
         query = [data]
         profile = profile_input

@@ -315,7 +337,7 @@



-    def
+    def load_cache(self):
         filename = self.feedback_path

         if os.path.exists(filename):
@@ -330,13 +352,35 @@
                 m = {}
         self.feedback = m.copy()

-
-
-    def load_thought(self):
+        filename = self.trend_idea_path
+        if os.path.exists(filename):
+            with open(filename,"rb") as f:
+                content = f.read()
+                if not content:
+                    m = {}
+                else:
+                    m = json.loads(content)
+        else:
+            with open(filename, mode='w', encoding='utf-8') as ff:
+                m = {}
+        self.trend_idea = m.copy()
+
+        filename = self.profile_path
+        if os.path.exists(filename):
+            with open(filename,"rb") as f:
+                content = f.read()
+                if not content:
+                    m = {}
+                else:
+                    m = json.loads(content)
+        else:
+            with open(filename, mode='w', encoding='utf-8') as ff:
+                m = {}
+        self.profile = m.copy()
+
         filename = self.thought_path
         filename_emb = self.thought_embedding_path
-
         if os.path.exists(filename):
             with open(filename,"rb") as f:
                 content = f.read()
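Note: load_cache now repeats the same read-or-initialize block four times (feedback, trend_idea, profile, thought). A small helper would factor it; a sketch under the same semantics (helper name hypothetical):

    import json
    import os

    def load_json_or_init(path: str) -> dict:
        # Return the dict stored at `path`; create the file and return {} if it is absent or empty.
        if os.path.exists(path):
            with open(path, "rb") as f:
                content = f.read()
            return json.loads(content) if content else {}
        open(path, mode="w", encoding="utf-8").close()  # touch the file, mirroring the original else-branch
        return {}

    # e.g. self.trend_idea = load_json_or_init(self.trend_idea_path)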
@@ -348,7 +392,6 @@
             with open(filename, mode='w', encoding='utf-8') as ff:
                 m = {}

-
         if os.path.exists(filename_emb):
             with open(filename_emb,"rb") as f:
                 content = f.read()
@@ -366,12 +409,7 @@



-
-        # papers = data[time]['abstract']
-        # papers_embedding=get_bert_embedding(papers)
-        # time_chunks_embed[time.strftime("%m/%d/%Y")] = papers_embedding
-        # return
-        # for k in json_data.keys():
+
     def update_feedback_thought(self, query, ansA, ansB, feedbackA, feedbackB):
         try:
             thread6.run_threaded(feedback_thought, [self, query, ansA, ansB, feedbackA, feedbackB])
@@ -413,22 +451,7 @@



-
-        if os.path.exists(filename):
-            with open(filename,"r") as f:
-                content = f.read()
-                if not content:
-                    m = {}
-                else:
-                    m = json.loads(content)
-        else:
-            with open(filename, mode='w', encoding='utf-8') as ff:
-                m = {}
-
-
-        json_data = m.copy()
-
-        if author_name in json_data: return json_data[author_name]
+        if author_name in self.profile: return self.profile[author_name]

         author_query = author_name.replace(" ", "+")
         url = f"http://export.arxiv.org/api/query?search_query=au:{author_query}&start=0&max_results=300"  # Adjust max_results if needed
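Note: the URL kept in context here hits the arXiv export API, which returns an Atom feed of the author's papers. A minimal fetch sketch (assumes the third-party feedparser package; the repo itself may parse the XML differently):

    import feedparser  # pip install feedparser

    def fetch_author_entries(author_name: str, max_results: int = 300):
        author_query = author_name.replace(" ", "+")
        url = (f"http://export.arxiv.org/api/query?"
               f"search_query=au:{author_query}&start=0&max_results={max_results}")
        feed = feedparser.parse(url)
        # Each Atom entry exposes the paper title and abstract (`summary`).
        return [(entry.title, entry.summary) for entry in feed.entries]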
@@ -512,15 +535,11 @@
             # pdb.set_trace()
             personal_info = "; ".join([f"{details['Title & Abstract']}" for details in papers_list])
             info = summarize_research_direction(personal_info)
-
-            with open(filename,"w") as f:
-                json.dump(json_data,f)
-            return json_data[author_name]
+            self.profile[author_name] = info

-
+            return self.profile[author_name]
         else:
-            # print("Failed to fetch data from arXiv.")
             return None
