bhardwaj08sarthak committed on
Commit
0ee77d5
·
verified ·
1 Parent(s): 71fa3f5

Update all_datasets.py

Browse files
Files changed (1) hide show
  1. all_datasets.py +19 -18
all_datasets.py CHANGED
@@ -1,18 +1,19 @@
1
- #%%
2
- from datasets import load_dataset
3
- import pandas as pd
4
- import os
5
- os.chdir(os.path.dirname(__file__))
6
- clean_math = pd.read_json(
7
- "deepmind_math.jsonl",
8
- lines=True,
9
- orient="records"
10
- )
11
- GSM8k = load_dataset('openai/gsm8k','main', split= 'train')
12
- MMMLU = load_dataset('cais/mmlu', 'college_mathematics', split='test+validation')
13
- MMMU = load_dataset('MMMU/MMMU', 'Math', split='test+validation')
14
- Olympiad_math = load_dataset('Hothan/OlympiadBench', 'TP_TO_maths_en_COMP', split='train')
15
- Olympiad_math2 = load_dataset('Hothan/OlympiadBench', 'OE_TO_maths_en_COMP', split='train')
16
- ScienceQA = load_dataset("derek-thomas/ScienceQA", split="train")
17
- PubmedQA = load_dataset('qiaojin/PubMedQA','pqa_unlabeled', split='train')
18
- # %%
 
 
1
+ #%%
2
+ from datasets import load_dataset
3
+ import pandas as pd
4
+ from pathlib import Path
5
+ BASE_DIR = Path(__file__).resolve().parent
6
+ JSONL_PATH = BASE_DIR / "deepmind_math.jsonl"
7
+ clean_math = pd.read_json(
8
+ JSONL_PATH,
9
+ lines=True,
10
+ orient="records"
11
+ )
12
+ GSM8k = load_dataset('openai/gsm8k','main', split= 'train')
13
+ MMMLU = load_dataset('cais/mmlu', 'college_mathematics', split='test+validation')
14
+ MMMU = load_dataset('MMMU/MMMU', 'Math', split='test+validation')
15
+ Olympiad_math = load_dataset('Hothan/OlympiadBench', 'TP_TO_maths_en_COMP', split='train')
16
+ Olympiad_math2 = load_dataset('Hothan/OlympiadBench', 'OE_TO_maths_en_COMP', split='train')
17
+ ScienceQA = load_dataset("derek-thomas/ScienceQA", split="train")
18
+ PubmedQA = load_dataset('qiaojin/PubMedQA','pqa_unlabeled', split='train')
19
+ # %%