#SBATCH --time=1:00:00        # walltime (hours:minutes:seconds)
#SBATCH --ntasks=8            # number of processor cores (i.e. tasks)
#SBATCH --nodes=1             # number of nodes
#SBATCH --gpus=1              # number of GPUs
#SBATCH --mem=80G             # total memory per node
#SBATCH [email protected]   # email address
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --qos=cs
#SBATCH --partition=cs
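# Typical usage from a login node (sketch; the file name run_mse_eval.sh is an
# assumption, substitute whatever this script is actually saved as):
#   sbatch run_mse_eval.sh     # submit the job
#   squeue -u $USER            # check queue status
#   sacct -j <jobid>           # inspect accounting info after the job ends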
# some helpful debugging options
set -e
set -u
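# Optionally also fail when any command in a pipeline fails (not in the
# original script; uncomment if desired):
# set -o pipefail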
# LOAD MODULES, INSERT CODE, AND RUN YOUR PROGRAMS HERE
# module load python/3.11
source ./mse_env/bin/activate   # activate the project virtual environment (bin/ on Linux clusters)
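# If the environment does not exist yet, it can be created once beforehand
# (sketch; the package list is inferred from the pip installs noted below):
#   python -m venv mse_env
#   source ./mse_env/bin/activate
#   pip install jsonlines deepeval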
# JSON config: "max_samples": 500
# python mse_text_img_process.py
# python convert_mse.py
# pip install jsonlines
# pip install deepeval
NUM_TEST_CASES=100
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --shot 0 --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"
NUM_SHOT=0
# export DEEPEVAL_RESULTS_FOLDER="./data"
python mse_ollama_timer.py
echo "Test time calculated"
# deepeval set-local-model --model-name Hudson/llemma:7b
# ollama pull Hudson/llemma:7b
# deepeval set-ollama Hudson/llemma:7b
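# The commented commands above would point deepeval's metric evaluation at a
# locally served Ollama model; `ollama pull` downloads the model weights first.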
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_0_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_0_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_0_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_0_shot_100_cp.txt
# echo "Test case contextual precision finished"
NUM_SHOT=1
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_1_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_1_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_1_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_1_shot_100_cp.txt
# echo "Test case contextual precision finished"
NUM_SHOT=5
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #--out_file metric_test_5_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #--out_file metric_test_5_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_5_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT #--out_file metric_test_5_shot_100_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num 25 --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num 25 --begin 25 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b25_cp.txt
# echo "Test case contextual precision finished (start 25)"
# python mse_ollama_run.py --num 25 --begin 50 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b50_cp.txt
# echo "Test case contextual precision finished (start 50)"
# python mse_ollama_run.py --num 25 --begin 75 --test cp --shot $NUM_SHOT --out_file metric_test_5_shot_25_b75_cp.txt
# echo "Test case contextual precision finished (start 75)"
NUM_SHOT=10
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT --out_file metric_test_10_shot_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT --out_file metric_test_10_shot_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="./metric_test_10_shot_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT --out_file metric_test_10_shot_100_cp.txt
# echo "Test case contextual precision finished"
# fine-tuned model
NUM_SHOT=0
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_ar"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test ar --shot $NUM_SHOT #> metric_test_ft_100_ar.txt
# echo "Test case answer relevancy finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_crec"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test crec --shot $NUM_SHOT #> metric_test_ft_100_crec.txt
# echo "Test case contextual recall finished"
# export DEEPEVAL_RESULTS_FOLDER="metric_test_ft_100_cp"
# python mse_ollama_run.py --num $NUM_TEST_CASES --begin 0 --test cp --shot $NUM_SHOT > metric_test_ft_100_cp.txt
# echo "Test case contextual precision finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test crel --out_file metric_test_orig_100_crel.txt
# echo "Test case contextual relevancy finished"
# python mse_ollama_run.py --num $NUM_TEST_CASES --test f --out_file metric_test_orig_100_f.txt
# echo "Test case faithfulness finished"
# python mse_jsonl_resize.py
# python finetune.py
# echo "Original Llemma Model"
| # echo "Processing 0 shot 100 test cases" | |
| # CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 0 --dataset mse_llemma_orig_100_case_0_shot | |
| # echo "Processing 1 shot 100 test cases" | |
| # CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 1 --dataset mse_llemma_orig_100_case_1_shot | |
| # echo "Processing 5 shot 100 test cases" | |
| # CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 5 --dataset mse_llemma_orig_100_case_5_shot | |
| # echo "Processing 10 shot 100 test cases" | |
| # CUDA_VISIBLE_DEVICES=0 python mse_deepeval_dataset.py --num 100 --shot 10 --dataset mse_llemma_orig_100_case_10_shot |