Spaces:

somosnlp
/

SpanishMedicaLLM

Runtime error

inoid committed on Mar 29, 2024

Commit

b802fa4

1 Parent(s): 450e36d

Use environment variables with os.environ function

Files changed (2) hide show

app.py CHANGED Viewed

@@ -40,6 +40,7 @@ def evaluate_model():
     return(f"Evaluate Model {os.environ.get('HF_LLM_MODEL_ID')} from dataset {os.environ.get('HF_LLM_DATASET_ID')}")
 def train_model(*inputs):
     if "IS_SHARED_UI" in os.environ:
         raise gr.Error("This Space only works in duplicated instances")

     return(f"Evaluate Model {os.environ.get('HF_LLM_MODEL_ID')} from dataset {os.environ.get('HF_LLM_DATASET_ID')}")
 def train_model(*inputs):
     if "IS_SHARED_UI" in os.environ:
         raise gr.Error("This Space only works in duplicated instances")

spanish_medica_llm.py CHANGED Viewed

@@ -518,7 +518,8 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
                 save_steps = 50,                # Save checkpoints every 50 steps
                 evaluation_strategy = "steps", # Evaluate the model every logging step
                 eval_steps = 50,               # Evaluate and save checkpoints every 50 steps
-                do_eval = True,                # Perform evaluation at the end of training
                 run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}" ,         # Name of the W&B run (optional)
                 fp16=True,  #Set for GPU T4 for more powerful GPU as G-100 or another change to false and bf16 parameter
                 bf16=False
@@ -534,12 +535,17 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
         basemodel.config.use_cache = False  # silence the warnings. Please re-enable for inference!
         trainer.train()
         trainer.push_to_hub()
 def run_training_process():
     #Loggin to Huggin Face
     login(token = os.environ.get('HG_FACE_TOKEN'))
     tokenizer = loadSpanishTokenizer()
     medicalSpanishDataset =  loadSpanishDataset()
     train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid(

                 save_steps = 50,                # Save checkpoints every 50 steps
                 evaluation_strategy = "steps", # Evaluate the model every logging step
                 eval_steps = 50,               # Evaluate and save checkpoints every 50 steps
+                do_eval = True,                # Perform evaluation at the end of training
+                report_to = None,           # Comment this out if you don't want to use weights & baises
                 run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}" ,         # Name of the W&B run (optional)
                 fp16=True,  #Set for GPU T4 for more powerful GPU as G-100 or another change to false and bf16 parameter
                 bf16=False
         basemodel.config.use_cache = False  # silence the warnings. Please re-enable for inference!
         trainer.train()
         trainer.push_to_hub()
 def run_training_process():
     #Loggin to Huggin Face
     login(token = os.environ.get('HG_FACE_TOKEN'))
+    os.environ['WANDB_DISABLED'] = 'true'
     tokenizer = loadSpanishTokenizer()
     medicalSpanishDataset =  loadSpanishDataset()
     train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid(