Spaces:
Runtime error
Commit
·
8647e3b
1
Parent(s):
a70d6a8
refactor: classifier training
Browse files
- application_pages/train_classifier.py +1 -1
- guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark_weave.py +1 -1
- guardrails_genie/guardrails/injection/classifier_guardrail.py +1 -2
- guardrails_genie/{train_classifier.py → train/train_classifier.py} +2 -39
- guardrails_genie/utils.py +43 -0
application_pages/train_classifier.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import streamlit as st
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
-
from guardrails_genie.train_classifier import train_binary_classifier
|
| 7 |
|
| 8 |
|
| 9 |
def initialize_session_state():
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
+
from guardrails_genie.train.train_classifier import train_binary_classifier
|
| 7 |
|
| 8 |
|
| 9 |
def initialize_session_state():
|
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_benchmark_weave.py
CHANGED
|
@@ -362,7 +362,7 @@ def main():
|
|
| 362 |
preprocess_model_input=preprocess_model_input,
|
| 363 |
)
|
| 364 |
|
| 365 |
-
|
| 366 |
|
| 367 |
|
| 368 |
if __name__ == "__main__":
|
|
|
|
| 362 |
preprocess_model_input=preprocess_model_input,
|
| 363 |
)
|
| 364 |
|
| 365 |
+
asyncio.run(evaluation.evaluate(guardrail))
|
| 366 |
|
| 367 |
|
| 368 |
if __name__ == "__main__":
|
guardrails_genie/guardrails/injection/classifier_guardrail.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
from typing import Optional
|
| 2 |
|
| 3 |
import torch
|
|
|
|
| 4 |
import weave
|
| 5 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
| 6 |
from transformers.pipelines.base import Pipeline
|
| 7 |
|
| 8 |
-
import wandb
|
| 9 |
-
|
| 10 |
from ..base import Guardrail
|
| 11 |
|
| 12 |
|
|
|
|
| 1 |
from typing import Optional
|
| 2 |
|
| 3 |
import torch
|
| 4 |
+
import wandb
|
| 5 |
import weave
|
| 6 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
| 7 |
from transformers.pipelines.base import Pipeline
|
| 8 |
|
|
|
|
|
|
|
| 9 |
from ..base import Guardrail
|
| 10 |
|
| 11 |
|
guardrails_genie/{train_classifier.py → train/train_classifier.py}
RENAMED
|
@@ -1,54 +1,17 @@
|
|
| 1 |
import evaluate
|
| 2 |
import numpy as np
|
| 3 |
import streamlit as st
|
|
|
|
| 4 |
from datasets import load_dataset
|
| 5 |
from transformers import (
|
| 6 |
AutoModelForSequenceClassification,
|
| 7 |
AutoTokenizer,
|
| 8 |
DataCollatorWithPadding,
|
| 9 |
Trainer,
|
| 10 |
-
TrainerCallback,
|
| 11 |
TrainingArguments,
|
| 12 |
)
|
| 13 |
-
from transformers.trainer_callback import TrainerControl, TrainerState
|
| 14 |
-
|
| 15 |
-
import wandb
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class StreamlitProgressbarCallback(TrainerCallback):
|
| 19 |
-
"""
|
| 20 |
-
StreamlitProgressbarCallback is a custom callback for the Hugging Face Trainer
|
| 21 |
-
that integrates a progress bar into a Streamlit application. This class updates
|
| 22 |
-
the progress bar at each training step, providing real-time feedback on the
|
| 23 |
-
training process within the Streamlit interface.
|
| 24 |
|
| 25 |
-
|
| 26 |
-
progress_bar (streamlit.delta_generator.DeltaGenerator): A Streamlit progress
|
| 27 |
-
bar object initialized to 0 with the text "Training".
|
| 28 |
-
|
| 29 |
-
Methods:
|
| 30 |
-
on_step_begin(args, state, control, **kwargs):
|
| 31 |
-
Updates the progress bar at the beginning of each training step. The progress
|
| 32 |
-
is calculated as the percentage of completed steps out of the total steps.
|
| 33 |
-
The progress bar text is updated to show the current step and the total steps.
|
| 34 |
-
"""
|
| 35 |
-
|
| 36 |
-
def __init__(self, *args, **kwargs):
|
| 37 |
-
super().__init__(*args, **kwargs)
|
| 38 |
-
self.progress_bar = st.progress(0, text="Training")
|
| 39 |
-
|
| 40 |
-
def on_step_begin(
|
| 41 |
-
self,
|
| 42 |
-
args: TrainingArguments,
|
| 43 |
-
state: TrainerState,
|
| 44 |
-
control: TrainerControl,
|
| 45 |
-
**kwargs,
|
| 46 |
-
):
|
| 47 |
-
super().on_step_begin(args, state, control, **kwargs)
|
| 48 |
-
self.progress_bar.progress(
|
| 49 |
-
(state.global_step * 100 // state.max_steps) + 1,
|
| 50 |
-
text=f"Training {state.global_step} / {state.max_steps}",
|
| 51 |
-
)
|
| 52 |
|
| 53 |
|
| 54 |
def train_binary_classifier(
|
|
|
|
| 1 |
import evaluate
|
| 2 |
import numpy as np
|
| 3 |
import streamlit as st
|
| 4 |
+
import wandb
|
| 5 |
from datasets import load_dataset
|
| 6 |
from transformers import (
|
| 7 |
AutoModelForSequenceClassification,
|
| 8 |
AutoTokenizer,
|
| 9 |
DataCollatorWithPadding,
|
| 10 |
Trainer,
|
|
|
|
| 11 |
TrainingArguments,
|
| 12 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
from guardrails_genie.utils import StreamlitProgressbarCallback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def train_binary_classifier(
|
guardrails_genie/utils.py
CHANGED
|
@@ -1,5 +1,12 @@
|
|
| 1 |
import pandas as pd
|
|
|
|
| 2 |
import weave
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class EvaluationCallManager:
|
|
@@ -91,3 +98,39 @@ class EvaluationCallManager:
|
|
| 91 |
call["score"]["correct"] for call in guardrail_call["calls"]
|
| 92 |
]
|
| 93 |
return pd.DataFrame(dataframe)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import streamlit as st
|
| 3 |
import weave
|
| 4 |
+
from transformers.trainer_callback import (
|
| 5 |
+
TrainerCallback,
|
| 6 |
+
TrainerControl,
|
| 7 |
+
TrainerState,
|
| 8 |
+
TrainingArguments,
|
| 9 |
+
)
|
| 10 |
|
| 11 |
|
| 12 |
class EvaluationCallManager:
|
|
|
|
| 98 |
call["score"]["correct"] for call in guardrail_call["calls"]
|
| 99 |
]
|
| 100 |
return pd.DataFrame(dataframe)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class StreamlitProgressbarCallback(TrainerCallback):
|
| 104 |
+
"""
|
| 105 |
+
StreamlitProgressbarCallback is a custom callback for the Hugging Face Trainer
|
| 106 |
+
that integrates a progress bar into a Streamlit application. This class updates
|
| 107 |
+
the progress bar at each training step, providing real-time feedback on the
|
| 108 |
+
training process within the Streamlit interface.
|
| 109 |
+
|
| 110 |
+
Attributes:
|
| 111 |
+
progress_bar (streamlit.delta_generator.DeltaGenerator): A Streamlit progress
|
| 112 |
+
bar object initialized to 0 with the text "Training".
|
| 113 |
+
|
| 114 |
+
Methods:
|
| 115 |
+
on_step_begin(args, state, control, **kwargs):
|
| 116 |
+
Updates the progress bar at the beginning of each training step. The progress
|
| 117 |
+
is calculated as the percentage of completed steps out of the total steps.
|
| 118 |
+
The progress bar text is updated to show the current step and the total steps.
|
| 119 |
+
"""
|
| 120 |
+
|
| 121 |
+
def __init__(self, *args, **kwargs):
|
| 122 |
+
super().__init__(*args, **kwargs)
|
| 123 |
+
self.progress_bar = st.progress(0, text="Training")
|
| 124 |
+
|
| 125 |
+
def on_step_begin(
|
| 126 |
+
self,
|
| 127 |
+
args: TrainingArguments,
|
| 128 |
+
state: TrainerState,
|
| 129 |
+
control: TrainerControl,
|
| 130 |
+
**kwargs,
|
| 131 |
+
):
|
| 132 |
+
super().on_step_begin(args, state, control, **kwargs)
|
| 133 |
+
self.progress_bar.progress(
|
| 134 |
+
(state.global_step * 100 // state.max_steps) + 1,
|
| 135 |
+
text=f"Training {state.global_step} / {state.max_steps}",
|
| 136 |
+
)
|