Commit 29d8de2
Parent(s): cb2cffc
Gradio details panels, docstrings
Files changed:
- demo_watermark.py +95 -21
- requirements.txt +1 -0
demo_watermark.py
CHANGED
@@ -33,6 +33,7 @@ from transformers import (AutoTokenizer,
 from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
 
 def str2bool(v):
+    """Util function for user friendly boolean flag args"""
     if isinstance(v, bool):
         return v
     if v.lower() in ('yes', 'true', 't', 'y', '1'):
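Note: as the new docstring says, `str2bool` exists so boolean CLI flags accept friendly strings. A minimal usage sketch (the flag name is illustrative, not necessarily one defined in this file):

```python
import argparse

# Minimal sketch; str2bool is the helper defined above in demo_watermark.py.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--use_sampling",   # illustrative boolean flag
    type=str2bool,      # accepts yes/no, true/false, t/f, y/n, 1/0
    default=True,
)
args = parser.parse_args(["--use_sampling", "no"])
print(args.use_sampling)  # -> False
```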
@@ -43,6 +44,7 @@ def str2bool(v):
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
 def parse_args():
+    """Command line argument specification"""
 
     parser = argparse.ArgumentParser(description="A minimum working example of applying the watermark to any LLM that supports the huggingface 🤗 `generate` API")
 
@@ -164,6 +166,8 @@ def parse_args():
     return args
 
 def load_model(args):
+    """Load and return the model and tokenizer"""
+
     args.is_seq2seq_model = any([(model_type in args.model_name_or_path) for model_type in ["t5","T0"]])
     args.is_decoder_only_model = any([(model_type in args.model_name_or_path) for model_type in ["gpt","opt","bloom"]])
     if args.is_seq2seq_model:
@@ -185,7 +189,10 @@ def load_model(args):
     return model, tokenizer, device
 
 def generate(prompt, args, model=None, device=None, tokenizer=None):
-
+    """Instantiate the WatermarkLogitsProcessor according to the watermark parameters
+       and generate watermarked text by passing it to the generate method of the model
+       as a logits processor."""
+
     print(f"Generating with {args}")
 
     watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
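Note: the docstring added above summarizes the generation path: the `WatermarkLogitsProcessor` is handed to the Hugging Face `generate` call as a logits processor. A minimal sketch of that pattern; only `vocab=...` is taken verbatim from this file, while the model choice, `gamma`, `delta`, and the seeding scheme name are illustrative values echoing the settings described later in the diff:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, LogitsProcessorList
from watermark_processor import WatermarkLogitsProcessor

model_name = "facebook/opt-1.3b"  # illustrative small open model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Watermark parameters; the values and the seeding scheme name are example choices.
watermark_processor = WatermarkLogitsProcessor(
    vocab=list(tokenizer.get_vocab().values()),
    gamma=0.25,                 # fraction of the vocabulary in the greenlist
    delta=2.0,                  # bias added to greenlist logits
    seeding_scheme="simple_1",  # assumed scheme name
)

inputs = tokenizer("The watermark is embedded by", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    logits_processor=LogitsProcessorList([watermark_processor]),
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```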
@@ -251,25 +258,16 @@ def generate(prompt, args, model=None, device=None, tokenizer=None):
             # decoded_output_with_watermark)
 
 def format_names(s):
+    """Format names for the gradio demo interface"""
     s=s.replace("num_tokens_scored","Tokens Counted (T)")
     s=s.replace("num_green_tokens","# Tokens in Greenlist")
     s=s.replace("green_fraction","Fraction of T in Greenlist")
     s=s.replace("z_score","z-score")
     s=s.replace("p_value","p value")
     return s
-
-# output_str = f"@ z-score threshold={detection_threshold}:\n\n"
-# for k,v in score_dict.items():
-#     if k=='green_fraction':
-#         output_str+=f"{format_names(k)}={v:.1%}"
-#     elif k=='confidence':
-#         output_str+=f"{format_names(k)}={v:.3%}"
-#     elif isinstance(v, float):
-#         output_str+=f"{format_names(k)}={v:.3g}"
-#     else:
-#         output_str += v
-# return output_str
+
 def list_format_scores(score_dict, detection_threshold):
+    """Format the detection metrics into a gradio dataframe input format"""
     lst_2d = []
     lst_2d.append(["z-score threshold", f"{detection_threshold}"])
     for k,v in score_dict.items():
@@ -286,6 +284,8 @@ def list_format_scores(score_dict, detection_threshold):
     return lst_2d
 
 def detect(input_text, args, device=None, tokenizer=None):
+    """Instantiate the WatermarkDetector object and call detect on
+       the input text returning the scores and outcome of the test"""
     watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                            gamma=args.gamma,
                                            seeding_scheme=args.seeding_scheme,
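Note: per the docstring added above, detection builds a `WatermarkDetector` and calls `detect` on the text. A hedged sketch; only `vocab`, `gamma`, and `seeding_scheme` are visible in this hunk, and the remaining keyword arguments and values are assumptions based on the settings panel further down:

```python
from transformers import AutoTokenizer
from watermark_processor import WatermarkDetector

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")  # illustrative model

detector = WatermarkDetector(
    vocab=list(tokenizer.get_vocab().values()),  # shown in this hunk
    gamma=0.25,                                  # shown in this hunk (args.gamma)
    seeding_scheme="simple_1",                   # shown in this hunk (args.seeding_scheme)
    device="cpu",                                # assumed keyword argument
    tokenizer=tokenizer,                         # assumed keyword argument
    z_threshold=4.0,                             # assumed keyword argument
)

score_dict = detector.detect("Text to score for the watermark...")
# Keys expected per the metrics panel below: num_tokens_scored, num_green_tokens,
# green_fraction, z_score, p_value, prediction (and confidence when watermarked).
print(score_dict)
```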
@@ -306,7 +306,7 @@ def detect(input_text, args, device=None, tokenizer=None):
     return output, args
 
 def run_gradio(args, model=None, device=None, tokenizer=None):
-
+    """Define and launch the gradio demo interface"""
     generate_partial = partial(generate, model=model, device=device, tokenizer=tokenizer)
     detect_partial = partial(detect, device=device, tokenizer=tokenizer)
 
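Note: `run_gradio` wires the `generate_partial`/`detect_partial` callables into a Gradio Blocks UI. A heavily condensed wiring sketch; the real demo builds many more components and the callbacks return more outputs, so the component names and reduced signatures here are simplifying assumptions:

```python
import gradio as gr

def build_demo(generate_partial, detect_partial, args):
    """Heavily simplified wiring; the actual demo builds many more components."""
    with gr.Blocks() as demo:
        session_args = gr.State(value=args)  # the args object travels with the session
        prompt = gr.Textbox(label="Prompt")
        generate_btn = gr.Button("Generate")
        output_with_watermark = gr.Textbox(label="Output With Watermark")
        detect_btn = gr.Button("Detect")
        detection_table = gr.Dataframe(headers=["Metric", "Value"])

        # Both callbacks take (text, args) and are assumed here to return a reduced
        # set of outputs; the real generate/detect return several more values.
        generate_btn.click(generate_partial, inputs=[prompt, session_args],
                           outputs=[output_with_watermark, session_args])
        detect_btn.click(detect_partial, inputs=[output_with_watermark, session_args],
                         outputs=[detection_table, session_args])
    return demo

# run_gradio ends with something like: build_demo(...).launch()
```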
@@ -315,15 +315,38 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
         # Top section, greeting and instructions
         gr.Markdown("## 💧 [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) 🔍")
         gr.Markdown("[jwkirchenbauer/lm-watermarking](https://github.com/jwkirchenbauer/lm-watermarking)")
+        gr.Markdown(f"Language model: {args.model_name_or_path}")
+        with gr.Accordion("Understanding the output metrics",open=False):
+            gr.Markdown(
+                """
+                - `z-score threshold` : The cutoff for the hypothesis test
+                - `Tokens Counted (T)` : The number of tokens in the output that were counted by the detection algorithm.
+                    The first token is omitted in the simple, single-token seeding scheme since there is no way to generate
+                    a greenlist for it as it has no prefix token(s). Under the "Ignore Bigram Repeats" detection algorithm,
+                    described in the bottom panel, this can be much less than the total number of tokens generated if there is a lot of repetition.
+                - `# Tokens in Greenlist` : The number of tokens that were observed to fall in their respective greenlist
+                - `Fraction of T in Greenlist` : `# Tokens in Greenlist` / `T`. This is expected to be approximately `gamma` for human/unwatermarked text.
+                - `z-score` : The test statistic for the detection hypothesis test. If it is larger than the `z-score threshold`
+                    we "reject the null hypothesis" that the text is human/unwatermarked, and conclude it is watermarked.
+                - `p value` : The likelihood of observing the computed `z-score` under the null hypothesis. This is the likelihood of
+                    observing the `Fraction of T in Greenlist` given that the text was generated without knowledge of the watermark procedure/greenlists.
+                    If this is extremely _small_ we are confident that this many green tokens was not chosen by random chance.
+                - `prediction` : The outcome of the hypothesis test, i.e. whether the observed `z-score` was higher than the `z-score threshold`
+                - `confidence` : If we reject the null hypothesis, and the `prediction` is "Watermarked", then we report 1 - `p value` to represent
+                    the confidence of the detection based on the unlikeliness of this `z-score` observation.
+                """
+            )
 
         with gr.Accordion("A note on model capability",open=False):
             gr.Markdown(
                 """
-                The models that can be used in this demo are limited to those that are open source
-
-                Therefore,
+                The models that can be used in this demo are limited to those that are both open source and that fit on a single commodity GPU.
+                In particular, there aren't many models above a few billion parameters and almost none trained using both instruction fine-tuning and/or RLHF.
+                Therefore, in both its un-watermarked (normal) and watermarked states, the model is not generally able to respond well to the kinds of prompts that a 100B+ instruction- and RLHF-tuned model such as ChatGPT, Claude, or Bard can.
 
-                We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
+                We suggest you try prompts that give the model a few sentences and then allow it to 'continue' the prompt, as these weaker models are more capable in this simpler language modeling setting.
+                Some examples include the opening paragraph of a Wikipedia article, or the first few sentences of a story.
+                Longer prompts and stopping mid-sentence often help encourage more fluent, longer generations.
                 """
             )
 
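Note: the z-score and p value described in this panel come from a one-proportion z-test on the greenlist hit count (see the linked paper). A short sketch of that arithmetic using scipy, which is already in requirements.txt; the counts are made up:

```python
from math import sqrt
from scipy.stats import norm

def watermark_z_score(num_green: int, num_scored: int, gamma: float) -> float:
    """One-proportion z-test: z = (g - gamma*T) / sqrt(T * gamma * (1 - gamma))."""
    expected = gamma * num_scored
    variance = num_scored * gamma * (1.0 - gamma)
    return (num_green - expected) / sqrt(variance)

# Made-up example: 60 of 200 scored tokens fell in their greenlists with gamma = 0.25.
z = watermark_z_score(num_green=60, num_scored=200, gamma=0.25)
p_value = norm.sf(z)  # chance of a z-score at least this large under the null hypothesis
print(f"z={z:.3f}, p={p_value:.3g}, watermarked={z > 4.0}")
```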
@@ -407,9 +430,59 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
                 seed_separately = gr.Checkbox(label="Seed both generations separately", value=args.seed_separately)
             with gr.Column(scale=1):
                 select_green_tokens = gr.Checkbox(label="Select 'greenlist' from partition", value=args.select_green_tokens)
-
+
+        with gr.Accordion("Understanding the settings",open=False):
+            gr.Markdown(
+                """
+                #### Generation Parameters:
+
+                - Decoding Method : We can generate tokens from the model using either multinomial sampling or greedy decoding.
+                - Sampling Temperature : If using multinomial sampling we can set the temperature of the sampling distribution.
+                    0.0 is equivalent to greedy decoding, and 1.0 is the maximum amount of variability/entropy in the next token distribution.
+                    0.7 strikes a nice balance between faithfulness to the model's estimate of the top candidates and adding variety. Does not apply to greedy decoding.
+                - Generation Seed : The integer to pass to the torch random number generator before running generation. Makes the multinomial sampling strategy
+                    outputs reproducible. Does not apply to greedy decoding.
+                - Number of Beams : When using greedy decoding, we can also set the number of beams to > 1 to enable beam search.
+                    This is not implemented for multinomial sampling (and was excluded from the paper) but may be added in the future.
+                - Max Generated Tokens : The `max_new_tokens` parameter passed to the generation method to stop the output at a certain number of new tokens.
+                    Note that the model is free to generate fewer tokens depending on the prompt.
+                    Implicitly this sets the maximum number of prompt tokens possible as the model's maximum input length minus `max_new_tokens`,
+                    and inputs will be truncated accordingly.
+
+                #### Watermark Parameters:
+
+                - gamma : The fraction of the vocabulary to be partitioned into the greenlist at each generation step.
+                    Smaller gamma values create a stronger watermark by enabling the watermarked model to achieve
+                    a greater differentiation from human/unwatermarked text because it is preferentially sampling
+                    from a smaller green set, making those tokens less likely to occur by chance.
+                - delta : The amount of positive bias to add to the logits of every token in the greenlist
+                    at each generation step before sampling/choosing the next token. Higher delta values
+                    mean that the greenlist tokens are more heavily preferred by the watermarked model
+                    and as the bias becomes very large the watermark transitions from "soft" to "hard".
+                    For a hard watermark, nearly all tokens are green, but this can have a detrimental effect on
+                    generation quality, especially when there is not a lot of flexibility in the distribution.
+                - z-score threshold : The z-score cutoff for the hypothesis test. Higher thresholds (such as 4.0) make
+                    _false positives_ (predicting that human/unwatermarked text is watermarked) very unlikely,
+                    as a genuine human text with a significant number of tokens will almost never achieve
+                    that high of a z-score. Lower thresholds will capture more _true positives_ as some watermarked
+                    texts will contain fewer green tokens and achieve a lower z-score, yet still pass the lower bar and
+                    be flagged as "watermarked". However, a lower threshold will increase the chance that human text
+                    that contains a slightly higher than average number of green tokens is erroneously flagged.
+                    4.0-5.0 offers extremely low false positive rates while still accurately catching most watermarked text.
+                - Ignore Bigram Repeats : This alternate detection algorithm only considers the unique bigrams in the text during detection,
+                    computing the greenlists based on the first token in each pair and checking whether the second falls within the list.
+                    This means that `T` is now the number of unique bigrams in the text, which becomes less than the total
+                    number of tokens generated if the text contains a lot of repetition. See the paper for a more detailed discussion.
+                - Normalizations : We implement a few basic normalizations to defend against various adversarial perturbations of the
+                    text analyzed during detection. Currently we support converting all characters to unicode,
+                    replacing homoglyphs with a canonical form, and standardizing the capitalization.
+                    See the paper for a detailed discussion of input normalization.
+                """
+            )
+
         gr.HTML("""
-                <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+                <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
+                Follow the github link at the top and host the demo on your own GPU hardware to test out larger models.
             <br/>
             <a href="https://huggingface.co/spaces/tomg-group-umd/lm-watermarking?duplicate=true">
             <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
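Note: for intuition on how `gamma` and `delta` interact at each decoding step, here is a toy sketch of the "soft" greenlist bias described above: seed a generator from the previous token, treat a gamma fraction of the vocabulary as green, and add delta to those logits before sampling. This illustrates the idea only; it is not the repository's `WatermarkLogitsProcessor` implementation, and the hash constant is an arbitrary prime:

```python
import torch

def soft_watermark_bias(logits: torch.Tensor, prev_token: int,
                        gamma: float = 0.25, delta: float = 2.0,
                        hash_key: int = 15485863) -> torch.Tensor:
    """Toy single-token seeding: bias a pseudorandom gamma-fraction of the vocabulary."""
    vocab_size = logits.shape[-1]
    gen = torch.Generator()
    gen.manual_seed(hash_key * prev_token)       # greenlist depends only on the prefix token
    perm = torch.randperm(vocab_size, generator=gen)
    greenlist = perm[: int(gamma * vocab_size)]  # first gamma fraction is "green"
    biased = logits.clone()
    biased[..., greenlist] += delta              # soft watermark: push up green logits
    return biased

# Bias a random logit vector and sample the next token.
logits = torch.randn(1, 50_000)
probs = torch.softmax(soft_watermark_bias(logits, prev_token=42), dim=-1)
next_token = torch.multinomial(probs, num_samples=1)
```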
@@ -506,7 +579,8 @@ def run_gradio(args, model=None, device=None, tokenizer=None):
     demo.launch()
 
 def main(args):
-
+    """Run a command line version of the generation and detection operations
+       and optionally launch and serve the gradio demo"""
     # Initial arg processing and log
     args.normalizers = (args.normalizers.split(",") if args.normalizers else [])
     print(args)
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+gradio
 nltk
 scipy
 torch