Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,7 +16,7 @@ def load_model(model_name):
|
|
| 16 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 17 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 18 |
return model, tokenizer
|
| 19 |
-
def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95):
|
| 20 |
if len(input_text) == 0:
|
| 21 |
input_text = ""
|
| 22 |
encoded_prompt = tokenizer.encode(
|
|
@@ -27,10 +27,18 @@ def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95):
|
|
| 27 |
else:
|
| 28 |
input_ids = encoded_prompt
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
output_sequences = model.generate(
|
| 31 |
input_ids=input_ids,
|
| 32 |
max_length=max_size + len(encoded_prompt[0]),
|
| 33 |
top_k=top_k,
|
|
|
|
| 34 |
top_p=top_p,
|
| 35 |
do_sample=True,
|
| 36 |
num_return_sequences=num_return_sequences)
|
|
@@ -73,6 +81,8 @@ if __name__ == "__main__":
|
|
| 73 |
num_return_sequences = st.sidebar.slider("Outputs", 1, 50, 5,help="The number of outputs to be returned.")
|
| 74 |
top_k = st.sidebar.slider("Top-K", 0, 100, 40, help="The number of highest probability vocabulary tokens to keep for top-k-filtering.")
|
| 75 |
top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.92, help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.")
|
|
|
|
|
|
|
| 76 |
if st.button("Generate Text"):
|
| 77 |
with st.spinner(text="Generating results..."):
|
| 78 |
st.subheader("Result")
|
|
@@ -83,7 +93,8 @@ if __name__ == "__main__":
|
|
| 83 |
num_return_sequences=int(num_return_sequences),
|
| 84 |
max_size=int(max_len),
|
| 85 |
top_k=int(top_k),
|
| 86 |
-
top_p=float(top_p)
|
|
|
|
| 87 |
print("Done length: " + str(len(result)) + " bytes")
|
| 88 |
#<div class="rtl" dir="rtl" style="text-align:right;">
|
| 89 |
st.markdown(f"{result}", unsafe_allow_html=True)
|
|
|
|
| 16 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 17 |
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 18 |
return model, tokenizer
|
| 19 |
+
def extend(input_text, num_return_sequences, max_size=20, top_k=50, top_p=0.95, bad_words=""):
|
| 20 |
if len(input_text) == 0:
|
| 21 |
input_text = ""
|
| 22 |
encoded_prompt = tokenizer.encode(
|
|
|
|
| 27 |
else:
|
| 28 |
input_ids = encoded_prompt
|
| 29 |
|
| 30 |
+
bad_words = bad_words.split()
|
| 31 |
+
bad_word_ids = []
|
| 32 |
+
for bad_word in bad_words:
|
| 33 |
+
bad_word = " " + bad_word
|
| 34 |
+
ids = tokenizer(bad_word).input_ids
|
| 35 |
+
bad_word_ids.append(ids)
|
| 36 |
+
|
| 37 |
output_sequences = model.generate(
|
| 38 |
input_ids=input_ids,
|
| 39 |
max_length=max_size + len(encoded_prompt[0]),
|
| 40 |
top_k=top_k,
|
| 41 |
+
bad_words_ids=bad_word_ids,
|
| 42 |
top_p=top_p,
|
| 43 |
do_sample=True,
|
| 44 |
num_return_sequences=num_return_sequences)
|
|
|
|
| 81 |
num_return_sequences = st.sidebar.slider("Outputs", 1, 50, 5,help="The number of outputs to be returned.")
|
| 82 |
top_k = st.sidebar.slider("Top-K", 0, 100, 40, help="The number of highest probability vocabulary tokens to keep for top-k-filtering.")
|
| 83 |
top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.92, help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.")
|
| 84 |
+
bad_words = st.text_input("Words You Do Not Want Generated", " core lemon height time ")
|
| 85 |
+
|
| 86 |
if st.button("Generate Text"):
|
| 87 |
with st.spinner(text="Generating results..."):
|
| 88 |
st.subheader("Result")
|
|
|
|
| 93 |
num_return_sequences=int(num_return_sequences),
|
| 94 |
max_size=int(max_len),
|
| 95 |
top_k=int(top_k),
|
| 96 |
+
top_p=float(top_p),
|
| 97 |
+
bad_words=bad_words))
|
| 98 |
print("Done length: " + str(len(result)) + " bytes")
|
| 99 |
#<div class="rtl" dir="rtl" style="text-align:right;">
|
| 100 |
st.markdown(f"{result}", unsafe_allow_html=True)
|