Space status: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,7 +39,6 @@ desc2opt = {v: k for k, v in opt2desc.items()}
|
|
| 39 |
optimization_options = list(opt2desc.values())
|
| 40 |
|
| 41 |
|
| 42 |
-
|
| 43 |
def download_and_tokenize(
|
| 44 |
ds_name,
|
| 45 |
ds_config,
|
|
@@ -51,7 +50,6 @@ def download_and_tokenize(
|
|
| 51 |
num2embed,
|
| 52 |
progress=gr.Progress(track_tqdm=True),
|
| 53 |
):
|
| 54 |
-
|
| 55 |
num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
|
| 56 |
|
| 57 |
opt_level = desc2opt[opt_desc]
|
|
@@ -69,8 +67,6 @@ def download_and_tokenize(
|
|
| 69 |
)
|
| 70 |
|
| 71 |
return f"Downloaded! It has {len(num_samples)} docs."
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
|
| 76 |
def embed(
|
|
@@ -85,7 +81,6 @@ def embed(
|
|
| 85 |
num2embed,
|
| 86 |
progress=gr.Progress(track_tqdm=True),
|
| 87 |
):
|
| 88 |
-
|
| 89 |
ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
|
| 90 |
|
| 91 |
opt_level = desc2opt[opt_desc]
|
|
@@ -154,7 +149,9 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
| 154 |
value="wikipedia",
|
| 155 |
)
|
| 156 |
ds_config = gr.Textbox(
|
| 157 |
-
lines=1,
|
|
|
|
|
|
|
| 158 |
)
|
| 159 |
|
| 160 |
column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
|
|
@@ -208,18 +205,20 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
| 208 |
)
|
| 209 |
|
| 210 |
with gr.Row():
|
| 211 |
-
|
| 212 |
download_btn = gr.Button(value="Download and tokenize dataset!")
|
| 213 |
embed_btn = gr.Button(value="Embed texts!")
|
| 214 |
|
| 215 |
last = gr.Textbox(value="")
|
| 216 |
|
| 217 |
download_btn.click(
|
| 218 |
-
fn=
|
| 219 |
inputs=[
|
| 220 |
ds_name,
|
| 221 |
ds_config,
|
|
|
|
| 222 |
ds_split,
|
|
|
|
|
|
|
| 223 |
num2skip,
|
| 224 |
num2embed,
|
| 225 |
],
|
|
@@ -244,4 +243,4 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
| 244 |
|
| 245 |
|
| 246 |
if __name__ == "__main__":
|
| 247 |
-
demo.queue(concurrency_count=20).launch(show_error=True, debug=True)
|
|
|
|
| 39 |
optimization_options = list(opt2desc.values())
|
| 40 |
|
| 41 |
|
|
|
|
| 42 |
def download_and_tokenize(
|
| 43 |
ds_name,
|
| 44 |
ds_config,
|
|
|
|
| 50 |
num2embed,
|
| 51 |
progress=gr.Progress(track_tqdm=True),
|
| 52 |
):
|
|
|
|
| 53 |
num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
|
| 54 |
|
| 55 |
opt_level = desc2opt[opt_desc]
|
|
|
|
| 67 |
)
|
| 68 |
|
| 69 |
return f"Downloaded! It has {len(num_samples)} docs."
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def embed(
|
|
|
|
| 81 |
num2embed,
|
| 82 |
progress=gr.Progress(track_tqdm=True),
|
| 83 |
):
|
|
|
|
| 84 |
ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
|
| 85 |
|
| 86 |
opt_level = desc2opt[opt_desc]
|
|
|
|
| 149 |
value="wikipedia",
|
| 150 |
)
|
| 151 |
ds_config = gr.Textbox(
|
| 152 |
+
lines=1,
|
| 153 |
+
label="Dataset config (leave blank to use default)",
|
| 154 |
+
value="20220301.en",
|
| 155 |
)
|
| 156 |
|
| 157 |
column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
|
|
|
|
| 205 |
)
|
| 206 |
|
| 207 |
with gr.Row():
|
|
|
|
| 208 |
download_btn = gr.Button(value="Download and tokenize dataset!")
|
| 209 |
embed_btn = gr.Button(value="Embed texts!")
|
| 210 |
|
| 211 |
last = gr.Textbox(value="")
|
| 212 |
|
| 213 |
download_btn.click(
|
| 214 |
+
fn=download_and_tokenize,
|
| 215 |
inputs=[
|
| 216 |
ds_name,
|
| 217 |
ds_config,
|
| 218 |
+
column_name,
|
| 219 |
ds_split,
|
| 220 |
+
model_choice,
|
| 221 |
+
opt_desc,
|
| 222 |
num2skip,
|
| 223 |
num2embed,
|
| 224 |
],
|
|
|
|
| 243 |
|
| 244 |
|
| 245 |
if __name__ == "__main__":
|
| 246 |
+
demo.queue(concurrency_count=20).launch(show_error=True, debug=True)
|