use multithreading instead of multiprocessing
app.py
CHANGED
@@ -1,12 +1,10 @@
 import json
 import pandas as pd
 import requests
-
-from functools import partial
+import threading
 import streamlit as st
 
 
-GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
 MODELS = ["CodeParrot", "InCoder", "CodeGen", "PolyCoder"]
 GENERATION_MODELS = ["CodeParrot", "InCoder", "CodeGen"]
 
@@ -17,7 +15,14 @@ def load_examples():
     return examples
 
 
-def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
+def read_markdown(path):
+    with open(path, "r") as f:
+        output = f.read()
+    st.markdown(output, unsafe_allow_html=True)
+
+
+def generate_code(generations, model_name, gen_prompt, max_new_tokens, temperature, seed):
+    # call space using its API endpoint
     url = (
         f"https://hf.space/embed/loubnabnl/{model_name.lower()}-subspace/+/api/predict/"
     )
@@ -25,12 +30,21 @@ def generate_code(model_name, gen_prompt, max_new_tokens, temperature, seed):
         url=url, json={"data": [gen_prompt, max_new_tokens, temperature, seed]}
     )
     generated_text = r.json()["data"][0]
-
+    generations.append(generated_text)
 
-
-
-
-
+
+def generate_code_threads(generations, models, gen_prompt, max_new_tokens, temperature, seed):
+    threads = []
+    for model_name in models:
+        # create the thread
+        threads.append(
+            threading.Thread(target=generate_code, args=(generations, model_name, gen_prompt, max_new_tokens, temperature, seed))
+        )
+        threads[-1].start()
+
+    for t in threads:
+        t.join()
+
 
 st.set_page_config(page_icon=":laptop:", layout="wide")
 with open("utils/table_contents.txt", "r") as f:
@@ -45,9 +59,11 @@ read_markdown("utils/intro.txt")
 st.subheader("1 - Code datasets")
 read_markdown("datasets/intro.txt")
 read_markdown("datasets/github_code.txt")
+#GITHUB_CODE = "https://huggingface.co/datasets/lvwerra/github-code"
 #st.markdown(f"Preview of some code files from Github repositories in [Github-code dataset]({GITHUB_CODE}):")
 #df = pd.read_csv("utils/data_preview.csv")
 #st.dataframe(df)
+
 col1, col2= st.columns([1,2])
 with col1:
     selected_model = st.selectbox("", MODELS, key=1)
@@ -107,19 +123,21 @@ gen_prompt = st.text_area(
 ).strip()
 if st.button("Generate code!"):
     with st.spinner("Generating code..."):
-        #
-
-
-
+        # use threading
+        generations = []
+        generate_code_threads(
+            generations,
+            selected_models,
             gen_prompt=gen_prompt,
             max_new_tokens=max_new_tokens,
            temperature=temperature,
             seed=seed,
         )
-
-
+        for i in range(len(generations)):
+            print(generations[i])
+        for i in range(len(generations)):
             st.markdown(f"**{selected_models[i]}**")
-            st.code(
+            st.code(generations[i])
 
 # Resources
 st.subheader("Resources")