Spaces:

chrisjay
/

mnist-adversarial

Runtime error

App Files Files Community

chrisjay committed on Jul 19, 2022

Commit

e4a62fe

1 Parent(s): 603879a

fix to dashboard not loading

Browse files

Files changed (3) hide show

app.py +25 -18
data_mnist +1 -1
utils.py +20 -9

app.py CHANGED Viewed

@@ -37,7 +37,7 @@ os.makedirs(LOCAL_DIR,exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_REPO = 'mnist-adversarial-model'
 HF_DATASET ="mnist-adversarial-dataset"
@@ -74,10 +74,11 @@ class MNISTAdversarial_Dataset(Dataset):
             image_path =os.path.join(self.FOLDER,'image.png')
             if os.path.exists(image_path) and os.path.exists(metadata_path):
-                img = Image.open(image_path)
-                self.images.append(img)
                 metadata = read_json_lines(metadata_path)
-                self.numbers.append(metadata[0]['correct_number'])
         assert len(self.images)==len(self.numbers), f"Length of images and numbers must be the same. Got {len(self.images)} for images and {len(self.numbers)} for numbers."
     def __len__(self):
         return len(self.images)
@@ -395,8 +396,15 @@ def flag(input_image,correct_result,adversarial_number):
     return output,adversarial_number
 def get_number_dict(DATA_DIR):
     files = [f.name for f in os.scandir(DATA_DIR)]
-    numbers = [read_json_lines(os.path.join(os.path.join(DATA_DIR,f),'metadata.jsonl'))[0]['correct_number'] for f in files]
     numbers_count = Counter(numbers)
     numbers_count_keys = list(numbers_count.keys())
     numbers_count_values = [numbers_count[k] for k in numbers_count_keys]
@@ -425,10 +433,8 @@ def get_statistics():
     repo.git_pull()
     DATA_DIR = './data_mnist/data'
     numbers_count_keys,numbers_count_values = get_number_dict(DATA_DIR)
     STATS_EXPLANATION_ = STATS_EXPLANATION.format(num_adv_samples = sum(numbers_count_values))
-    plt_digits = plot_bar(numbers_count_values,numbers_count_keys,'Number of adversarial samples',"Digit",f"Distribution of adversarial samples per digit")
     fig_d, ax_d = plt.subplots(tight_layout=True)
@@ -440,26 +446,25 @@ def get_statistics():
                 ax_d.plot(x_i, metric_dict[str(i)],label=str(i))
             except Exception:
                 continue
     else:
         metric_dict={}
     fig_d.legend()
     ax_d.set(xlabel='Adversarial train steps', ylabel='MNIST_C Test Accuracy',title="Test Accuracy over digits per train step")
-    done_html =  """<div style="color: green">
-                <p> ✅ Statistics loaded successfully!</p>
                 </div>
                """
     # Plot for total test accuracy for all digits
     fig_all, ax_all = plt.subplots(tight_layout=True)
     x_i = [i+1 for i in range(len(metric_dict['all']))]
     ax_all.plot(x_i, metric_dict['all'])
-    fig_all.legend()
     ax_all.set(xlabel='Adversarial train steps', ylabel='MNIST_C Test Accuracy',title="Test Accuracy for all digits")
     return plt_digits,ax_d.figure,ax_all.figure,done_html,STATS_EXPLANATION_
@@ -485,7 +490,7 @@ def main():
                 number_dropdown = gr.Dropdown(choices=[i for i in range(10)],type='value',default=None,label="What was the correct prediction?")
                 flag_btn = gr.Button("Flag")
                 output_result = gr.outputs.HTML()
@@ -496,8 +501,9 @@ def main():
                 flag_btn.click(flag,inputs=[image_input,number_dropdown,adversarial_number],outputs=[output_result,adversarial_number])
             with gr.TabItem('Dashboard') as dashboard:
-                notification = gr.HTML("""<div style="color: green">
-                                        <p> ⌛ Creating statistics... </p>
                                         </div>
                                     """)
@@ -508,7 +514,8 @@ def main():
                 gr.Markdown(DASHBOARD_EXPLANATION_TEST)
                 test_results_all=gr.Plot(type="matplotlib")
-            dashboard.select(get_statistics,inputs=[],outputs=[stat_adv_image,test_results,test_results_all,notification,stats])

+GET_STATISTICS_MESSAGE = "Get Statistics"
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_REPO = 'mnist-adversarial-model'
 HF_DATASET ="mnist-adversarial-dataset"
             image_path =os.path.join(self.FOLDER,'image.png')
             if os.path.exists(image_path) and os.path.exists(metadata_path):
                 metadata = read_json_lines(metadata_path)
+                if metadata is not None:
+                    img = Image.open(image_path)
+                    self.images.append(img)
+                    self.numbers.append(metadata[0]['correct_number'])
         assert len(self.images)==len(self.numbers), f"Length of images and numbers must be the same. Got {len(self.images)} for images and {len(self.numbers)} for numbers."
     def __len__(self):
         return len(self.images)
     return output,adversarial_number
 def get_number_dict(DATA_DIR):
+    """
+    It takes a directory as input, and returns a list of the number of times each number appears in the
+    metadata.jsonl files in that directory
+    :param DATA_DIR: The directory where the data is stored
+    """
     files = [f.name for f in os.scandir(DATA_DIR)]
+    metadata_jsons = [read_json_lines(os.path.join(os.path.join(DATA_DIR,f),'metadata.jsonl')) for f in files]
+    numbers = [m[0]['correct_number'] for m in metadata_jsons if m is not None]
     numbers_count = Counter(numbers)
     numbers_count_keys = list(numbers_count.keys())
     numbers_count_values = [numbers_count[k] for k in numbers_count_keys]
     repo.git_pull()
     DATA_DIR = './data_mnist/data'
     numbers_count_keys,numbers_count_values = get_number_dict(DATA_DIR)
     STATS_EXPLANATION_ = STATS_EXPLANATION.format(num_adv_samples = sum(numbers_count_values))
+    plt_digits = plot_bar(numbers_count_values,numbers_count_keys,'Number of adversarial samples',"Digit",f"Distribution of adversarial samples per digit",True)
     fig_d, ax_d = plt.subplots(tight_layout=True)
                 ax_d.plot(x_i, metric_dict[str(i)],label=str(i))
             except Exception:
                 continue
+        ax_d.set_xticks(range(0, len(metric_dict['0'])+1, 1))
     else:
         metric_dict={}
     fig_d.legend()
     ax_d.set(xlabel='Adversarial train steps', ylabel='MNIST_C Test Accuracy',title="Test Accuracy over digits per train step")
+    done_html =  f"""<div style="color: green">
+                <p> ✅ Statistics loaded successfully! Click `{GET_STATISTICS_MESSAGE}`to reload.</p>
                 </div>
                """
     # Plot for total test accuracy for all digits
     fig_all, ax_all = plt.subplots(tight_layout=True)
     x_i = [i+1 for i in range(len(metric_dict['all']))]
     ax_all.plot(x_i, metric_dict['all'])
     ax_all.set(xlabel='Adversarial train steps', ylabel='MNIST_C Test Accuracy',title="Test Accuracy for all digits")
+    ax_all.set_xticks(range(0, x_i[-1]+1, 1))
     return plt_digits,ax_d.figure,ax_all.figure,done_html,STATS_EXPLANATION_
                 number_dropdown = gr.Dropdown(choices=[i for i in range(10)],type='value',default=None,label="What was the correct prediction?")
+                gr.Markdown('Please wait a while after you press `Flag`. It takes time.')
                 flag_btn = gr.Button("Flag")
                 output_result = gr.outputs.HTML()
                 flag_btn.click(flag,inputs=[image_input,number_dropdown,adversarial_number],outputs=[output_result,adversarial_number])
             with gr.TabItem('Dashboard') as dashboard:
+                get_stat = gr.Button(f'{GET_STATISTICS_MESSAGE}')
+                notification = gr.HTML(f"""<div style="color: green">
+                                        <p> ⌛ Click `{GET_STATISTICS_MESSAGE}` to generate statistics... </p>
                                         </div>
                                     """)
                 gr.Markdown(DASHBOARD_EXPLANATION_TEST)
                 test_results_all=gr.Plot(type="matplotlib")
+            #dashboard.select(get_statistics,inputs=[],outputs=[stat_adv_image,test_results,notification,stats])
+            get_stat.click(get_statistics,inputs=[],outputs=[stat_adv_image,test_results,test_results_all,notification,stats])

data_mnist CHANGED Viewed

	@@ -1 +1 @@
1	- Subproject commit ed62a26e764902f519ff43df850842e07dfe2cc0


1	+ Subproject commit 0d5120c897f5b71d2f99b7fb2ef5dc28e3d7000d

utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ import json
 import hashlib
 import random
 import string
 import matplotlib.pyplot as plt
 TITLE = "# MNIST Adversarial: Try to fool this MNIST model"
@@ -25,7 +26,7 @@ MODEL_IS_WRONG = """
 DEFAULT_TEST_METRIC = "<html> Current test metric - Avg. loss: 1000, Accuracy: 30/1000 (30%) </html>"
 DASHBOARD_EXPLANATION="To test the effect of adversarial training on out-of-distribution data, we track the performance progress of the model on the [MNIST Corrupted test dataset](https://zenodo.org/record/3239543). We are using {TEST_PER_SAMPLE} samples per digit."
-DASHBOARD_EXPLANATION_TEST="Test accuracy on out-of-distribution data for all numbers."
 STATS_EXPLANATION = "Here is the distribution of the __{num_adv_samples}__ adversarial samples we've got. The dataset can be found [here](https://huggingface.co/datasets/chrisjay/mnist-adversarial-dataset)."
@@ -39,12 +40,16 @@ def read_json(file):
         return json.load(f)
 def read_json_lines(file):
-    with open(file,'r',encoding="utf8") as f:
-        lines = f.readlines()
-        data=[]
-        for l in lines:
-            data.append(json.loads(l))
-        return data
 def json_dump(thing):
@@ -63,11 +68,17 @@ def dump_json(thing,file):
         json.dump(thing,f)
-def plot_bar(value,name,x_name,y_name,title):
     fig, ax = plt.subplots(tight_layout=True)
     ax.set(xlabel=x_name, ylabel=y_name,title=title)
     ax.barh(name, value)
-    return ax.figure

 import hashlib
 import random
 import string
+import warnings
 import matplotlib.pyplot as plt
 TITLE = "# MNIST Adversarial: Try to fool this MNIST model"
 DEFAULT_TEST_METRIC = "<html> Current test metric - Avg. loss: 1000, Accuracy: 30/1000 (30%) </html>"
 DASHBOARD_EXPLANATION="To test the effect of adversarial training on out-of-distribution data, we track the performance progress of the model on the [MNIST Corrupted test dataset](https://zenodo.org/record/3239543). We are using {TEST_PER_SAMPLE} samples per digit."
+DASHBOARD_EXPLANATION_TEST="Test accuracy on out-of-distribution data for all numbers combined."
 STATS_EXPLANATION = "Here is the distribution of the __{num_adv_samples}__ adversarial samples we've got. The dataset can be found [here](https://huggingface.co/datasets/chrisjay/mnist-adversarial-dataset)."
         return json.load(f)
 def read_json_lines(file):
+    try:
+        with open(file,'r',encoding="utf8") as f:
+            lines = f.readlines()
+            data=[]
+            for l in lines:
+                data.append(json.loads(l))
+            return data
+    except Exception as err:
+        warnings.warn(f"{err}")
+        return None
 def json_dump(thing):
         json.dump(thing,f)
+def plot_bar(value,name,x_name,y_name,title,set_yticks=False,set_xticks=False):
     fig, ax = plt.subplots(tight_layout=True)
     ax.set(xlabel=x_name, ylabel=y_name,title=title)
+    if set_yticks:
+        ax.set_yticks(range(min(name), max(name)+1, 1))
+    if set_xticks:
+        ax.set_xticks(range(min(name), max(name)+1, 1))
     ax.barh(name, value)
+    return ax.figure