Spaces:

jrheiner
/

thesis-demo

Runtime error

App Files Files Community

Jonas Rheiner committed on Aug 16, 2024

Commit

e20beac

1 Parent(s): 8b18a0c

Reformat

Browse files

Files changed (1) hide show

app.py +299 -126

app.py CHANGED Viewed

@@ -18,52 +18,158 @@ device = "cuda" if CUDA_AVAILABLE else "cpu"
 print(f"count={torch.cuda.device_count()}")
 print(f"current={torch.cuda.get_device_name(torch.cuda.current_device())}")
-continent_model = CLIPModel.from_pretrained("jrheiner/thesis-clip-geoloc-continent", token=os.getenv("token"))
-country_model = CLIPModel.from_pretrained("jrheiner/thesis-clip-geoloc-country", token=os.getenv("token"))
-processor = CLIPProcessor.from_pretrained("jrheiner/thesis-clip-geoloc-continent", token=os.getenv("token"))
 continent_model = continent_model.to(device)
 country_model = country_model.to(device)
-continents = ["Africa", "Asia", "Europe",
-              "North America", "Oceania", "South America"]
 countries_per_continent = {
     "Africa": [
-        "Botswana", "Eswatini", "Ghana", "Kenya", "Lesotho", "Nigeria", "Senegal",
-        "South Africa", "Rwanda", "Uganda", "Tanzania", "Madagascar", "Djibouti",
-        "Mali", "Libya", "Morocco", "Somalia", "Tunisia", "Egypt", "Réunion"
     ],
     "Asia": [
-        "Bangladesh", "Bhutan", "Cambodia", "China", "India", "Indonesia", "Israel",
-        "Japan", "Jordan", "Kyrgyzstan", "Laos", "Malaysia", "Mongolia", "Nepal",
-        "Palestine", "Philippines", "Singapore", "South Korea", "Sri Lanka",
-        "Taiwan", "Thailand", "United Arab Emirates", "Vietnam", "Afghanistan",
-        "Azerbaijan", "Cyprus", "Iran", "Syria", "Tajikistan", "Turkey", "Russia",
-        "Pakistan", "Hong Kong"
     ],
     "Europe": [
-        "Albania", "Andorra", "Austria", "Belgium", "Bulgaria", "Croatia", "Czechia",
-        "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
-        "Iceland", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg",
-        "Montenegro", "Netherlands", "North Macedonia", "Norway", "Poland",
-        "Portugal", "Romania", "Russia", "Serbia", "Slovakia", "Slovenia", "Spain",
-        "Sweden", "Switzerland", "Ukraine", "United Kingdom", "Bosnia and Herzegovina",
-        "Cyprus", "Turkey", "Greenland", "Faroe Islands"
     ],
     "North America": [
-        "Canada", "Dominican Republic", "Guatemala", "Mexico", "United States",
-        "Bahamas", "Cuba", "Panama", "Puerto Rico", "Bermuda", "Greenland"
     ],
     "Oceania": [
-        "Australia", "New Zealand", "Fiji", "Papua New Guinea", "Solomon Islands", "Vanuatu"
     ],
     "South America": [
-        "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador", "Paraguay",
-        "Peru", "Uruguay"
-    ]
 }
-countries = list(set(itertools.chain.from_iterable(
-    countries_per_continent.values())))
 country_to_center_coords = {
     "Indonesia": (-2.4833826, 117.8902853),
@@ -181,7 +287,7 @@ country_to_center_coords = {
     "Djibouti": (11.8145966, 42.8453061),
     "Senegal": (14.4750607, -14.4529612),
     "Bermuda": (32.3040273, -64.7563086),
-    "United States": (39.7837304, -100.445882)
 }
 INTIAL_VERSUS_IMAGE = "versus_images/Europe_Germany_49.069183_10.319444_im2gps3k.jpg"
@@ -191,29 +297,35 @@ INITAL_VERSUS_STATE = {
     "country": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[1],
     "lat": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[2],
     "lon": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[3],
-    "score": {
-        "HUMAN": 0,
-        "AI": 0
-    },
-    "idx": 0
 }
 def predict(input_img):
-    inputs = processor(text=[f"A photo from {
-                       geo}." for geo in continents], images=input_img, return_tensors="pt", padding=True)
     inputs = inputs.to(device)
     with torch.no_grad():
         outputs = continent_model(**inputs)
         logits_per_image = outputs.logits_per_image
         probs = logits_per_image.softmax(dim=-1)
         pred_id = probs.argmax().cpu().item()
-    continent_probs = {label: prob for label,
-                       prob in zip(continents, probs.tolist()[0])}
     model_continent = continents[pred_id]
     predicted_continent_countries = countries_per_continent[model_continent]
-    inputs = processor(text=[f"A photo from {
-                       geo}." for geo in predicted_continent_countries], images=input_img, return_tensors="pt", padding=True)
     inputs = inputs.to(device)
     with torch.no_grad():
         outputs = country_model(**inputs)
@@ -221,26 +333,37 @@ def predict(input_img):
         probs = logits_per_image.softmax(dim=-1)
         pred_id = probs.argmax().cpu().item()
     model_country = predicted_continent_countries[pred_id]
-    country_probs = {label: prob for label, prob in zip(
-        predicted_continent_countries, probs.tolist()[0])}
     hash = hashlib.sha1(np.asarray(input_img).data.tobytes()).hexdigest()
     metadata_block = gr.Accordion(visible=False)
     metadata_map = None
     if hash in EXAMPLE_METADATA.keys():
         model_result = ""
-        if model_continent == EXAMPLE_METADATA[hash]['continent'] and model_country == EXAMPLE_METADATA[hash]['country']:
             model_result = "The AI 🤖 correctly guessed continent and country  ✅ ✅."
-        elif model_continent == EXAMPLE_METADATA[hash]['continent']:
             model_result = "The AI 🤖 only guessed the correct continent ❌ ✅."
-        elif model_country == EXAMPLE_METADATA[hash]['country'] and model_continent != EXAMPLE_METADATA[hash]['continent']:
             model_result = "The AI 🤖 only guessed the correct country ✅ ❌."
         else:
             model_result = "The AI 🤖 failed to guess country and continent ❌ ❌."
-        metadata_block = gr.Accordion(visible=True, label=f"This photo was taken in {EXAMPLE_METADATA[hash]['country']}, {EXAMPLE_METADATA[hash]['continent']}.\n{model_result}")
         metadata_map = make_versus_map(None, model_country, EXAMPLE_METADATA[hash])
     return continent_probs, country_probs, metadata_block, metadata_map
 def make_versus_map(human_country, model_country, versus_state):
     if human_country:
         human_coordinates = country_to_center_coords[human_country]
@@ -248,64 +371,66 @@ def make_versus_map(human_country, model_country, versus_state):
         human_coordinates = (None, None)
     model_coordinates = country_to_center_coords[model_country]
     fig = go.Figure()
-    fig.add_trace(go.Scattermapbox(
-        lon=[versus_state["lon"]],
-        lat=[versus_state["lat"]],
-        text=[f"📷 Photo taken in {versus_state['country']}, {
-            versus_state['continent']}"],
-        mode='markers',
-        hoverinfo='text',
-        marker=dict(size=14, color='#0C5DA5'),
-        showlegend=True,
-        name="📷 Photo Location"
-    ))
-    if human_country == model_country:
-        fig.add_trace(go.Scattermapbox(
-            lat=[human_coordinates[0], model_coordinates[0]],
-            lon=[human_coordinates[1], model_coordinates[1]],
-            text=f"🧑 🤖 Human & AI guess {human_country}",
-            mode='markers',
-            hoverinfo='text',
-            marker=dict(size=14, color='#FF9500'),
             showlegend=True,
-            name="🧑 🤖 Human & AI Guess"
-        ))
     else:
         if human_country:
-            fig.add_trace(go.Scattermapbox(
-                lat=[human_coordinates[0]],
-                lon=[human_coordinates[1]],
-                text=[f"🧑 Human guesses {human_country}"],
-                mode='markers',
-                hoverinfo='text',
-                marker=dict(size=14, color='#FF9500'),
                 showlegend=True,
-                name="🧑 Human Guess"
-            ))
-        fig.add_trace(go.Scattermapbox(
-            lat=[model_coordinates[0]],
-            lon=[model_coordinates[1]],
-            text=[f"🤖 AI guesses {model_country}"],
-            mode='markers',
-            hoverinfo='text',
-            marker=dict(size=14, color='#474747'),
-            showlegend=True,
-            name="🤖 AI Guess"
-        ))
     fig.update_layout(
         mapbox=dict(
             style="carto-positron",
             center=dict(lat=float(versus_state["lat"]), lon=float(versus_state["lon"])),
-            zoom=2
         ),
         margin={"r": 0, "t": 0, "l": 0, "b": 0},
-        legend=dict(
-            yanchor="bottom",
-            y=0.01,
-            xanchor="left",
-            x=0.01
-        )
     )
     return fig
@@ -323,12 +448,13 @@ def versus_mode_inputs(input_img, human_continent, human_country, versus_state):
         human_points += 1
     else:
         continent_result = "❌"
-    human_result = f"The photo is from **{versus_state['country']}** {
-        country_result} in **{versus_state['continent']}** {continent_result}"
-    human_score_update = f"+{human_points} points" if human_points > 0 else "0 Points..."
-    versus_state['score']['HUMAN'] += human_points
-    continent_probs, country_probs, _,_ = predict(input_img)
     model_country = max(country_probs, key=country_probs.get)
     model_continent = max(continent_probs, key=continent_probs.get)
     if model_country == versus_state["country"]:
@@ -341,11 +467,16 @@ def versus_mode_inputs(input_img, human_continent, human_country, versus_state):
         model_points += 1
     else:
         model_continent_result = "❌"
-    model_score_update = f"+{model_points} points" if model_points > 0 else "0 Points... The model was completely wrong, it seems the world is not doomed yet."
-    versus_state['score']['AI'] += model_points
     map = make_versus_map(human_country, model_country, versus_state)
-    return f"""
 ## {human_result}
 ### The AI 🤖 thinks this photo is from **{model_country}** {model_country_result} in **{model_continent}** {model_continent_result}
@@ -353,7 +484,12 @@ def versus_mode_inputs(input_img, human_continent, human_country, versus_state):
 🤖 {model_score_update}
 ### Score     🧑 {versus_state['score']['HUMAN']} : {versus_state['score']['AI']} 🤖
-""", continent_probs, country_probs, map, versus_state
 def get_example_images(dir):
@@ -393,45 +529,65 @@ for img_path in example_images:
 demo = gr.Blocks(title="Thesis Demo")
 with demo:
-    gr.HTML("""
 <h1 style="text-align: center; margin-bottom: 1rem">Image Geolocation Thesis Demo</h1>
 <h3> This Demo showcases the developed models and allows interacting with the optimized prototype.</h3>
 <p>Try the <b>"Image Geolocation Demo"</b> tab with your own images or with one of the examples. For all example image the ground truth is available and will be displayed together with the model predictions.</p>
 <p>In the  <b>"Versus Mode"</b> tab to play against the AI, guessing the country and continent where images where taken. Images in the versus mode are from the <a href="http://graphics.cs.cmu.edu/projects/im2gps/"><code>Im2GPS</code></a> and <a href="https://arxiv.org/abs/1705.04838"><code>Im2GPS3k</code></a> geolocation literature benchmarks. Can you beat the AI?
-""")
-    with gr.Accordion(label="The demo currently encompasses 116 countries from 6 continents 🌍", open=False):
-        gr.Code(json.dumps(countries_per_continent, indent=2, ensure_ascii=False), label="countries_per_continent.json", language="json", interactive=False)
     with gr.Tab("Image Geolocation Demo"):
         with gr.Row():
             with gr.Column():
-                image = gr.Image(label="Image", type="pil",
-                                 sources=["upload", "clipboard"])
                 predict_btn = gr.Button("Predict")
                 example_images = get_example_images("kerger-test-images")
                 # example_images.extend(get_example_images("versus_images"))
-                gr.Examples(examples=example_images,
-                            inputs=image, examples_per_page=24)
             with gr.Column():
                 with gr.Accordion(visible=False) as metadata_block:
                     map = gr.Plot(label="Locations")
                 with gr.Group():
                     continents_label = gr.Label(label="Continents")
-                    country_label = gr.Label(
-                        num_top_classes=5, label="Top countries")
-    predict_btn.click(predict, inputs=image, outputs=[
-                      continents_label, country_label, metadata_block, map])
     with gr.Tab("Versus Mode"):
         versus_state = gr.State(value=INITAL_VERSUS_STATE)
         with gr.Row():
             with gr.Column():
-                versus_image = gr.Image(
-                    INITAL_VERSUS_STATE["image"], interactive=False)
                 continent_selection = gr.Radio(
-                    continents, label="Continents", info="Where was this image taken? (1 Point)")
-                country_selection = gr.Dropdown(countries, label="Countries", info="Can you guess the exact country? (2 Points)"),
                 with gr.Row():
                     next_img_btn = gr.Button("Try new image")
                     versus_btn = gr.Button("Submit guess")
@@ -443,11 +599,28 @@ with demo:
                     with gr.Group():
                         continents_label = gr.Label(label="Continents")
                         country_label = gr.Label(
-                            num_top_classes=5, label="Top countries")
-        next_img_btn.click(next_versus_image, inputs=[versus_state], outputs=[
-                           versus_image, versus_state, continent_selection, country_selection[0]])
-        versus_btn.click(versus_mode_inputs, inputs=[versus_image, continent_selection, country_selection[0], versus_state], outputs=[
-                         versus_output, continents_label, country_label, map, versus_state])
 if __name__ == "__main__":

 print(f"count={torch.cuda.device_count()}")
 print(f"current={torch.cuda.get_device_name(torch.cuda.current_device())}")
+continent_model = CLIPModel.from_pretrained(
+    "jrheiner/thesis-clip-geoloc-continent",
+    token=os.getenv("token"),
+)
+country_model = CLIPModel.from_pretrained(
+    "jrheiner/thesis-clip-geoloc-country",
+    token=os.getenv("token"),
+)
+processor = CLIPProcessor.from_pretrained(
+    "jrheiner/thesis-clip-geoloc-continent",
+    token=os.getenv("token"),
+)
 continent_model = continent_model.to(device)
 country_model = country_model.to(device)
+continents = ["Africa", "Asia", "Europe", "North America", "Oceania", "South America"]
 countries_per_continent = {
     "Africa": [
+        "Botswana",
+        "Eswatini",
+        "Ghana",
+        "Kenya",
+        "Lesotho",
+        "Nigeria",
+        "Senegal",
+        "South Africa",
+        "Rwanda",
+        "Uganda",
+        "Tanzania",
+        "Madagascar",
+        "Djibouti",
+        "Mali",
+        "Libya",
+        "Morocco",
+        "Somalia",
+        "Tunisia",
+        "Egypt",
+        "Réunion",
     ],
     "Asia": [
+        "Bangladesh",
+        "Bhutan",
+        "Cambodia",
+        "China",
+        "India",
+        "Indonesia",
+        "Israel",
+        "Japan",
+        "Jordan",
+        "Kyrgyzstan",
+        "Laos",
+        "Malaysia",
+        "Mongolia",
+        "Nepal",
+        "Palestine",
+        "Philippines",
+        "Singapore",
+        "South Korea",
+        "Sri Lanka",
+        "Taiwan",
+        "Thailand",
+        "United Arab Emirates",
+        "Vietnam",
+        "Afghanistan",
+        "Azerbaijan",
+        "Cyprus",
+        "Iran",
+        "Syria",
+        "Tajikistan",
+        "Turkey",
+        "Russia",
+        "Pakistan",
+        "Hong Kong",
     ],
     "Europe": [
+        "Albania",
+        "Andorra",
+        "Austria",
+        "Belgium",
+        "Bulgaria",
+        "Croatia",
+        "Czechia",
+        "Denmark",
+        "Estonia",
+        "Finland",
+        "France",
+        "Germany",
+        "Greece",
+        "Hungary",
+        "Iceland",
+        "Ireland",
+        "Italy",
+        "Latvia",
+        "Lithuania",
+        "Luxembourg",
+        "Montenegro",
+        "Netherlands",
+        "North Macedonia",
+        "Norway",
+        "Poland",
+        "Portugal",
+        "Romania",
+        "Russia",
+        "Serbia",
+        "Slovakia",
+        "Slovenia",
+        "Spain",
+        "Sweden",
+        "Switzerland",
+        "Ukraine",
+        "United Kingdom",
+        "Bosnia and Herzegovina",
+        "Cyprus",
+        "Turkey",
+        "Greenland",
+        "Faroe Islands",
     ],
     "North America": [
+        "Canada",
+        "Dominican Republic",
+        "Guatemala",
+        "Mexico",
+        "United States",
+        "Bahamas",
+        "Cuba",
+        "Panama",
+        "Puerto Rico",
+        "Bermuda",
+        "Greenland",
     ],
     "Oceania": [
+        "Australia",
+        "New Zealand",
+        "Fiji",
+        "Papua New Guinea",
+        "Solomon Islands",
+        "Vanuatu",
     ],
     "South America": [
+        "Argentina",
+        "Bolivia",
+        "Brazil",
+        "Chile",
+        "Colombia",
+        "Ecuador",
+        "Paraguay",
+        "Peru",
+        "Uruguay",
+    ],
 }
+countries = list(set(itertools.chain.from_iterable(countries_per_continent.values())))
 country_to_center_coords = {
     "Indonesia": (-2.4833826, 117.8902853),
     "Djibouti": (11.8145966, 42.8453061),
     "Senegal": (14.4750607, -14.4529612),
     "Bermuda": (32.3040273, -64.7563086),
+    "United States": (39.7837304, -100.445882),
 }
 INTIAL_VERSUS_IMAGE = "versus_images/Europe_Germany_49.069183_10.319444_im2gps3k.jpg"
     "country": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[1],
     "lat": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[2],
     "lon": INTIAL_VERSUS_IMAGE.split("/")[-1].split("_")[3],
+    "score": {"HUMAN": 0, "AI": 0},
+    "idx": 0,
 }
 def predict(input_img):
+    inputs = processor(
+        text=[f"A photo from {geo}." for geo in continents],
+        images=input_img,
+        return_tensors="pt",
+        padding=True,
+    )
     inputs = inputs.to(device)
     with torch.no_grad():
         outputs = continent_model(**inputs)
         logits_per_image = outputs.logits_per_image
         probs = logits_per_image.softmax(dim=-1)
         pred_id = probs.argmax().cpu().item()
+    continent_probs = {
+        label: prob for label, prob in zip(continents, probs.tolist()[0])
+    }
     model_continent = continents[pred_id]
     predicted_continent_countries = countries_per_continent[model_continent]
+    inputs = processor(
+        text=[f"A photo from {geo}." for geo in predicted_continent_countries],
+        images=input_img,
+        return_tensors="pt",
+        padding=True,
+    )
     inputs = inputs.to(device)
     with torch.no_grad():
         outputs = country_model(**inputs)
         probs = logits_per_image.softmax(dim=-1)
         pred_id = probs.argmax().cpu().item()
     model_country = predicted_continent_countries[pred_id]
+    country_probs = {
+        label: prob for label, prob in zip(predicted_continent_countries, probs.tolist()[0])
+    }
     hash = hashlib.sha1(np.asarray(input_img).data.tobytes()).hexdigest()
     metadata_block = gr.Accordion(visible=False)
     metadata_map = None
     if hash in EXAMPLE_METADATA.keys():
         model_result = ""
+        if (
+            model_continent == EXAMPLE_METADATA[hash]["continent"]
+            and model_country == EXAMPLE_METADATA[hash]["country"]
+        ):
             model_result = "The AI 🤖 correctly guessed continent and country  ✅ ✅."
+        elif model_continent == EXAMPLE_METADATA[hash]["continent"]:
             model_result = "The AI 🤖 only guessed the correct continent ❌ ✅."
+        elif (
+            model_country == EXAMPLE_METADATA[hash]["country"]
+            and model_continent != EXAMPLE_METADATA[hash]["continent"]
+        ):
             model_result = "The AI 🤖 only guessed the correct country ✅ ❌."
         else:
             model_result = "The AI 🤖 failed to guess country and continent ❌ ❌."
+        metadata_block = gr.Accordion(
+            visible=True,
+            label=f"This photo was taken in {EXAMPLE_METADATA[hash]['country']}, {EXAMPLE_METADATA[hash]['continent']}.\n{model_result}",
+        )
         metadata_map = make_versus_map(None, model_country, EXAMPLE_METADATA[hash])
     return continent_probs, country_probs, metadata_block, metadata_map
 def make_versus_map(human_country, model_country, versus_state):
     if human_country:
         human_coordinates = country_to_center_coords[human_country]
         human_coordinates = (None, None)
     model_coordinates = country_to_center_coords[model_country]
     fig = go.Figure()
+    fig.add_trace(
+        go.Scattermapbox(
+            lon=[versus_state["lon"]],
+            lat=[versus_state["lat"]],
+            text=[f"📷 Photo taken in {versus_state['country']}, {versus_state['continent']}"],
+            mode="markers",
+            hoverinfo="text",
+            marker=dict(size=14, color="#0C5DA5"),
             showlegend=True,
+            name="📷 Photo Location",
+        )
+    )
+    if human_country == model_country:
+        fig.add_trace(
+            go.Scattermapbox(
+                lat=[human_coordinates[0], model_coordinates[0]],
+                lon=[human_coordinates[1], model_coordinates[1]],
+                text=f"🧑 🤖 Human & AI guess {human_country}",
+                mode="markers",
+                hoverinfo="text",
+                marker=dict(size=14, color="#FF9500"),
+                showlegend=True,
+                name="🧑 🤖 Human & AI Guess",
+            )
+        )
     else:
         if human_country:
+            fig.add_trace(
+                go.Scattermapbox(
+                    lat=[human_coordinates[0]],
+                    lon=[human_coordinates[1]],
+                    text=[f"🧑 Human guesses {human_country}"],
+                    mode="markers",
+                    hoverinfo="text",
+                    marker=dict(size=14, color="#FF9500"),
+                    showlegend=True,
+                    name="🧑 Human Guess",
+                )
+            )
+        fig.add_trace(
+            go.Scattermapbox(
+                lat=[model_coordinates[0]],
+                lon=[model_coordinates[1]],
+                text=[f"🤖 AI guesses {model_country}"],
+                mode="markers",
+                hoverinfo="text",
+                marker=dict(size=14, color="#474747"),
                 showlegend=True,
+                name="🤖 AI Guess",
+            )
+        )
     fig.update_layout(
         mapbox=dict(
             style="carto-positron",
             center=dict(lat=float(versus_state["lat"]), lon=float(versus_state["lon"])),
+            zoom=2,
         ),
         margin={"r": 0, "t": 0, "l": 0, "b": 0},
+        legend=dict(yanchor="bottom", y=0.01, xanchor="left", x=0.01),
     )
     return fig
         human_points += 1
     else:
         continent_result = "❌"
+    human_result = f"The photo is from **{versus_state['country']}** {country_result} in **{versus_state['continent']}** {continent_result}"
+    human_score_update = (
+        f"+{human_points} points" if human_points > 0 else "0 Points..."
+    )
+    versus_state["score"]["HUMAN"] += human_points
+    continent_probs, country_probs, _, _ = predict(input_img)
     model_country = max(country_probs, key=country_probs.get)
     model_continent = max(continent_probs, key=continent_probs.get)
     if model_country == versus_state["country"]:
         model_points += 1
     else:
         model_continent_result = "❌"
+    model_score_update = (
+        f"+{model_points} points"
+        if model_points > 0
+        else "0 Points... The model was completely wrong, it seems the world is not doomed yet."
+    )
+    versus_state["score"]["AI"] += model_points
     map = make_versus_map(human_country, model_country, versus_state)
+    return (
+        f"""
 ## {human_result}
 ### The AI 🤖 thinks this photo is from **{model_country}** {model_country_result} in **{model_continent}** {model_continent_result}
 🤖 {model_score_update}
 ### Score     🧑 {versus_state['score']['HUMAN']} : {versus_state['score']['AI']} 🤖
+""",
+        continent_probs,
+        country_probs,
+        map,
+        versus_state,
+    )
 def get_example_images(dir):
 demo = gr.Blocks(title="Thesis Demo")
 with demo:
+    gr.HTML(
+        """
 <h1 style="text-align: center; margin-bottom: 1rem">Image Geolocation Thesis Demo</h1>
 <h3> This Demo showcases the developed models and allows interacting with the optimized prototype.</h3>
 <p>Try the <b>"Image Geolocation Demo"</b> tab with your own images or with one of the examples. For all example image the ground truth is available and will be displayed together with the model predictions.</p>
 <p>In the  <b>"Versus Mode"</b> tab to play against the AI, guessing the country and continent where images where taken. Images in the versus mode are from the <a href="http://graphics.cs.cmu.edu/projects/im2gps/"><code>Im2GPS</code></a> and <a href="https://arxiv.org/abs/1705.04838"><code>Im2GPS3k</code></a> geolocation literature benchmarks. Can you beat the AI?
+"""
+    )
+    with gr.Accordion(
+        label="The demo currently encompasses 116 countries from 6 continents 🌍",
+        open=False,
+    ):
+        gr.Code(
+            json.dumps(countries_per_continent, indent=2, ensure_ascii=False),
+            label="countries_per_continent.json",
+            language="json",
+            interactive=False,
+        )
     with gr.Tab("Image Geolocation Demo"):
         with gr.Row():
             with gr.Column():
+                image = gr.Image(
+                    label="Image", type="pil", sources=["upload", "clipboard"]
+                )
                 predict_btn = gr.Button("Predict")
                 example_images = get_example_images("kerger-test-images")
                 # example_images.extend(get_example_images("versus_images"))
+                gr.Examples(examples=example_images, inputs=image, examples_per_page=24)
             with gr.Column():
                 with gr.Accordion(visible=False) as metadata_block:
                     map = gr.Plot(label="Locations")
                 with gr.Group():
                     continents_label = gr.Label(label="Continents")
+                    country_label = gr.Label(num_top_classes=5, label="Top countries")
+    predict_btn.click(
+        predict,
+        inputs=image,
+        outputs=[continents_label, country_label, metadata_block, map],
+    )
     with gr.Tab("Versus Mode"):
         versus_state = gr.State(value=INITAL_VERSUS_STATE)
         with gr.Row():
             with gr.Column():
+                versus_image = gr.Image(INITAL_VERSUS_STATE["image"], interactive=False)
                 continent_selection = gr.Radio(
+                    continents,
+                    label="Continents",
+                    info="Where was this image taken? (1 Point)",
+                )
+                country_selection = (
+                    gr.Dropdown(
+                        countries,
+                        label="Countries",
+                        info="Can you guess the exact country? (2 Points)",
+                    ),
+                )
                 with gr.Row():
                     next_img_btn = gr.Button("Try new image")
                     versus_btn = gr.Button("Submit guess")
                     with gr.Group():
                         continents_label = gr.Label(label="Continents")
                         country_label = gr.Label(
+                            num_top_classes=5, label="Top countries"
+                        )
+        next_img_btn.click(
+            next_versus_image,
+            inputs=[versus_state],
+            outputs=[
+                versus_image,
+                versus_state,
+                continent_selection,
+                country_selection[0],
+            ],
+        )
+        versus_btn.click(
+            versus_mode_inputs,
+            inputs=[
+                versus_image,
+                continent_selection,
+                country_selection[0],
+                versus_state,
+            ],
+            outputs=[versus_output, continents_label, country_label, map, versus_state],
+        )
 if __name__ == "__main__":