Spaces:
Runtime error
Runtime error
Commit
·
8d1a80c
1
Parent(s):
0750d7a
Add randomization to urls required to fetch all nearby pages
Browse files- If the number of required nearby pages is less than the number of available nearby pages, randomize the URLs to avoid biasing the results in one direction. Because the small circles are stacked in a hexagonal pattern (from south to north), not randomizing the URLs would cluster the results in the south and leave none in the north whenever fewer pages are required than are available.
- moved `fetch_url` to utils.py
- removed filtering based on distance on frontend side for nearby point rendering
- backend/utils.py +27 -1
- frontend/src/components/Map.js +1 -3
- main.py +19 -15
backend/utils.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
import math
|
|
|
|
|
|
|
| 2 |
def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
|
| 3 |
"""
|
| 4 |
Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
|
|
@@ -40,4 +42,28 @@ def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=1
|
|
| 40 |
lon = center_lon + delta_lon
|
| 41 |
results.append((lat, lon))
|
| 42 |
|
| 43 |
-
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import math
|
| 2 |
+
import httpx
|
| 3 |
+
|
| 4 |
def generate_circle_centers(center_lat, center_lon, radius_km, small_radius_km=10):
|
| 5 |
"""
|
| 6 |
Generate a list of centers of small circles (radius=10km) needed to cover a larger circle.
|
|
|
|
| 42 |
lon = center_lon + delta_lon
|
| 43 |
results.append((lat, lon))
|
| 44 |
|
| 45 |
+
return results
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def fetch_url(client: httpx.AsyncClient, url: str):
|
| 50 |
+
"""
|
| 51 |
+
Fetch a URL asynchronously using httpx and return the response status and data.
|
| 52 |
+
This function is used asynchronously to fetch multiple URLs in parallel when the search radius is > 10km.
|
| 53 |
+
Input:
|
| 54 |
+
- client: httpx.AsyncClient instance
|
| 55 |
+
- url: URL to fetch
|
| 56 |
+
Output:
|
| 57 |
+
- A dictionary with the URL, status code, and data if available.
|
| 58 |
+
- Data includes the JSON format of wiki geosearch response.
|
| 59 |
+
If an error occurs, return a dictionary with the URL and the error message.
|
| 60 |
+
"""
|
| 61 |
+
try:
|
| 62 |
+
response = await client.get(url, timeout=10.0)
|
| 63 |
+
return {
|
| 64 |
+
"url": url,
|
| 65 |
+
"status": response.status_code,
|
| 66 |
+
"data": response.json() if response.status_code == 200 else None,
|
| 67 |
+
}
|
| 68 |
+
except Exception as e:
|
| 69 |
+
return {"url": url, "error": str(e)}
|
frontend/src/components/Map.js
CHANGED
|
@@ -247,9 +247,7 @@ const Map = ( { onMapClick, searchQuery, contentType, setSearchQuery, setSubmitt
|
|
| 247 |
|
| 248 |
if (res.ok) {
|
| 249 |
const data = await res.json();
|
| 250 |
-
const markers = data.pages.
|
| 251 |
-
page => typeof page.dist === "number" && page.dist <= explorationRadius * 1000
|
| 252 |
-
).map(page => ({
|
| 253 |
position: [page.lat, page.lon],
|
| 254 |
title: page.title,
|
| 255 |
distance: page.dist
|
|
|
|
| 247 |
|
| 248 |
if (res.ok) {
|
| 249 |
const data = await res.json();
|
| 250 |
+
const markers = data.pages.map(page => ({
|
|
|
|
|
|
|
| 251 |
position: [page.lat, page.lon],
|
| 252 |
title: page.title,
|
| 253 |
distance: page.dist
|
main.py
CHANGED
|
@@ -8,7 +8,8 @@ import geopy.distance
|
|
| 8 |
from cachetools import TTLCache
|
| 9 |
import os
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
-
from
|
|
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
|
@@ -175,16 +176,7 @@ def get_geodistance(payload: Geodistance):
|
|
| 175 |
)
|
| 176 |
|
| 177 |
|
| 178 |
-
|
| 179 |
-
try:
|
| 180 |
-
response = await client.get(url, timeout=10.0)
|
| 181 |
-
return {
|
| 182 |
-
"url": url,
|
| 183 |
-
"status": response.status_code,
|
| 184 |
-
"data": response.json() if response.status_code == 200 else None,
|
| 185 |
-
}
|
| 186 |
-
except Exception as e:
|
| 187 |
-
return {"url": url, "error": str(e)}
|
| 188 |
|
| 189 |
@app.post("/wiki/nearby")
|
| 190 |
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
@@ -217,7 +209,9 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
| 217 |
radius = payload.radius
|
| 218 |
limit = payload.limit
|
| 219 |
|
| 220 |
-
|
|
|
|
|
|
|
| 221 |
url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
|
| 222 |
"&list=geosearch"
|
| 223 |
f"&gscoord={lat_center}|{lon_center}"
|
|
@@ -248,11 +242,14 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
| 248 |
content={"error": str(e)},
|
| 249 |
status_code=500
|
| 250 |
)
|
| 251 |
-
|
|
|
|
|
|
|
| 252 |
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
| 253 |
all_pages = []
|
| 254 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
| 255 |
-
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=
|
|
|
|
| 256 |
|
| 257 |
print("URL Counts:", len(urls))
|
| 258 |
try:
|
|
@@ -263,18 +260,25 @@ async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
| 263 |
# print(results)
|
| 264 |
for result in results:
|
| 265 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
|
|
|
|
|
|
| 266 |
lat, lon = unit.get("lat"), unit.get("lon")
|
| 267 |
if lat is not None and lon is not None:
|
| 268 |
dist = int(geopy.distance.distance(
|
| 269 |
(lat_center, lon_center), (lat, lon)
|
| 270 |
).m)
|
| 271 |
-
print(dist)
|
| 272 |
else:
|
| 273 |
dist = None
|
| 274 |
|
|
|
|
|
|
|
|
|
|
| 275 |
unit_with_dist = {**unit, "dist": dist}
|
| 276 |
all_pages.append(unit_with_dist)
|
| 277 |
|
|
|
|
|
|
|
| 278 |
return JSONResponse(
|
| 279 |
content={
|
| 280 |
"pages": all_pages,
|
|
|
|
| 8 |
from cachetools import TTLCache
|
| 9 |
import os
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
+
from random import shuffle
|
| 12 |
+
from backend.utils import generate_circle_centers, fetch_url
|
| 13 |
|
| 14 |
load_dotenv()
|
| 15 |
|
|
|
|
| 176 |
)
|
| 177 |
|
| 178 |
|
| 179 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
@app.post("/wiki/nearby")
|
| 182 |
async def get_nearby_wiki_pages(payload: NearbyWikiPage):
|
|
|
|
| 209 |
radius = payload.radius
|
| 210 |
limit = payload.limit
|
| 211 |
|
| 212 |
+
wiki_geosearch_radius_limit_meters = 10000 # Wikipedia API limit for geosearch radius in meters
|
| 213 |
+
|
| 214 |
+
if radius <= wiki_geosearch_radius_limit_meters:
|
| 215 |
url = ("https://en.wikipedia.org/w/api.php"+"?action=query"
|
| 216 |
"&list=geosearch"
|
| 217 |
f"&gscoord={lat_center}|{lon_center}"
|
|
|
|
| 242 |
content={"error": str(e)},
|
| 243 |
status_code=500
|
| 244 |
)
|
| 245 |
+
|
| 246 |
+
elif radius > wiki_geosearch_radius_limit_meters:
|
| 247 |
+
print(radius)
|
| 248 |
small_circle_centers = generate_circle_centers(lat_center, lon_center, radius / 1000, small_radius_km=10)
|
| 249 |
all_pages = []
|
| 250 |
base_url = "https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gscoord={lat}|{lon}&gsradius={small_radius_km}&gslimit={page_limit}&format=json"
|
| 251 |
+
urls = [base_url.format(lat=center[0], lon=center[1], small_radius_km=wiki_geosearch_radius_limit_meters, page_limit=100) for center in small_circle_centers]
|
| 252 |
+
shuffle(urls) # When more pages are available than the user requested, randomize the URL order so the returned results do not cluster in a single direction.
|
| 253 |
|
| 254 |
print("URL Counts:", len(urls))
|
| 255 |
try:
|
|
|
|
| 260 |
# print(results)
|
| 261 |
for result in results:
|
| 262 |
for unit in result.get("data", {}).get("query", {}).get("geosearch", []):
|
| 263 |
+
if len(all_pages) >= limit:
|
| 264 |
+
break
|
| 265 |
lat, lon = unit.get("lat"), unit.get("lon")
|
| 266 |
if lat is not None and lon is not None:
|
| 267 |
dist = int(geopy.distance.distance(
|
| 268 |
(lat_center, lon_center), (lat, lon)
|
| 269 |
).m)
|
| 270 |
+
# print(dist)
|
| 271 |
else:
|
| 272 |
dist = None
|
| 273 |
|
| 274 |
+
if (not dist) or (dist and dist > radius):
|
| 275 |
+
continue
|
| 276 |
+
|
| 277 |
unit_with_dist = {**unit, "dist": dist}
|
| 278 |
all_pages.append(unit_with_dist)
|
| 279 |
|
| 280 |
+
# print(all_pages)
|
| 281 |
+
|
| 282 |
return JSONResponse(
|
| 283 |
content={
|
| 284 |
"pages": all_pages,
|