Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,10 +5,38 @@ import pandas as pd
|
|
| 5 |
import json
|
| 6 |
|
| 7 |
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
|
| 8 |
-
URL = f"https://
|
| 9 |
async with httpx.AsyncClient() as session:
|
| 10 |
response = await session.get(URL)
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
async def get_valid_datasets() -> List[str]:
|
| 14 |
URL = f"https://huggingface.co/api/datasets"
|
|
@@ -20,13 +48,6 @@ async def get_valid_datasets() -> List[str]:
|
|
| 20 |
datasets = [] # Set a default value if the response is not in the expected format
|
| 21 |
return datasets
|
| 22 |
|
| 23 |
-
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
|
| 24 |
-
URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
|
| 25 |
-
async with httpx.AsyncClient() as session:
|
| 26 |
-
response = await session.get(URL)
|
| 27 |
-
print(URL)
|
| 28 |
-
gr.Markdown(URL)
|
| 29 |
-
return response.json()
|
| 30 |
|
| 31 |
def get_df_from_rows(api_output):
|
| 32 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
|
@@ -53,11 +74,6 @@ async def update_dataset(split_name: str, config_name: str, dataset_name: str):
|
|
| 53 |
df = get_df_from_rows(rows)
|
| 54 |
return df
|
| 55 |
|
| 56 |
-
# Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
|
| 57 |
-
async def update_URL(dataset: str, config: str, split: str) -> str:
|
| 58 |
-
URL = f"https://datasets-server.huggingface.co/first-rows?dataset={dataset}&config={config}&split={split}"
|
| 59 |
-
URL = f"https://huggingface.co/datasets/{split}"
|
| 60 |
-
return (URL)
|
| 61 |
|
| 62 |
async def openurl(URL: str) -> str:
|
| 63 |
html = f"<a href={URL} target=_blank>{URL}</a>"
|
|
|
|
| 5 |
import json
|
| 6 |
|
| 7 |
async def get_splits(dataset_name: str) -> Dict[str, List[Dict]]:
|
| 8 |
+
URL = f"https://huggingface.co/api/datasets/{dataset_name}"
|
| 9 |
async with httpx.AsyncClient() as session:
|
| 10 |
response = await session.get(URL)
|
| 11 |
+
dataset_info = response.json()
|
| 12 |
+
return {
|
| 13 |
+
"splits": [
|
| 14 |
+
{"split": split_name, "config": config_name}
|
| 15 |
+
for config_name, config_info in dataset_info.get("config", {}).items()
|
| 16 |
+
for split_name in config_info.get("splits", [])
|
| 17 |
+
]
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
async def get_first_rows(dataset: str, config: str, split: str) -> Dict[str, Dict[str, List[Dict]]]:
|
| 21 |
+
URL = f"https://huggingface.co/datasets/{dataset}/resolve/main/dataset_info.json"
|
| 22 |
+
async with httpx.AsyncClient() as session:
|
| 23 |
+
response = await session.get(URL)
|
| 24 |
+
dataset_info = response.json()
|
| 25 |
+
split_info = dataset_info["splits"][split]
|
| 26 |
+
first_rows = {
|
| 27 |
+
"rows": [
|
| 28 |
+
{"row": row} for row in split_info["examples"][:10]
|
| 29 |
+
]
|
| 30 |
+
}
|
| 31 |
+
return first_rows
|
| 32 |
+
|
| 33 |
+
# Guido van Rossum: https://www.youtube.com/watch?v=-DVyjdw4t9I
|
| 34 |
+
async def update_URL(dataset: str, config: str, split: str) -> str:
|
| 35 |
+
URL = f"https://huggingface.co/datasets/{dataset}/tree/main/{config}/{split}"
|
| 36 |
+
return URL
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
|
| 41 |
async def get_valid_datasets() -> List[str]:
|
| 42 |
URL = f"https://huggingface.co/api/datasets"
|
|
|
|
| 48 |
datasets = [] # Set a default value if the response is not in the expected format
|
| 49 |
return datasets
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def get_df_from_rows(api_output):
|
| 53 |
dfFromSort = pd.DataFrame([row["row"] for row in api_output["rows"]])
|
|
|
|
| 74 |
df = get_df_from_rows(rows)
|
| 75 |
return df
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
async def openurl(URL: str) -> str:
|
| 79 |
html = f"<a href={URL} target=_blank>{URL}</a>"
|