Spaces:
Runtime error
Runtime error
Commit
·
1b9b717
1
Parent(s):
e1b455b
add new dataset
Browse files
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
app.py
CHANGED
|
@@ -35,7 +35,7 @@ datasets_map = {
|
|
| 35 |
{
|
| 36 |
"display_name": "Dokumentacja - QA",
|
| 37 |
"description": "Zbiór pytań i odpowiedzi do zanonimizowanej dokumentacji medycznej.",
|
| 38 |
-
"primary_column": "
|
| 39 |
},
|
| 40 |
"wikipedia":
|
| 41 |
{
|
|
@@ -49,6 +49,23 @@ datasets_map = {
|
|
| 49 |
"description": "Zbiór pytań i odpowiedzi na podstawie ulotek medycznych.",
|
| 50 |
"primary_column": "question",
|
| 51 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
| 53 |
|
| 54 |
|
|
@@ -108,7 +125,7 @@ def filter_splits(dataset: Dict[str, Dataset], split: str) -> Dict[str, Dataset]
|
|
| 108 |
def generate_wordcloud(dataset_name, split):
|
| 109 |
dataset_name = reverse_dataset_names_map.get(dataset_name, dataset_name)
|
| 110 |
|
| 111 |
-
dataset: Dataset = load_dataset(BASE_DATASET, dataset_name, split=f"{split}[:
|
| 112 |
|
| 113 |
primary_column = datasets_map[dataset_name]["primary_column"]
|
| 114 |
|
|
|
|
| 35 |
{
|
| 36 |
"display_name": "Dokumentacja - QA",
|
| 37 |
"description": "Zbiór pytań i odpowiedzi do zanonimizowanej dokumentacji medycznej.",
|
| 38 |
+
"primary_column": "context",
|
| 39 |
},
|
| 40 |
"wikipedia":
|
| 41 |
{
|
|
|
|
| 49 |
"description": "Zbiór pytań i odpowiedzi na podstawie ulotek medycznych.",
|
| 50 |
"primary_column": "question",
|
| 51 |
},
|
| 52 |
+
"polish_medinstruct":
|
| 53 |
+
{
|
| 54 |
+
"display_name": "Instrukcje medyczne",
|
| 55 |
+
"description": "Przetłumaczony zbiór medinstruct",
|
| 56 |
+
"primary_column": "output",
|
| 57 |
+
},
|
| 58 |
+
"kor_epikryzy_summarization": {
|
| 59 |
+
"display_name": "Dokumentacja - Sumaryzacja",
|
| 60 |
+
"description": "Zbiór streszczeń zanonimizowanej dokumentacji medycznej.",
|
| 61 |
+
"primary_column": "summary",
|
| 62 |
+
},
|
| 63 |
+
"znany_lekarz_multiturn":
|
| 64 |
+
{
|
| 65 |
+
"display_name": "Porady - wieloturnowe",
|
| 66 |
+
"description": "Zbiór pytań i odpowiedzi odnośnie medycyny z wieloma turami rozmowy.",
|
| 67 |
+
"primary_column": "multiturn",
|
| 68 |
+
},
|
| 69 |
}
|
| 70 |
|
| 71 |
|
|
|
|
| 125 |
def generate_wordcloud(dataset_name, split):
|
| 126 |
dataset_name = reverse_dataset_names_map.get(dataset_name, dataset_name)
|
| 127 |
|
| 128 |
+
dataset: Dataset = load_dataset(BASE_DATASET, dataset_name, split=f"{split}[:100]", token=read_key)
|
| 129 |
|
| 130 |
primary_column = datasets_map[dataset_name]["primary_column"]
|
| 131 |
|