File size: 4,263 Bytes
bff727e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import gradio as gr
import polars as pl

# Locations of the Parquet files (paths or HF Hub URLs)
RICH_PARQUET_PATH = "all_rich_dataset_cards.parquet"
MISSING_PARQUET_PATH = "all_minimal_dataset_cards.parquet"

# Number of table rows shown per page
ROWS_PER_PAGE = 50

# Open both datasets as lazy scans; rows are only read when a query is collected
lazy_rich, lazy_missing = (
    pl.scan_parquet(source)
    for source in (RICH_PARQUET_PATH, MISSING_PARQUET_PATH)
)

# Dataset used by default (the "missing information" cards)
current_lazy_df = lazy_missing

# Helper function to fetch a page
def get_page(lazy_df: pl.LazyFrame, page: int, column: str = None, query: str = ""):
    """Return one page of ``lazy_df`` plus the total number of pages.

    Args:
        lazy_df: Lazy frame to paginate.
        page: Zero-based page index. ``None`` and negative values are treated
            as page 0.
        column: Name of the column to search in. No filtering is applied when
            this is empty/``None``.
        query: Search text. It is stripped, lower-cased and matched
            case-insensitively against ``column`` (cast to string). The text
            is interpreted as a regular expression (``literal=False``). A
            blank query disables filtering.

    Returns:
        A ``(page_df, total_pages)`` tuple: ``page_df`` is a pandas DataFrame
        holding at most ``ROWS_PER_PAGE`` rows with their original values, and
        ``total_pages`` is the page count of the (filtered) data, which is 0
        when no rows match.
    """
    filtered_df = lazy_df

    # Normalise before testing so a whitespace-only query does not turn into
    # an empty pattern (which would silently drop rows whose value is null).
    needle = query.lower().strip() if query else ""
    if column and needle:
        # Lower-case only inside the predicate: the column itself must stay
        # untouched so the rows are displayed with their original text/dtype.
        haystack = pl.col(column).cast(pl.Utf8).str.to_lowercase()
        filtered_df = filtered_df.filter(haystack.str.contains(needle, literal=False))

    # A negative offset would make ``slice`` count from the end of the frame,
    # and ``None`` (an emptied Page box) cannot be multiplied at all.
    start = max(int(page or 0), 0) * ROWS_PER_PAGE
    page_df = filtered_df.slice(start, ROWS_PER_PAGE).collect().to_pandas()

    # Count rows with an aggregation instead of materialising every row.
    # ``pl.len()`` superseded ``pl.count()`` in newer Polars releases.
    row_count_expr = pl.len() if hasattr(pl, "len") else pl.count()
    total_rows = filtered_df.select(row_count_expr).collect().item()

    total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1
    return page_df, total_pages

# Load page 0 of the default dataset so the UI can be built with real data
initial_df, total_pages = get_page(lazy_df=current_lazy_df, page=0)
columns = initial_df.columns.tolist()

with gr.Blocks() as demo:
    gr.Markdown("## Dataset Insight Portal")

    # Dataset selection
    dataset_select = gr.Dropdown(
        choices=["DatasetCards rich in information", "DatasetCards missing information"],
        value="DatasetCards missing information",
        label="Select Dataset"
    )

    # Pagination controls
    with gr.Row():
        prev_btn = gr.Button("Previous", elem_id="small-btn")
        next_btn = gr.Button("Next", elem_id="small-btn")
        page_number = gr.Number(value=0, label="Page", precision=0)
        total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")

    # Data table
    data_table = gr.Dataframe(
        value=initial_df, headers=columns, datatype="str",
        interactive=False, row_count=ROWS_PER_PAGE
    )

    # Column search
    with gr.Row():
        # Pre-select the first column, matching what load_dataset does after
        # a dataset switch.
        col_dropdown = gr.Dropdown(
            choices=columns,
            value=columns[0] if columns else None,
            label="Column",
        )
        search_text = gr.Textbox(label="Search")
        search_btn = gr.Button("Search", elem_id="small-btn")
        reset_btn = gr.Button("Reset", elem_id="small-btn")

    # --- Functions ---
    def _resolve_dataset(dataset_choice):
        """Map a dataset dropdown label to its LazyFrame.

        ``None`` selects the module default (``current_lazy_df``) so the
        handlers can still be called without a dataset argument.
        """
        if dataset_choice is None:
            return current_lazy_df
        if dataset_choice == "DatasetCards rich in information":
            return lazy_rich
        return lazy_missing

    def _fetch_clamped(lazy_df, page, column, query):
        """Fetch ``page`` after clamping it into the valid page range.

        Returns the ``(table, label text, page)`` triple that the pagination
        handlers send to the UI.
        """
        # The Page box yields None when emptied; treat that as page 0.
        page = max(int(page or 0), 0)
        page_df, total_pages = get_page(lazy_df, page, column, query)
        # total_pages is 0 for an empty result; the last valid index is then 0
        # rather than -1.
        last_page = max(total_pages - 1, 0)
        if page > last_page:
            page = last_page
            page_df, total_pages = get_page(lazy_df, page, column, query)
        return page_df, f"Total Pages: {total_pages}", page

    def _clear_search():
        """Return the empty string used to blank the search box."""
        return ""

    def load_dataset(dataset_choice):
        """Show page 0 of the chosen dataset and refresh the column choices.

        The dataset is resolved from ``dataset_choice`` on every call instead
        of being stored in a module global, because module globals are shared
        by all sessions of the app.
        """
        first_page, n_pages = get_page(_resolve_dataset(dataset_choice), 0)
        cols = list(first_page.columns)
        return (
            gr.update(value=first_page, headers=cols),
            f"Total Pages: {n_pages}",
            0,
            gr.update(choices=cols, value=cols[0] if cols else None)
        )

    def next_page_func(page, column, query, dataset_choice=None):
        """Advance one page, stopping at the last page."""
        lazy_df = _resolve_dataset(dataset_choice)
        return _fetch_clamped(lazy_df, int(page or 0) + 1, column, query)

    def prev_page_func(page, column, query, dataset_choice=None):
        """Go back one page, stopping at page 0."""
        lazy_df = _resolve_dataset(dataset_choice)
        return _fetch_clamped(lazy_df, int(page or 0) - 1, column, query)

    def search_func(column, query, dataset_choice=None):
        """Apply the column search and show the first page of the matches."""
        page_df, n_pages = get_page(_resolve_dataset(dataset_choice), 0, column, query)
        return page_df, f"Total Pages: {n_pages}", 0

    def reset_func(dataset_choice=None):
        """Show the first page of the unfiltered dataset."""
        page_df, n_pages = get_page(_resolve_dataset(dataset_choice), 0)
        return page_df, f"Total Pages: {n_pages}", 0

    # --- Event Listeners ---
    table_outputs = [data_table, total_pages_display, page_number]
    paging_inputs = [page_number, col_dropdown, search_text, dataset_select]

    dataset_select.change(load_dataset, dataset_select, table_outputs + [col_dropdown])
    next_btn.click(next_page_func, paging_inputs, table_outputs)
    prev_btn.click(prev_page_func, paging_inputs, table_outputs)
    search_btn.click(search_func, [col_dropdown, search_text, dataset_select], table_outputs)
    reset_btn.click(reset_func, [dataset_select], table_outputs)

    # Reset and dataset switches display unfiltered data, so the stale query
    # must be cleared too; otherwise the next Previous/Next click would
    # silently re-apply it.
    dataset_select.change(_clear_search, [], [search_text])
    reset_btn.click(_clear_search, [], [search_text])

# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse ``demo`` or ``get_page``) has no
# server-starting side effect.
if __name__ == "__main__":
    demo.launch()