# import gradio as gr
# import polars as pl
# # Path for the combined Parquet file
# COMBINED_PARQUET_PATH = "datasetcards.parquet"
# ROWS_PER_PAGE = 50
# # Lazy load dataset
# lazy_df = pl.scan_parquet(COMBINED_PARQUET_PATH)
# lazy_df = lazy_df.sort(
#     by=["downloads", "last_modified"],
#     descending=[True, True]
# )
# # Helper function to fetch a page
# def get_page(lazy_df: pl.LazyFrame, page: int, column: str = None, query: str = ""):
#     filtered_df = lazy_df
#     if column and query:
#         query_lower = query.lower().strip()
#         filtered_df = filtered_df.with_columns([
#             pl.col(column).cast(pl.Utf8).str.to_lowercase().alias(column)
#         ]).filter(pl.col(column).str.contains(query_lower, literal=False))
#     start = page * ROWS_PER_PAGE
#     page_df = filtered_df.slice(start, ROWS_PER_PAGE).collect().to_pandas()
#     # Replace NaN/None with empty string for display
#     page_df = page_df.fillna("")
#     total_rows = filtered_df.collect().height
#     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1
#     return page_df, total_pages
# # Initialize first page
# initial_df, total_pages = get_page(lazy_df, 0)
# columns = list(initial_df.columns)
# with gr.Blocks() as demo:
#     gr.Markdown("## Dataset Insight Portal")
#     gr.Markdown("This space allows you to explore the dataset of DatasetCards.<br>"
#                 "You can navigate pages, search within columns, and inspect the dataset easily.<br>"
#                 )
#     # Pagination controls
#     with gr.Row():
#         prev_btn = gr.Button("Previous", elem_id="small-btn")
#         next_btn = gr.Button("Next", elem_id="small-btn")
#         page_number = gr.Number(value=0, label="Page", precision=0)
#         total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")
#     # Data table
#     data_table = gr.Dataframe(
#         value=initial_df, headers=columns, datatype="str",
#         interactive=False, row_count=ROWS_PER_PAGE
#     )
#     # Column search
#     with gr.Row():
#         col_dropdown = gr.Dropdown(choices=columns, label="Column")
#         search_text = gr.Textbox(label="Search")
#         search_btn = gr.Button("Search", elem_id="small-btn")
#         reset_btn = gr.Button("Reset", elem_id="small-btn")
#     # --- Functions ---
#     current_lazy_df = lazy_df  # single dataset
#     def next_page_func(page, column, query):
#         page += 1
#         page_df, total_pages = get_page(current_lazy_df, page, column, query)
#         if page >= total_pages:
#             page = total_pages - 1
#             page_df, total_pages = get_page(current_lazy_df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page
#     def prev_page_func(page, column, query):
#         page -= 1
#         page = max(0, page)
#         page_df, total_pages = get_page(current_lazy_df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page
#     def search_func(column, query):
#         page_df, total_pages = get_page(current_lazy_df, 0, column, query)
#         return page_df, f"Total Pages: {total_pages}", 0
#     def reset_func():
#         page_df, total_pages = get_page(current_lazy_df, 0)
#         return page_df, f"Total Pages: {total_pages}", 0
#     # --- Event Listeners ---
#     next_btn.click(next_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
#     prev_btn.click(prev_page_func, [page_number, col_dropdown, search_text], [data_table, total_pages_display, page_number])
#     search_btn.click(search_func, [col_dropdown, search_text], [data_table, total_pages_display, page_number])
#     reset_btn.click(reset_func, [], [data_table, total_pages_display, page_number])
# demo.launch()
# import gradio as gr
# import polars as pl
# COMBINED_PARQUET_PATH = "datasetcards.parquet"
# ROWS_PER_PAGE = 50
# # Load dataset
# df = pl.read_parquet(COMBINED_PARQUET_PATH)  # eager DataFrame
# # Columns with dropdown instead of text search
# DROPDOWN_COLUMNS = ["reason", "category", "field", "keyword"]
# # Get unique values for the dropdown columns
# unique_values = {
#     col: sorted(df[col].drop_nulls().unique().to_list()) for col in DROPDOWN_COLUMNS
# }
# # Get page helper
# def get_page(df, page, column, query):
#     filtered_df = df
#     if column and query:
#         if column in DROPDOWN_COLUMNS:
#             # Exact match from dropdown
#             filtered_df = filtered_df.filter(pl.col(column) == query)
#         else:
#             # Text search
#             q = query.lower().strip()
#             filtered_df = (
#                 filtered_df.with_columns([
#                     pl.col(column).str.to_lowercase().alias(column)
#                 ])
#                 .filter(pl.col(column).str.contains(q, literal=False))
#             )
#     start = page * ROWS_PER_PAGE
#     page_df = filtered_df[start:start + ROWS_PER_PAGE].to_pandas().fillna("")
#     total_rows = filtered_df.height
#     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1 if total_rows > 0 else 1
#     return page_df, total_pages
# # Initial page
# initial_df, total_pages = get_page(df, 0, None, "")
# columns = list(initial_df.columns)
# # Build Gradio app
# with gr.Blocks() as demo:
#     gr.Markdown("## Dataset Insight Portal")
#     gr.Markdown(
#         "This space allows you to explore the dataset of DatasetCards.<br>"
#         "You can navigate pages, search within columns, and inspect the dataset easily.<br>"
#     )
#     with gr.Row():
#         prev_btn = gr.Button("Previous")
#         next_btn = gr.Button("Next")
#         page_number = gr.Number(value=0, label="Page", precision=0)
#         total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")
#     data_table = gr.Dataframe(
#         value=initial_df,
#         headers=columns,
#         datatype="str",
#         interactive=False,
#         row_count=ROWS_PER_PAGE,
#     )
#     with gr.Row():
#         col_dropdown = gr.Dropdown(choices=columns, label="Column to Search")
#         search_text = gr.Textbox(label="Search Text")
#         search_dropdown = gr.Dropdown(choices=[], label="Select Value", visible=False)
#         search_btn = gr.Button("Search")
#         reset_btn = gr.Button("Reset")
#     # Show dropdown only for certain columns
#     def update_search_input(column):
#         if column in DROPDOWN_COLUMNS:
#             return gr.update(choices=unique_values[column], visible=True), gr.update(visible=False)
#         else:
#             return gr.update(visible=False), gr.update(visible=True)
#     col_dropdown.change(update_search_input, col_dropdown, [search_dropdown, search_text])
#     # Search function
#     def search_func(page, column, txt, ddl):
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", 0
#     def next_page(page, column, txt, ddl):
#         page += 1
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         if page >= total_pages:
#             page = total_pages - 1
#             page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page
#     def prev_page(page, column, txt, ddl):
#         page = max(0, page - 1)
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page
#     def reset_func():
#         page_df, total_pages = get_page(df, 0, None, "")
#         return page_df, f"Total Pages: {total_pages}", 0, "", ""
#     # Wire events
#     inputs = [page_number, col_dropdown, search_text, search_dropdown]
#     outputs = [data_table, total_pages_display, page_number]
#     search_btn.click(search_func, inputs, outputs)
#     next_btn.click(next_page, inputs, outputs)
#     prev_btn.click(prev_page, inputs, outputs)
#     reset_btn.click(reset_func, [], outputs + [search_text, search_dropdown])
# demo.launch()
# import gradio as gr
# import polars as pl
# from huggingface_hub import HfApi
# import re
# # --- Hugging Face Org ---
# org_name = "hugging-science"
# api = HfApi()
# def fetch_members():
#     members = api.list_organization_members(org_name)
#     return [member.username for member in members]
# member_list = fetch_members()
# # --- Dataset ---
# COMBINED_PARQUET_PATH = "datasetcards_new.parquet"
# UPDATED_PARQUET_PATH = "datasetcards_new.parquet"
# ROWS_PER_PAGE = 50
# # df = pl.read_parquet(COMBINED_PARQUET_PATH)
# df = pl.read_parquet(COMBINED_PARQUET_PATH)
# df = df.with_columns([
#     pl.lit("todo").alias("status"),
#     pl.lit("").alias("assigned_to")
# ]).sort(by=["downloads", "last_modified", "usedStorage"], descending=[True, True, True])
# if "reason" in df.columns:
#     df = df.with_columns([
#         pl.Series(
#             "reason",
#             ["short description" if x and "short description" in x.lower() else (x if x is not None else "") for x in df["reason"]]
#         )
#     ])
# # Add editable columns if missing
# for col in ["assigned_to", "status"]:
#     if col not in df.columns:
#         default_val = "" if col == "assigned_to" else "todo"
#         df = df.with_columns(pl.lit(default_val).alias(col))
#     else:
#         # Fill nulls with default
#         default_val = "" if col == "assigned_to" else "todo"
#         df = df.with_columns(pl.col(col).fill_null(default_val))
# # --- Columns ---
# DROPDOWN_COLUMNS = ["reason", "category", "field", "keyword", "assigned_to", "status"]
# STATUS_OPTIONS = ["todo", "inprogress", "PR submitted", "PR merged"]
# # Prepare unique values for dropdown search
# unique_values = {col: sorted(df[col].drop_nulls().unique().to_list()) for col in DROPDOWN_COLUMNS}
# unique_values['assigned_to'] = sorted(member_list)
# unique_values['status'] = STATUS_OPTIONS
# # --- Helper to get page ---
# def get_page(df, page, column=None, query=None):
#     filtered_df = df
#     if column and query:
#         if column in DROPDOWN_COLUMNS:
#             filtered_df = filtered_df.filter(pl.col(column) == query)
#         else:
#             q = query.lower().strip()
#             filtered_df = (
#                 filtered_df.with_columns([pl.col(column).str.to_lowercase().alias(column)])
#                 .filter(pl.col(column).str.contains(q, literal=False))
#             )
#     start = page * ROWS_PER_PAGE
#     page_df = filtered_df[start:start + ROWS_PER_PAGE].to_pandas().fillna("")
#     total_rows = filtered_df.height
#     total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1 if total_rows > 0 else 1
#     return page_df, total_pages
# initial_df, total_pages = get_page(df, 0)
# columns = list(initial_df.columns)
# with gr.Blocks() as demo:
#     gr.Markdown("""
#     # Dataset Insight Portal
#     Welcome! This portal helps you explore and manage datasets from our Hugging Face organization.
#     ## What is this space for?
#     This space provides a table of datasets along with metadata. You can:
#     - Browse datasets with pagination.
#     - Search datasets by various fields.
#     - Assign responsibility for reviewing datasets (`assigned_to`).
#     - Track progress using `status`.
#     ## Why the table?
#     The table gives a structured view of all datasets, making it easy to sort, filter, and update information for each dataset. It includes all datasets as of 20-09-2025.
#     ## What does the table contain?
#     Each row represents a dataset. Columns include:
#     - **dataset_id**: Unique identifier of the dataset.
#     - **dataset_url**: Link to the dataset page on Hugging Face.
#     - **downloads**: Number of downloads.
#     - **author**: Dataset author.
#     - **license**: License type.
#     - **tags**: Tags describing the dataset. Obtained from the dataset card.
#     - **task_categories**: Categories of tasks the dataset is useful for. Obtained from the dataset card.
#     - **last_modified**: Date of the last update.
#     - **field, keyword**: Metadata columns describing the dataset's purpose, derived from heuristics. Use `field` and `keyword` to filter for science-related datasets.
#     - **category**: Category of the dataset (`rich` means the dataset card is in good shape; `minimal` means it needs improvement for one of the reasons below).
#     - **reason**: Reason why the dataset is classified as `minimal`. Options: `Failed to load card`, `No metadata and no description`, `No metadata and has description`, `Short description`.
#     - **usedStorage**: Storage used by the dataset (bytes).
#     - **assigned_to**: Person responsible for the dataset (editable).
#     - **status**: Progress status (editable). Options: `todo`, `inprogress`, `PR submitted`, `PR merged`.
#     ## How to use search
#     - Select a **column** from the dropdown.
#     - If the column is textual, type your query in the text box.
#     - If the column is a dropdown (like `assigned_to` or `status`), select the value from the dropdown.
#     - Click **Search** to filter the table.
#     ## How to add or update `assigned_to` and `status`
#     1. First, search for the **dataset_id**.
#     2. Then, select the **dataset_id** from the dropdown below the table.
#     3. Choose the person responsible in **Assigned To**. If you are a member of the organization, your username should appear in the list; otherwise, refresh and try again.
#     4. Select the current status in **Status**.
#     5. Click **Save Changes** to update the table and persist the changes.
#     6. Use **Refresh All** to reload the table and the latest members list.
#     This portal makes it easy to keep track of dataset reviews, assignments, and progress all in one place.
#     """)
#     # --- Pagination controls ---
#     with gr.Row():
#         prev_btn = gr.Button("Previous")
#         next_btn = gr.Button("Next")
#         page_number = gr.Number(value=0, label="Page", precision=0)
#         total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")
#     # --- Data table ---
#     data_table = gr.Dataframe(
#         value=initial_df,
#         headers=columns,
#         datatype="str",
#         interactive=False,
#         row_count=ROWS_PER_PAGE
#     )
#     # --- Search controls ---
#     with gr.Row():
#         col_dropdown = gr.Dropdown(choices=columns, label="Column to Search")
#         search_text = gr.Textbox(label="Search Text")
#         search_dropdown = gr.Dropdown(choices=[], label="Select Value", visible=False)
#         search_btn = gr.Button("Search")
#         reset_btn = gr.Button("Reset")
#     # --- Dataset selection & editable fields ---
#     selected_dataset_id = gr.Dropdown(label="Select dataset_id", choices=initial_df['dataset_id'].tolist())
#     assigned_to_input = gr.Dropdown(choices=member_list, label="Assigned To")
#     # status_input = gr.Dropdown(choices=STATUS_OPTIONS, label="Status")
#     status_input = gr.Dropdown(choices=STATUS_OPTIONS, label="Status", value="todo")
#     save_btn = gr.Button("Save Changes")
#     refresh_btn = gr.Button("Refresh All")
#     save_message = gr.Textbox(label="Save Status", interactive=False)
#     # --- Update search input depending on column ---
#     def update_search_input(column):
#         if column in DROPDOWN_COLUMNS:
#             return gr.update(choices=unique_values[column], visible=True), gr.update(visible=False)
#         else:
#             return gr.update(visible=False), gr.update(visible=True)
#     col_dropdown.change(update_search_input, col_dropdown, [search_dropdown, search_text])
#     # --- Prefill editable fields ---
#     def prefill_fields(dataset_id):
#         if not dataset_id:
#             return "", "todo"
#         dataset_id = str(dataset_id)
#         filtered = [row for row in df.to_dicts() if str(row.get("dataset_id")) == dataset_id]
#         if not filtered:
#             return "", "todo"
#         row = filtered[0]
#         return row.get("assigned_to", ""), row.get("status", "todo")
#     selected_dataset_id.change(prefill_fields, selected_dataset_id, [assigned_to_input, status_input])
#     # --- Search function ---
#     def search_func(page, column, txt, ddl):
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", 0, gr.update(choices=page_df['dataset_id'].tolist())
#     # --- Pagination functions ---
#     def next_page(page, column, txt, ddl):
#         page += 1
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         if page >= total_pages:
#             page = total_pages - 1
#             page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist())
#     def prev_page(page, column, txt, ddl):
#         page = max(0, page - 1)
#         query = ddl if column in DROPDOWN_COLUMNS else txt
#         page_df, total_pages = get_page(df, page, column, query)
#         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist())
#     def reset_func():
#         page_df, total_pages = get_page(df, 0)
#         return page_df, f"Total Pages: {total_pages}", 0, gr.update(choices=page_df['dataset_id'].tolist())
#     # --- Save changes & refresh ---
#     def save_changes(dataset_id, assigned_to_val, status_val, page_val, col, txt, ddl):
#         global df
#         if not dataset_id:
#             return gr.update(value="Please select a row first."), None, None, None
#         df = df.with_columns([
#             pl.when(pl.col("dataset_id") == dataset_id).then(pl.lit(assigned_to_val)).otherwise(pl.col("assigned_to")).alias("assigned_to"),
#             pl.when(pl.col("dataset_id") == dataset_id).then(pl.lit(status_val)).otherwise(pl.col("status")).alias("status")
#         ])
#         df.write_parquet(UPDATED_PARQUET_PATH)
#         page_df, total_pages = get_page(df, page_val, col, txt if col not in DROPDOWN_COLUMNS else ddl)
#         return (
#             gr.update(value=f"Saved changes for dataset_id: {dataset_id}"),
#             page_df,
#             gr.update(choices=page_df['dataset_id'].tolist()),
#             f"Total Pages: {total_pages}"
#         )
#     # --- Refresh All: table + members ---
#     def refresh_all(page, column, txt, ddl):
#         global df, member_list, unique_values
#         # Refresh members
#         member_list = fetch_members()
#         unique_values['assigned_to'] = sorted(member_list)
#         # Refresh table
#         try:
#             df = pl.read_parquet(UPDATED_PARQUET_PATH)
#         except FileNotFoundError:
#             pass
#         page_df, total_pages = get_page(df, page, column, txt if column not in DROPDOWN_COLUMNS else ddl)
#         return page_df, f"Total Pages: {total_pages}", page, gr.update(choices=page_df['dataset_id'].tolist()), gr.update(choices=member_list)
#     # --- Wire buttons ---
#     inputs_search = [page_number, col_dropdown, search_text, search_dropdown]
#     outputs_search = [data_table, total_pages_display, page_number, selected_dataset_id]
#     search_btn.click(search_func, inputs_search, outputs_search)
#     next_btn.click(next_page, inputs_search, outputs_search)
#     prev_btn.click(prev_page, inputs_search, outputs_search)
#     reset_btn.click(reset_func, [], outputs_search)
#     save_btn.click(
#         save_changes,
#         [selected_dataset_id, assigned_to_input, status_input, page_number, col_dropdown, search_text, search_dropdown],
#         [save_message, data_table, selected_dataset_id, total_pages_display]
#     )
#     refresh_btn.click(
#         refresh_all,
#         inputs=[page_number, col_dropdown, search_text, search_dropdown],
#         outputs=[data_table, total_pages_display, page_number, selected_dataset_id, assigned_to_input]
#     )
# demo.launch()
import gradio as gr
import polars as pl
import os
import subprocess
import threading
import time
# --- Config ---
COMBINED_PARQUET_PATH = "datasetcards_new.parquet"
UPDATED_PARQUET_PATH = "datasetcards_new.parquet"  # overwrite same file
ROWS_PER_PAGE = 50
ORG_NAME = "hugging-science"  # replace with your org
SPACE_NAME = "dataset-insight-portal"  # replace with your space
DROPDOWN_COLUMNS = ["status", "assigned_to"]  # columns searched via dropdown rather than free text
# --- Load dataset ---
df = pl.read_parquet(COMBINED_PARQUET_PATH)
# Add the editable columns only if they are missing (and fill nulls otherwise),
# so assignments already saved to the parquet survive a restart.
for col, default_val in [("assigned_to", ""), ("status", "todo")]:
    if col not in df.columns:
        df = df.with_columns(pl.lit(default_val).alias(col))
    else:
        df = df.with_columns(pl.col(col).fill_null(default_val))
columns = df.columns
total_pages = (len(df) + ROWS_PER_PAGE - 1) // ROWS_PER_PAGE
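# Worked example of the ceiling division above (numbers are illustrative): with
# ROWS_PER_PAGE = 50, a 1,234-row table gives (1234 + 50 - 1) // 50 = 25 pages
# (24 full pages plus one partial page), and 1,000 rows gives exactly 20 pages.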
# --- Git push helpers ---
def save_and_push():
    """Commit and push parquet file changes to the repo."""
    try:
        subprocess.run(["git", "config", "--global", "user.email", "[email protected]"])
        subprocess.run(["git", "config", "--global", "user.name", "Santosh Sanjeev"])
        hf_token = os.environ["HF_TOKEN"]
        repo_url = f"https://user:{hf_token}@huggingface.co/spaces/{ORG_NAME}/{SPACE_NAME}"
        subprocess.run(["git", "remote", "set-url", "origin", repo_url])
        # Commit only if the parquet changed
        subprocess.run(["git", "add", UPDATED_PARQUET_PATH])
        result = subprocess.run(["git", "diff", "--cached", "--quiet"])
        if result.returncode != 0:
            subprocess.run(["git", "commit", "-m", "Auto-update parquet file"])
            subprocess.run(["git", "push", "origin", "main"])
            print("✅ Pushed parquet to repo")
        else:
            print("ℹ️ No parquet changes to push")
    except Exception as e:
        print("⚠️ Push failed:", e)
def auto_push_loop(interval=300):
    """Run save_and_push every `interval` seconds (default 5 min)."""
    while True:
        save_and_push()
        time.sleep(interval)
# --- Gradio app functions ---
def get_page(page_num, col, search_text, search_dropdown):
    global df
    filtered = df
    if col and col in df.columns:
        if col in DROPDOWN_COLUMNS and search_dropdown:
            filtered = filtered.filter(pl.col(col) == search_dropdown)
        elif search_text:
            filtered = filtered.filter(pl.col(col).cast(str).str.contains(search_text, literal=False))
    total_pages = (len(filtered) + ROWS_PER_PAGE - 1) // ROWS_PER_PAGE
    start, end = (page_num - 1) * ROWS_PER_PAGE, page_num * ROWS_PER_PAGE
    page_df = filtered[start:end]
    return page_df.to_pandas(), f"of {total_pages}", page_num, "", "", ""
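# Illustrative call (values are made up): get_page(2, "status", "", "todo")
# returns the second page of rows whose status is "todo" as a pandas DataFrame,
# the "of N" label, the page number, and three empty strings that clear the
# selection and edit widgets.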
def save_changes(dataset_id, assigned_to, status):
    global df
    mask = pl.col("dataset_id") == dataset_id
    if df.filter(mask).height == 0:
        return f"No row found for dataset_id: {dataset_id}"
    df = df.with_columns([
        # New values must be wrapped in pl.lit(); a bare string passed to then()
        # would be interpreted by Polars as a column name.
        pl.when(mask).then(pl.lit(assigned_to)).otherwise(pl.col("assigned_to")).alias("assigned_to"),
        pl.when(mask).then(pl.lit(status)).otherwise(pl.col("status")).alias("status"),
    ])
    df.write_parquet(UPDATED_PARQUET_PATH)
    save_and_push()  # push immediately after the change
    return f"Saved for {dataset_id} ✅"
def refresh_all(page_num, col, search_text, search_dropdown):
    return get_page(page_num, col, search_text, search_dropdown)
# --- UI ---
with gr.Blocks() as demo:
    with gr.Row():
        col_dropdown = gr.Dropdown(choices=columns, label="Search Column")
        search_text = gr.Textbox(label="Search Text")
        search_dropdown = gr.Dropdown(choices=["todo", "inprogress", "PR submitted", "PR merged"], label="Status")
    with gr.Row():
        page_number = gr.Number(value=1, precision=0, label="Page #")
        total_pages_display = gr.Textbox(value=f"of {total_pages}", interactive=False)
    data_table = gr.Dataframe(headers=columns, datatype=["str"] * len(columns), row_count=ROWS_PER_PAGE)
    # Editable so a dataset_id copied from the table can be pasted in for saving
    selected_dataset_id = gr.Textbox(label="Selected Dataset ID")
    assigned_to_input = gr.Textbox(label="Assigned To")
    status_input = gr.Dropdown(choices=["todo", "inprogress", "PR submitted", "PR merged"], label="Status")
    save_btn = gr.Button("Save Changes")
    refresh_btn = gr.Button("Refresh")
    output_msg = gr.Textbox(label="Message", interactive=False)
    page_number.change(get_page, inputs=[page_number, col_dropdown, search_text, search_dropdown],
                       outputs=[data_table, total_pages_display, page_number,
                                selected_dataset_id, assigned_to_input, status_input])
    save_btn.click(save_changes, inputs=[selected_dataset_id, assigned_to_input, status_input], outputs=[output_msg])
    refresh_btn.click(refresh_all, inputs=[page_number, col_dropdown, search_text, search_dropdown],
                      outputs=[data_table, total_pages_display, page_number,
                               selected_dataset_id, assigned_to_input, status_input])
# 🚀 Start the auto-push loop in a background daemon thread
threading.Thread(target=auto_push_loop, args=(300,), daemon=True).start()
demo.launch()