Update app2.py

app2.py CHANGED
@@ -809,63 +809,171 @@ class EnhancedFileProcessor:
 
         return dataset
 
-    def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[
-        """
+    def chunk_data(self, data: Union[Dict, List], max_size: int = 2953) -> List[str]:
+        """
+        Enhanced data chunking for QR codes with sequence metadata and start/end tags.
+        max_size is the maximum *byte* capacity for a QR code (e.g., 2953 bytes for Version 40-L).
+        """
         try:
             json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
-            total_length = len(json_str)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # QR capacity limits are specified in *bytes*, so measure the JSON
+            # as UTF-8 rather than counting characters.
+            json_bytes = json_str.encode('utf-8')
+            total_bytes_length = len(json_bytes)
+
+            # Each QR code carries "{startN}" + a metadata wrapper
+            # {"idx":...,"tc":...,"tl":...,"hash":...,"data":"<slice>"} + "{endN}".
+            # The tags are ~10-12 ASCII characters each and the wrapper with
+            # large field values is roughly 80 bytes, so 120 bytes is a
+            # generous per-chunk overhead estimate. Byte-mode capacity at
+            # Version 40 is 2953 bytes at error level L but only 2331 bytes at
+            # level M, which is what generate_stylish_qr uses below.
+            MAX_QR_CODE_BYTE_CAPACITY = 2331  # Version 40, error correction M, byte mode
+            MAX_OVERHEAD_PER_CHUNK_BYTES = 120
+
+            effective_payload_bytes_per_chunk = MAX_QR_CODE_BYTE_CAPACITY - MAX_OVERHEAD_PER_CHUNK_BYTES
+
+            if effective_payload_bytes_per_chunk <= 0:
+                logger.error(f"Max QR size ({MAX_QR_CODE_BYTE_CAPACITY}) is too small for metadata overhead ({MAX_OVERHEAD_PER_CHUNK_BYTES}). Cannot chunk.")
                 return []
 
-
-
+            # Calculate the number of chunks from the data's byte length.
+            num_chunks = math.ceil(total_bytes_length / effective_payload_bytes_per_chunk)
+            if num_chunks == 0:  # Handle empty input data
+                return []
 
-
-
-            "tc": 1,
-            "tl": total_length,
-            "hash": hash(chunk_data) & 0xFFFFFFFF,
-            "data": chunk_data
-        }
-        return [chunk]
+            chunks_for_qr: List[str] = []
+            current_byte_pos = 0
 
-            num_chunks = -(-total_length // effective_chunk_size)
-            chunks = []
-            current_pos = 0
             for i in range(num_chunks):
-
-
-
-
-
+                # Take the next slice of the original JSON bytes.
+                end_byte_pos = min(current_byte_pos + effective_payload_bytes_per_chunk, total_bytes_length)
+                chunk_data_bytes = json_bytes[current_byte_pos:end_byte_pos]
+                chunk_data_str = chunk_data_bytes.decode('utf-8', errors='replace')
+
+                # Create the inner JSON structure for the chunk.
+                chunk_dict = {
+                    "idx": i + 1,  # 1-based indexing for user readability
                     "tc": num_chunks,
-                    "tl":
-                    "hash": hash(
+                    "tl": total_bytes_length,  # Total length in bytes
+                    "hash": hash(chunk_data_bytes) & 0xFFFFFFFF,  # Hash of the byte slice
                     "data": chunk_data_str
                 }
-
-
-
-
-
+                inner_json_string = json.dumps(chunk_dict, ensure_ascii=False, separators=(',', ':'))
+
+                # Wrap with {startN}/{endN} tags; N is the 1-based sequence
+                # number a scanner uses to rejoin the chunks in order.
+                final_qr_string = f"{{start{i+1}}}{inner_json_string}{{end{i+1}}}"
+
+                # Sanity check: if this trips, the overhead estimate is off and
+                # MAX_OVERHEAD_PER_CHUNK_BYTES needs adjusting.
+                if len(final_qr_string.encode('utf-8')) > MAX_QR_CODE_BYTE_CAPACITY:
+                    logger.warning(f"Chunk {i+1} exceeds estimated QR capacity: {len(final_qr_string.encode('utf-8'))} bytes vs max {MAX_QR_CODE_BYTE_CAPACITY} bytes.")
+
+                chunks_for_qr.append(final_qr_string)
+                current_byte_pos = end_byte_pos
+
+            if current_byte_pos < total_bytes_length:
+                logger.error(f"Chunking logic error: Only processed {current_byte_pos} of {total_bytes_length} bytes.")
                 return []
 
-            logger.info(f"Chunked data into {num_chunks} chunks for QR codes.")
-            return
+            logger.info(f"Chunked data into {num_chunks} chunks for QR codes, with positional sequencing tags.")
+            return chunks_for_qr
 
         except Exception as e:
             logger.error(f"Error chunking data: {e}")
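The `{startN}`/`{endN}` framing plus the `idx`, `tc`, and `tl` fields give a scanner everything it needs to reassemble the original JSON. Below is a minimal decoder sketch; it is a hypothetical helper, not part of app2.py. Two caveats follow from the code above: Python's `hash()` is salted per process, so the stored `hash` field cannot be verified from a separate scanner process, and because each byte slice is decoded with `errors='replace'`, a multi-byte UTF-8 character split across a chunk boundary will not survive the round trip, so the sketch assumes ASCII-safe payloads.

```python
import json
import re
from typing import List

# Hypothetical helper, not part of app2.py: reassemble the original JSON
# from the strings produced by EnhancedFileProcessor.chunk_data above.
CHUNK_RE = re.compile(r"^\{start(\d+)\}(.*)\{end\1\}$")

def reassemble_chunks(payloads: List[str]):
    chunks = {}
    total_chunks = None
    for payload in payloads:
        match = CHUNK_RE.match(payload)
        if not match:
            raise ValueError("payload is missing its {startN}...{endN} tags")
        meta = json.loads(match.group(2))
        if int(match.group(1)) != meta["idx"]:
            raise ValueError("tag sequence number disagrees with metadata idx")
        # meta["hash"] is not checked: hash() is process-salted and not portable.
        chunks[meta["idx"]] = meta["data"]
        total_chunks = meta["tc"]
    if total_chunks is None or len(chunks) != total_chunks:
        raise ValueError(f"expected {total_chunks} chunks, got {len(chunks)}")
    # idx is 1-based, so join chunks 1..tc in order and parse the result.
    return json.loads("".join(chunks[i] for i in range(1, total_chunks + 1)))
```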
@@ -880,13 +988,16 @@ def generate_stylish_qr(data: Union[str, Dict],
     """Generate a stylish QR code with enhanced visual appeal"""
     try:
         qr = qrcode.QRCode(
-            version=None,
-            error_correction=qrcode.constants.ERROR_CORRECT_M,
+            version=None,  # Let the library determine the best version for the data
+            error_correction=qrcode.constants.ERROR_CORRECT_M,  # Medium (~15%) error correction
             box_size=size,
             border=border
         )
 
+        # `data` here is expected to be the pre-formatted string from chunk_data,
+        # including the {startN} and {endN} tags, and the inner JSON.
         if isinstance(data, dict):
+            # This path should ideally not be taken if chunk_data always returns strings
             qr.add_data(json.dumps(data, ensure_ascii=False, separators=(',', ':')))
         else:
             qr.add_data(str(data))
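With `version=None`, the `qrcode` library sizes the symbol only when the code is built: `make(fit=True)` picks the smallest version that holds the data at the requested error-correction level and raises `DataOverflowError` if even Version 40 cannot. A minimal sketch of the same call pattern (payload and output path are illustrative):

```python
import qrcode
from qrcode.constants import ERROR_CORRECT_M
from qrcode.exceptions import DataOverflowError

# Mirrors the construction above: auto-sized version, level-M error correction.
qr = qrcode.QRCode(version=None, error_correction=ERROR_CORRECT_M,
                   box_size=10, border=4)
try:
    qr.add_data('{start1}{"idx":1,"tc":1,"tl":2,"hash":0,"data":"[]"}{end1}')
    qr.make(fit=True)  # chooses the smallest version that fits the data
    img = qr.make_image(fill_color="#1a365d", back_color="#ffffff")
    img.save("example_chunk_qr.png")  # illustrative path
except DataOverflowError:
    # Raised when the payload exceeds Version 40 capacity at the chosen
    # error-correction level, i.e. the chunker's size estimate was off.
    print("chunk too large for a single QR code")
```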
@@ -917,8 +1028,11 @@ def generate_stylish_qr(data: Union[str, Dict],
 
 def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> List[str]:
     """Generate QR codes with enhanced visual appeal and metadata"""
-
-
+    # The `data` here should be the full processed dataset (List[Dict]) or a single item (Dict/str).
+    # The chunk_data method will handle turning this into strings suitable for QR codes.
+
+    if not isinstance(data, (list, dict, str)):
+        logger.error("generate_qr_codes received data that is not a list, dict, or string.")
         return []
 
     try:
@@ -926,14 +1040,18 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
         paths = []
 
         if combined:
-
-
+            # When combined, we treat the entire `data` (which should be List[Dict])
+            # as one large string to be chunked across multiple QRs.
+            chunks_of_combined_data = file_processor.chunk_data(data)
+            if not chunks_of_combined_data:
                 logger.warning("No chunks generated for combined data.")
                 return []
-            for i,
-                filename
+            for i, chunk_str in enumerate(chunks_of_combined_data):
+                # The filename includes the chunk number within the sequence
+                # and the total number of chunks.
+                filename = f'combined_qr_{i+1}_of_{len(chunks_of_combined_data)}_{int(time.time())}.png'
                 qr_path = generate_stylish_qr(
-                    data=
+                    data=chunk_str,  # This chunk_str already contains the {startN} and {endN} tags
                     filename=filename,
                     fill_color="#1a365d",
                     back_color="#ffffff"
@@ -941,18 +1059,20 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
                 if qr_path:
                     paths.append(qr_path)
                 else:
-                    logger.warning(f"Failed to generate QR for chunk {i+1}/{len(
+                    logger.warning(f"Failed to generate QR for combined chunk {i+1}/{len(chunks_of_combined_data)}.")
         else:
-
+            # If not combined, each top-level item in the data list is processed
+            # individually; each item might itself be chunked into multiple QRs.
+            if isinstance(data, list):
                 for idx, item in enumerate(data):
-
-                    if not
+                    item_chunks = file_processor.chunk_data(item)  # Chunk each item
+                    if not item_chunks:
                         logger.warning(f"No chunks generated for item {idx+1}.")
                         continue
-                    for chunk_idx,
-                        filename = f'item_{idx+1}_chunk_{chunk_idx+1}_of_{len(
+                    for chunk_idx, chunk_str in enumerate(item_chunks):
+                        filename = f'item_{idx+1}_chunk_{chunk_idx+1}_of_{len(item_chunks)}_{int(time.time())}.png'
                         qr_path = generate_stylish_qr(
-                            data=
+                            data=chunk_str,  # This chunk_str already contains the {startN} and {endN} tags
                             filename=filename,
                             fill_color="#1a365d",
                             back_color="#ffffff"
@@ -960,9 +1080,26 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
                         if qr_path:
                             paths.append(qr_path)
                         else:
-                            logger.warning(f"Failed to generate QR for item {idx+1} chunk {chunk_idx+1}/{len(
+                            logger.warning(f"Failed to generate QR for item {idx+1} chunk {chunk_idx+1}/{len(item_chunks)}.")
+            elif isinstance(data, (dict, str)):  # Handle single dict/string inputs if not a list
+                single_item_chunks = file_processor.chunk_data(data)
+                if not single_item_chunks:
+                    logger.warning("No chunks generated for single item.")
+                    return []
+                for chunk_idx, chunk_str in enumerate(single_item_chunks):
+                    filename = f'single_item_chunk_{chunk_idx+1}_of_{len(single_item_chunks)}_{int(time.time())}.png'
+                    qr_path = generate_stylish_qr(
+                        data=chunk_str,
+                        filename=filename,
+                        fill_color="#1a365d",
+                        back_color="#ffffff"
+                    )
+                    if qr_path:
+                        paths.append(qr_path)
+                    else:
+                        logger.warning(f"Failed to generate QR for single item chunk {chunk_idx+1}/{len(single_item_chunks)}.")
             else:
-                logger.warning("
+                logger.warning("Data is not a list, dict, or string and cannot be processed individually.")
 
         logger.info(f"Generated {len(paths)} QR codes.")
         return paths
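A hypothetical call site for the two modes above: `combined=True` serializes the whole record list into one chunk sequence, while `combined=False` gives each record its own sequence with its own `{startN}` numbering. The example records are illustrative.

```python
# Hypothetical example data; generate_qr_codes as defined above.
records = [{"name": "alpha", "size": 1024}, {"name": "beta", "size": 2048}]

combined_paths = generate_qr_codes(records, combined=True)   # combined_qr_<i>_of_<n>_... files
per_item_paths = generate_qr_codes(records, combined=False)  # item_<i>_chunk_<j>_of_<m>_... files

print(len(combined_paths), len(per_item_paths))
```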
@@ -973,11 +1110,11 @@ def generate_qr_codes(data: Union[str, Dict, List], combined: bool = True) -> Li
 
 # --- Chatbot Logic ---
 def respond_to_chat(
-    message: str,
-    chat_history: List[Tuple[str, str]],
+    message: str,
+    chat_history: List[Tuple[str, str]],
     chatbot_data: Optional[List[Dict]],
     # Add current_filtered_df_state as input, it will be updated and returned
-    current_filtered_df_state: Optional[pd.DataFrame]
+    current_filtered_df_state: Optional[pd.DataFrame]
 ) -> Tuple[List[Tuple[str, str]], List[Dict], Optional[pd.DataFrame]]:
     """
     Responds to user chat messages based on the loaded JSON data.
@@ -992,7 +1129,7 @@ def respond_to_chat(
     response = ""
     lower_message = message.lower().strip()
     # Initialize new_filtered_df_state with the current state to preserve it unless a filter changes it
-    new_filtered_df_state = current_filtered_df_state
+    new_filtered_df_state = current_filtered_df_state
 
     try:
         # Attempt to flatten the data structure for easier querying
@@ -1137,25 +1274,29 @@ def respond_to_chat(
                 new_filtered_df_state = None  # Clear previous filter if column not found
             else:
                 # IMPORTANT: Always filter from the original full dataframe 'df'
-                active_df_to_filter = df.copy()
+                active_df_to_filter = df.copy()
                 try:
                     # Attempt to infer value type for comparison
                     target_value: Any
                     col_dtype = df[column_name].dtype
 
+                    # Check if current_filtered_df_state exists and is not empty; if so,
+                    # filter from it, otherwise use the full df.
+                    df_to_filter = current_filtered_df_state if current_filtered_df_state is not None and not current_filtered_df_state.empty else df.copy()
+
                     if pd.api.types.is_numeric_dtype(col_dtype) and operator in ['>', '>=', '<', '<=', '==', '!=']:
                         try:
                             target_value = float(value_str)
-                            col_series = pd.to_numeric(
+                            col_series = pd.to_numeric(df_to_filter[column_name], errors='coerce')
                         except ValueError:
                             response = f"For numeric column '{column_name}', '{value_str}' is not a valid number."
                             target_value = None  # Error case
                     elif pd.api.types.is_bool_dtype(col_dtype) or value_str.lower() in ['true', 'false']:
                         target_value = value_str.lower() == 'true'
-                        col_series =
+                        col_series = df_to_filter[column_name].astype(bool, errors='ignore')
                     else:  # Assume string comparison otherwise
                         target_value = str(value_str)
-                        col_series =
+                        col_series = df_to_filter[column_name].astype(str).str.lower()  # Case-insensitive for strings
                         value_str_lower = target_value.lower()
 
 
@@ -1193,16 +1334,16 @@ def respond_to_chat(
 
                     if condition is not None:
                         # Apply condition to the active_df_to_filter (which is a copy of the full df)
-                        filtered_results_df =
+                        filtered_results_df = df_to_filter[condition]  # Use df_to_filter here
                         if not filtered_results_df.empty:
                             new_filtered_df_state = filtered_results_df  # Update state with new filter result
                             num_results = len(filtered_results_df)
                             preview_rows = min(num_results, 5)
                             preview_cols = min(len(filtered_results_df.columns), 5)
-
+
                             preview_df = filtered_results_df.head(preview_rows).iloc[:, :preview_cols]
                             preview_str = preview_df.to_string(index=False)
-
+
                             response = (f"Found {num_results} items where '{column_name}' {operator} '{value_str}'.\n"
                                         f"Here's a preview:\n```\n{preview_str}\n```\n"
                                         f"The full filtered dataset is now available for download using the 'Download Filtered JSON' button.")
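The `condition` applied above is a boolean mask built from `col_series`, the parsed `operator`, and `target_value`. A standalone sketch of that comparison step follows; the `OPS` table is illustrative, since the diff does not show app2.py's actual operator dispatch.

```python
import pandas as pd

# Illustrative operator dispatch for the comparison step.
OPS = {
    ">":  lambda s, v: s > v,
    ">=": lambda s, v: s >= v,
    "<":  lambda s, v: s < v,
    "<=": lambda s, v: s <= v,
    "==": lambda s, v: s == v,
    "!=": lambda s, v: s != v,
}

df_to_filter = pd.DataFrame({"price": [5, 10, 20], "name": ["a", "b", "c"]})
col_series = pd.to_numeric(df_to_filter["price"], errors="coerce")  # as in the numeric branch
condition = OPS[">="](col_series, 10.0)  # boolean mask; NaN rows compare as False
print(df_to_filter[condition])           # rows where price >= 10
```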
@@ -1224,7 +1365,7 @@ def respond_to_chat(
                 response = f"An error occurred while applying the filter: {e}"
                 logger.error(f"Error applying filter (column='{column_name}', op='{operator}', val='{value_str}'): {e}")
         # If the message was a filter, new_filtered_df_state is now set (or None/empty if error/no results)
-
+
         # --- End of Enhanced Filter Logic ---
 
         # If `response` is still empty, it means no filter query was matched by the filter_match regex.
@@ -1589,6 +1730,7 @@ def create_modern_interface():
         viewport_html = f'<div class="viewport-container" style="grid-template-columns: repeat({cols}, 1fr);">'
 
         if enabled_states is None or len(enabled_states) != num_qr_codes:
+            # If states are not yet initialized or out of sync, enable all by default
             enabled_states = list(range(num_qr_codes))
 
         for i, path in enumerate(paths):
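A minimal illustration of the fallback above: when the stored state list is missing, or its length no longer matches the number of generated codes, every index is re-enabled.

```python
# Stale or missing viewport state re-enables every QR code by index.
num_qr_codes = 4
enabled_states = None  # e.g. first render, before any toggling
if enabled_states is None or len(enabled_states) != num_qr_codes:
    enabled_states = list(range(num_qr_codes))
print(enabled_states)  # [0, 1, 2, 3]
```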
@@ -1753,14 +1895,14 @@ def create_modern_interface():
         try:
             data_list = data_df.to_dict(orient='records')
             json_str = json.dumps(data_list, indent=2, ensure_ascii=False)
-
+
             timestamp = int(time.time())
             filename = f"{filename_prefix}_{timestamp}.json"
             file_path = TEMP_DIR / filename
-
+
             with open(file_path, 'w', encoding='utf-8') as f:
                 f.write(json_str)
-
+
             logger.info(f"Successfully created JSON file for download: {file_path}")
             return str(file_path)
         except Exception as e:
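A hypothetical round trip through this helper, assuming `download_json_data` and `TEMP_DIR` are in scope as defined in app2.py:

```python
import json
import pandas as pd

# Hypothetical usage of download_json_data as shown above.
df = pd.DataFrame([{"id": 1, "ok": True}, {"id": 2, "ok": False}])
path = download_json_data(df, "full_data")  # e.g. TEMP_DIR/full_data_<timestamp>.json
if path is not None:
    with open(path, encoding="utf-8") as f:
        records = json.load(f)
    assert records == df.to_dict(orient="records")
```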
@@ -1772,7 +1914,7 @@ def create_modern_interface():
             logger.info("No full data available to download.")
             # Optionally, could return a gr.Warning or gr.Info to the UI if we had a dedicated status Textbox for downloads
             return None
-
+
         # The chatbot_data state is a list of dicts. Convert to DataFrame for download_json_data.
         # The df created in respond_to_chat is not directly used here to ensure we get the *original* full data.
         try:
@@ -1788,7 +1930,7 @@ def create_modern_interface():
         except Exception as e:
             logger.error(f"Error converting full chatbot_data to DataFrame for download: {e}")
             return None
-
+
         return download_json_data(df_to_download, "full_data")
 
     def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
@@ -1817,7 +1959,7 @@ def create_modern_interface():
         - **Archive Support**: Extracts and processes supported files from .zip, .tar, .gz archives. **(Now performs real extraction)**
         - **Robust Encoding Detection**: Uses `chardet` for reliable character encoding identification.
         - **Structured Output**: Provides a consistent JSON output format containing raw content (if applicable), extracted data, and processing notes for each processed item.
-        - **Sequential QR Codes**: Maintains data integrity across multiple codes by chunking the combined/individual processed data
+        - **Sequential QR Codes**: Maintains data integrity across multiple codes by chunking the combined/individual processed data, **including positional sequencing tags `{startN}` and `{endN}` in the QR code content**.
         - **QR Code Viewport**: Visualize generated QR codes in a sequenced square grid with options to enable/disable individual codes for selective scanning/sharing.
         - **Modern Design**: Clean, responsive interface with visual feedback.
         - **Data Chatbot**: Interact conversationally with the processed JSON data to ask questions about its structure, content, or request specific information.
@@ -1855,4 +1997,4 @@ def main():
         raise
 
 if __name__ == "__main__":
-    main()
+    main()