Spaces:
Runtime error
Runtime error
| # ccv3_parser.py | |
| # | |
| # | |
| # Imports | |
| from typing import Any, Dict, List, Optional, Union | |
| import re | |
| # | |
| # External Imports | |
| # | |
| # Local Imports | |
| from App_Function_Libraries.Personas.models import Lorebook, Asset, CharacterCardV3, CharacterCardV3Data, Decorator, \ | |
| LorebookEntry | |
| from App_Function_Libraries.Personas.utils import validate_iso_639_1, extract_json_from_charx, parse_json_file, \ | |
| extract_text_chunks_from_png, decode_base64 | |
| # | |
| ############################################################################################################ | |
| # | |
| # Functions: | |
class CCv3ParserError(Exception):
    """Raised when input cannot be parsed into a valid Character Card V3."""


class CharacterCardV3Parser:
    """Parse Character Card V3 (CCv3) data from JSON, PNG/APNG or CHARX input.

    Usage: construct the parser with the raw input and its type, call
    ``parse()``, then read the result with ``get_character_card()``.
    Every validation or parsing failure is reported as ``CCv3ParserError``.
    """

    REQUIRED_SPEC = 'chara_card_v3'
    REQUIRED_VERSION = '3.0'

    # Asset extensions are lower-cased before matching, so only a-z / 0-9 are valid.
    # Used with fullmatch(): a '^...$' pattern would also accept a trailing newline.
    _ASSET_EXT_PATTERN = re.compile(r'[a-z0-9]+')

    # Keys that must be present in the card's 'data' object (checked in this order).
    _REQUIRED_DATA_FIELDS = (
        'name', 'description', 'tags', 'creator', 'character_version',
        'mes_example', 'extensions', 'system_prompt',
        'post_history_instructions', 'first_mes',
        'alternate_greetings', 'personality', 'scenario',
        'creator_notes', 'group_only_greetings',
    )

    def __init__(self, input_data: Union[str, bytes], input_type: str):
        """
        Initialize the parser with input data.

        :param input_data: The input data as a string or bytes.
        :param input_type: The type of the input data: 'json', 'png', 'apng', 'charx'
            (compared case-insensitively).
        """
        self.input_data = input_data
        self.input_type = input_type.lower()
        # Populated by a successful parse(); stays None until then.
        self.character_card: Optional[CharacterCardV3] = None

    def parse(self):
        """
        Parse the input data according to ``self.input_type``.

        :raises CCv3ParserError: If the input type is unsupported or parsing fails.
        """
        if self.input_type == 'json':
            self.parse_json_input()
        elif self.input_type in ('png', 'apng'):
            self.parse_png_apng_input()
        elif self.input_type == 'charx':
            self.parse_charx_input()
        else:
            raise CCv3ParserError(f"Unsupported input type: {self.input_type}")

    def parse_json_input(self):
        """
        Parse JSON input directly.

        String input is encoded as UTF-8 before being handed to ``parse_json_file``.

        :raises CCv3ParserError: On any failure; the original error is kept as ``__cause__``.
        """
        try:
            raw = self.input_data.encode('utf-8') if isinstance(self.input_data, str) else self.input_data
            data = parse_json_file(raw)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse JSON input: {e}") from e

    def parse_png_apng_input(self):
        """
        Parse PNG or APNG input by extracting the 'ccv3' tEXt chunk.

        The chunk value is base64-decoded and then parsed as JSON.

        :raises CCv3ParserError: On any failure, including a missing 'ccv3' chunk;
            the original error is kept as ``__cause__``.
        """
        try:
            text_chunks = extract_text_chunks_from_png(self.input_data)
            if 'ccv3' not in text_chunks:
                raise CCv3ParserError("PNG/APNG does not contain 'ccv3' tEXt chunk.")
            ccv3_json_bytes = decode_base64(text_chunks['ccv3'])
            data = parse_json_file(ccv3_json_bytes)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse PNG/APNG input: {e}") from e

    def parse_charx_input(self):
        """
        Parse CHARX input by extracting 'card.json' from the ZIP archive.

        :raises CCv3ParserError: On any failure; the original error is kept as ``__cause__``.
        """
        try:
            data = extract_json_from_charx(self.input_data)
            self.character_card = self._build_character_card(data)
        except Exception as e:
            raise CCv3ParserError(f"Failed to parse CHARX input: {e}") from e

    def _build_character_card(self, data: Dict[str, Any]) -> "CharacterCardV3":
        """
        Build the CharacterCardV3 object from parsed data.

        :param data: The decoded top-level card object ('spec', 'spec_version', 'data').
        :return: The populated CharacterCardV3.
        :raises CCv3ParserError: If the spec, the version, or any required field is invalid.
        """
        if not isinstance(data, dict):
            raise CCv3ParserError("CharacterCardV3 object must be a JSON object.")

        # Validate required fields
        spec = data.get('spec')
        spec_version = data.get('spec_version')
        if spec != self.REQUIRED_SPEC:
            raise CCv3ParserError(f"Invalid spec: Expected '{self.REQUIRED_SPEC}', got '{spec}'")
        if spec_version != self.REQUIRED_VERSION:
            # As per spec, should not reject but handle versions.
            # For now, proceed if version is >= 3.0.
            try:
                version_float = float(spec_version)
            except (TypeError, ValueError):
                # TypeError covers a missing (None) or non-scalar spec_version.
                raise CCv3ParserError(f"Invalid spec_version format: '{spec_version}'") from None
            if version_float < 3.0:
                raise CCv3ParserError(f"Unsupported spec_version: '{spec_version}' (must be >= '3.0')")

        data_field = data.get('data')
        if not data_field:
            raise CCv3ParserError("Missing 'data' field in CharacterCardV3 object.")
        if not isinstance(data_field, dict):
            raise CCv3ParserError("'data' field in CharacterCardV3 object must be a JSON object.")

        # Extract required fields
        for field_name in self._REQUIRED_DATA_FIELDS:
            if field_name not in data_field:
                raise CCv3ParserError(f"Missing required field in data: '{field_name}'")

        # Parse assets; a card without an 'assets' key gets the default main icon.
        assets_data = data_field.get('assets', [{
            'type': 'icon',
            'uri': 'ccdefault:',
            'name': 'main',
            'ext': 'png'
        }])
        assets = self._parse_assets(assets_data)

        # Parse creator_notes_multilingual
        creator_notes_multilingual = data_field.get('creator_notes_multilingual')
        # 'is not None' (not truthiness) so falsy non-dict values are rejected too.
        if creator_notes_multilingual is not None:
            if not isinstance(creator_notes_multilingual, dict):
                raise CCv3ParserError("'creator_notes_multilingual' must be a dictionary.")
            # Validate ISO 639-1 codes
            for lang_code in creator_notes_multilingual:
                if not validate_iso_639_1(lang_code):
                    raise CCv3ParserError(f"Invalid language code in 'creator_notes_multilingual': '{lang_code}'")

        # Parse character_book
        character_book_data = data_field.get('character_book')
        character_book = self._parse_lorebook(character_book_data) if character_book_data else None

        # Build CharacterCardV3Data
        character_card_data = CharacterCardV3Data(
            name=data_field['name'],
            description=data_field['description'],
            tags=data_field['tags'],
            creator=data_field['creator'],
            character_version=data_field['character_version'],
            mes_example=data_field['mes_example'],
            extensions=data_field['extensions'],
            system_prompt=data_field['system_prompt'],
            post_history_instructions=data_field['post_history_instructions'],
            first_mes=data_field['first_mes'],
            alternate_greetings=data_field['alternate_greetings'],
            personality=data_field['personality'],
            scenario=data_field['scenario'],
            creator_notes=data_field['creator_notes'],
            character_book=character_book,
            assets=assets,
            nickname=data_field.get('nickname'),
            creator_notes_multilingual=creator_notes_multilingual,
            source=data_field.get('source'),
            group_only_greetings=data_field['group_only_greetings'],
            creation_date=data_field.get('creation_date'),
            modification_date=data_field.get('modification_date')
        )
        return CharacterCardV3(
            spec=spec,
            spec_version=spec_version,
            data=character_card_data
        )

    def _parse_assets(self, assets_data: List[Dict[str, Any]]) -> "List[Asset]":
        """
        Parse and validate assets.

        :param assets_data: List of asset objects; 'type', 'uri' and 'ext' are required
            strings, 'name' is optional and defaults to ''.
        :return: One Asset per input object, with 'ext' lower-cased.
        :raises CCv3ParserError: If the container, an asset, or one of its fields is invalid.
        """
        if not isinstance(assets_data, list):
            raise CCv3ParserError("'assets' must be a list.")

        assets = []
        for asset_data in assets_data:
            if not isinstance(asset_data, dict):
                raise CCv3ParserError("Each asset must be a JSON object.")
            # Validate required fields
            for field in ('type', 'uri', 'ext'):
                if field not in asset_data:
                    raise CCv3ParserError(f"Asset missing required field: '{field}'")
                if not isinstance(asset_data[field], str):
                    raise CCv3ParserError(f"Asset field '{field}' must be a string.")
            # Optional 'name'
            name = asset_data.get('name', '')
            # Validate 'ext'
            ext = asset_data['ext'].lower()
            if not self._ASSET_EXT_PATTERN.fullmatch(ext):
                raise CCv3ParserError(f"Invalid file extension in asset: '{ext}'")
            assets.append(Asset(
                type=asset_data['type'],
                uri=asset_data['uri'],
                name=name,
                ext=ext
            ))
        return assets

    def _parse_lorebook(self, lorebook_data: Dict[str, Any]) -> "Lorebook":
        """
        Parse and validate a Lorebook object.

        :param lorebook_data: The decoded 'character_book' object.
        :return: A Lorebook; 'extensions' defaults to {} and 'entries' to [],
            every other missing field is passed through as None.
        :raises CCv3ParserError: If the lorebook or any of its entries is invalid.
        """
        if not isinstance(lorebook_data, dict):
            raise CCv3ParserError("Lorebook must be a JSON object.")

        entries = self._parse_lorebook_entries(lorebook_data.get('entries', []))
        return Lorebook(
            name=lorebook_data.get('name'),
            description=lorebook_data.get('description'),
            scan_depth=lorebook_data.get('scan_depth'),
            token_budget=lorebook_data.get('token_budget'),
            recursive_scanning=lorebook_data.get('recursive_scanning'),
            extensions=lorebook_data.get('extensions', {}),
            entries=entries
        )

    def _parse_lorebook_entries(self, entries_data: List[Dict[str, Any]]) -> "List[LorebookEntry]":
        """
        Parse and validate Lorebook entries.

        :param entries_data: List of entry objects; 'keys', 'content', 'enabled' and
            'insertion_order' are required.
        :return: One LorebookEntry per input object, with decorators extracted from 'content'.
        :raises CCv3ParserError: If the container, an entry, or one of its fields is invalid.
        """
        if not isinstance(entries_data, list):
            raise CCv3ParserError("Lorebook 'entries' must be a list.")

        entries = []
        for entry_data in entries_data:
            if not isinstance(entry_data, dict):
                raise CCv3ParserError("Lorebook entry must be a JSON object.")
            # Validate required fields
            for field in ('keys', 'content', 'enabled', 'insertion_order'):
                if field not in entry_data:
                    raise CCv3ParserError(f"Lorebook entry missing required field: '{field}'")

            keys = entry_data['keys']
            content = entry_data['content']
            enabled = entry_data['enabled']
            insertion_order = entry_data['insertion_order']
            if not isinstance(keys, list) or not all(isinstance(k, str) for k in keys):
                raise CCv3ParserError("'keys' field in Lorebook entry must be a list of strings.")
            if not isinstance(content, str):
                raise CCv3ParserError("'content' field in Lorebook entry must be a string.")
            if not isinstance(enabled, bool):
                raise CCv3ParserError("'enabled' field in Lorebook entry must be a boolean.")
            # bool is a subclass of int, so it has to be excluded explicitly.
            if isinstance(insertion_order, bool) or not isinstance(insertion_order, (int, float)):
                raise CCv3ParserError("'insertion_order' field in Lorebook entry must be a number.")
            try:
                insertion_order = int(insertion_order)
            except (ValueError, OverflowError):
                # int() raises ValueError for NaN and OverflowError for +/-Infinity.
                raise CCv3ParserError(
                    "'insertion_order' field in Lorebook entry must be a finite number."
                ) from None

            # Optional fields. They are checked with 'is not None' rather than
            # truthiness so falsy values of the wrong type (0, "", ...) are rejected.
            selective = entry_data.get('selective')
            secondary_keys = entry_data.get('secondary_keys')
            position = entry_data.get('position')
            if selective is not None and not isinstance(selective, bool):
                raise CCv3ParserError("'selective' field in Lorebook entry must be a boolean.")
            if secondary_keys is not None:
                if not isinstance(secondary_keys, list) or not all(isinstance(k, str) for k in secondary_keys):
                    raise CCv3ParserError("'secondary_keys' field in Lorebook entry must be a list of strings.")
            if position is not None and not isinstance(position, str):
                raise CCv3ParserError("'position' field in Lorebook entry must be a string.")

            entries.append(LorebookEntry(
                keys=keys,
                content=content,
                enabled=enabled,
                insertion_order=insertion_order,
                use_regex=entry_data.get('use_regex', False),
                constant=entry_data.get('constant'),
                selective=selective,
                secondary_keys=secondary_keys,
                position=position,
                decorators=self._extract_decorators(content),
                name=entry_data.get('name'),
                priority=entry_data.get('priority'),
                id=entry_data.get('id'),
                comment=entry_data.get('comment')
            ))
        return entries

    def _extract_decorators(self, content: str) -> "List[Decorator]":
        """
        Extract decorators from the content field.

        Only lines that start with '@@' are considered; lines that do not yield
        a decorator are skipped.

        :param content: The lorebook entry content.
        :return: The decorators found, in line order.
        """
        decorators = []
        for line in content.splitlines():
            if not line.startswith('@@'):
                continue
            decorator = self._parse_decorator_line(line)
            if decorator:
                decorators.append(decorator)
        return decorators

    def _parse_decorator_line(self, line: str) -> "Optional[Decorator]":
        """
        Parse a single decorator line.

        Example:
            @@decorator_name value
            @@@fallback_decorator value

        Primary ('@@') and fallback ('@@@') lines are parsed the same way;
        linking a fallback to its preceding primary decorator is not implemented.

        :param line: One line of lorebook entry content.
        :return: A Decorator, or None if the line is not a decorator or has no name.
        """
        if not line.startswith('@@'):
            return None
        # Split on any run of whitespace so tabs or repeated spaces still
        # separate the name from the value.
        parts = line.lstrip('@').split(None, 1)
        if not parts:
            # A bare '@@' / '@@@' carries no decorator name.
            return None
        name = parts[0]
        value = parts[1].strip() if len(parts) > 1 else None
        return Decorator(name=name, value=value)

    def get_character_card(self) -> "Optional[CharacterCardV3]":
        """Return the parsed CharacterCardV3 object, or None if nothing was parsed yet."""
        return self.character_card
| # | |
| # End of ccv3_parser.py | |
| ############################################################################################################ |