Data_Generation_LabelingCopy

Sleeping

App Files Files

Data_Generation_LabelingCopy / app110.py

Wedyan2023

Update app110.py

f0d296e verified 6 months ago

raw

history blame

77.7 kB

	import streamlit as st
	import pandas as pd
	import os
	import json
	import base64
	import random
	from streamlit_pdf_viewer import pdf_viewer
	from langchain.prompts import PromptTemplate
	from datetime import datetime
	from pathlib import Path
	from openai import OpenAI
	from dotenv import load_dotenv
	import warnings

	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	warnings.filterwarnings('ignore')

	os.getenv("OAUTH_CLIENT_ID")


	# # Load environment variables and initialize the OpenAI client to use Hugging Face Inference API.
	# load_dotenv()
	# client = OpenAI(
	# base_url="https://api-inference.huggingface.co/v1",
	# api_key=os.environ.get('TOKEN2') # Hugging Face API token
	# )
	#####
	from openai import OpenAI

	client = OpenAI(
	base_url="https://router.huggingface.co/together/v1",
	#api_key="hf_XXXXX",
	api_key=os.environ.get('LLM'), # Hugging Face API token
	)

	completion = client.chat.completions.create(
	model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
	messages=[
	{
	"role": "user",
	"content": "What is the capital of France?"
	}
	],
	)

	print(completion.choices[0].message)
	#####
	##########################################################3
	# import streamlit as st
	# from transformers import AutoModelForCausalLM, AutoTokenizer
	# import torch

	# # Model selection dropdown
	# selected_model = st.selectbox(
	# "Select Model",
	# ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
	# "meta-llama/Llama-3.3-70B-Instruct",
	# "meta-llama/Llama-3.2-3B-Instruct",
	# "meta-llama/Llama-4-Scout-17B-16E-Instruct",
	# "meta-llama/Meta-Llama-3-8B-Instruct",
	# "meta-llama/Llama-3.1-70B-Instruct"],
	# key='model_select'
	# )

	# @st.cache_resource # Cache the model to prevent reloading
	# def load_model(model_name):
	# try:
	# # Optimized model loading configuration
	# model = AutoModelForCausalLM.from_pretrained(
	# model_name,
	# torch_dtype=torch.float16, # Use half precision
	# device_map="auto", # Automatic device mapping
	# load_in_8bit=True, # Enable 8-bit quantization
	# low_cpu_mem_usage=True, # Optimize CPU memory usage
	# max_memory={0: "10GB"} # Limit GPU memory usage
	# )

	# tokenizer = AutoTokenizer.from_pretrained(
	# model_name,
	# padding_side="left",
	# truncation_side="left"
	# )

	# return model, tokenizer

	# except Exception as e:
	# st.error(f"Error loading model: {str(e)}")
	# return None, None

	# # Load the selected model with optimizations
	# if selected_model:
	# model, tokenizer = load_model(selected_model)

	# # Check if model loaded successfully
	# if model is not None:
	# st.success(f"Successfully loaded {selected_model}")
	# else:
	# st.warning("Please select a different model or check your hardware capabilities")

	# # Function to generate text
	# def generate_response(prompt, model, tokenizer):
	# try:
	# inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

	# with torch.no_grad():
	# outputs = model.generate(
	# inputs["input_ids"],
	# max_length=256,
	# num_return_sequences=1,
	# temperature=0.7,
	# do_sample=True,
	# pad_token_id=tokenizer.pad_token_id
	# )

	# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	# return response

	# except Exception as e:
	# return f"Error generating response: {str(e)}"
	############################################################

	####new
	# from openai import OpenAI

	# client = OpenAI(
	# base_url="https://router.huggingface.co/together/v1",
	# api_key=os.environ.get('TOKEN2'),
	# )

	# completion = client.chat.completions.create(
	# model="meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
	# messages=[
	# {
	# "role": "user",
	# "content": "What is the capital of France?"
	# }
	# ],
	# max_tokens=512,
	# )

	# print(completion.choices[0].message)
	#####

	# Create necessary directories
	for dir_name in ['data', 'feedback']:
	if not os.path.exists(dir_name):
	os.makedirs(dir_name)

	# Custom CSS
	st.markdown("""
	<style>
	.stButton > button {
	width: 100%;
	margin-bottom: 10px;
	background-color: #4CAF50;
	color: white;
	border: none;
	padding: 10px;
	border-radius: 5px;
	}
	.task-button {
	background-color: #2196F3 !important;
	}
	.stSelectbox {
	margin-bottom: 20px;
	}
	.output-container {
	padding: 20px;
	border-radius: 5px;
	border: 1px solid #ddd;
	margin: 10px 0;
	}
	.status-container {
	padding: 10px;
	border-radius: 5px;
	margin: 10px 0;
	}
	.sidebar-info {
	padding: 10px;
	background-color: #f0f2f6;
	border-radius: 5px;
	margin: 10px 0;
	}
	.feedback-button {
	background-color: #ff9800 !important;
	}
	.feedback-container {
	padding: 15px;
	background-color: #f5f5f5;
	border-radius: 5px;
	margin: 15px 0;
	}
	</style>
	""", unsafe_allow_html=True)

	# Helper functions
	def read_csv_with_encoding(file):
	encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
	for encoding in encodings:
	try:
	return pd.read_csv(file, encoding=encoding)
	except UnicodeDecodeError:
	continue
	raise UnicodeDecodeError("Failed to read file with any supported encoding")

	#def save_feedback(feedback_data):
	#feedback_file = 'feedback/user_feedback.csv'
	#feedback_df = pd.DataFrame([feedback_data])

	#if os.path.exists(feedback_file):
	#feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
	#else:
	#feedback_df.to_csv(feedback_file, index=False)

	def reset_conversation():
	st.session_state.conversation = []
	st.session_state.messages = []
	if 'task_choice' in st.session_state:
	del st.session_state.task_choice
	return None
	#new 24 March
	#user_input = st.text_input("Enter your prompt:")
	###########33

	# Initialize session state variables
	if "messages" not in st.session_state:
	st.session_state.messages = []
	if "examples_to_classify" not in st.session_state:
	st.session_state.examples_to_classify = []
	if "system_role" not in st.session_state:
	st.session_state.system_role = ""



	# Main app title
	st.title("🤖🦙 Text Data Labeling and Generation App")
	# def embed_pdf_sidebar(pdf_path):
	# with open(pdf_path, "rb") as f:
	# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
	# pdf_display = f"""
	# <iframe src="data:application/pdf;base64,{base64_pdf}"
	# width="100%" height="400" type="application/pdf"></iframe>
	# """
	# st.markdown(pdf_display, unsafe_allow_html=True)
	#


	# Sidebar settings
	with st.sidebar:
	st.title("⚙️ Settings")


	#this last code works
	with st.sidebar:
	st.markdown("### 📘Data Generation and Labeling Instructions")
	#st.markdown("<h4 style='color: #4A90E2;'>📘 Instructions</h4>", unsafe_allow_html=True)
	with open("User instructions.pdf", "rb") as f:
	st.download_button(
	label="📄 Download Instructions PDF",
	data=f,
	#file_name="instructions.pdf",
	file_name="User instructions.pdf",
	mime="application/pdf"
	)

	selected_model = st.selectbox(
	"Select Model",
	["meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.2-11B-Vision-Instruct","meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
	"meta-llama/Llama-3.1-70B-Instruct"],
	key='model_select'
	)

	#################new oooo

	# # Model selection dropdown
	# selected_model = st.selectbox(
	# "Select Model",
	# [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
	# "meta-llama/Llama-3.2-3B-Instruct",
	# "meta-llama/Llama-3.3-70B-Instruct",
	# "meta-llama/Llama-3.2-3B-Instruct",
	# "meta-llama/Llama-4-Scout-17B-16E-Instruct",
	# "meta-llama/Meta-Llama-3-8B-Instruct",
	# "meta-llama/Llama-3.1-70B-Instruct"],
	# key='model_select'
	# )

	# @st.cache_resource # Cache the model to prevent reloading
	# def load_model(model_name):
	# try:
	# # Optimized model loading configuration
	# model = AutoModelForCausalLM.from_pretrained(
	# model_name,
	# torch_dtype=torch.float16, # Use half precision
	# device_map="auto", # Automatic device mapping
	# load_in_8bit=True, # Enable 8-bit quantization
	# low_cpu_mem_usage=True, # Optimize CPU memory usage
	# max_memory={0: "10GB"} # Limit GPU memory usage
	# )

	# tokenizer = AutoTokenizer.from_pretrained(
	# model_name,
	# padding_side="left",
	# truncation_side="left"
	# )

	# return model, tokenizer

	# except Exception as e:
	# st.error(f"Error loading model: {str(e)}")
	# return None, None

	# # Load the selected model with optimizations
	# if selected_model:
	# model, tokenizer = load_model(selected_model)

	# # Check if model loaded successfully
	# if model is not None:
	# st.success(f"Successfully loaded {selected_model}")
	# else:
	# st.warning("Please select a different model or check your hardware capabilities")

	# # Function to generate text
	# def generate_response(prompt, model, tokenizer):
	# try:
	# inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

	# with torch.no_grad():
	# outputs = model.generate(
	# inputs["input_ids"],
	# max_length=256,
	# num_return_sequences=1,
	# temperature=0.7,
	# do_sample=True,
	# pad_token_id=tokenizer.pad_token_id
	# )

	# response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	# return response

	# except Exception as e:
	# return f"Error generating response: {str(e)}"
	# ################

	# model = AutoModelForCausalLM.from_pretrained(
	# "meta-llama/Meta-Llama-3-8B-Instruct",
	# torch_dtype=torch.float16, # Use half precision
	# device_map="auto", # Automatic device mapping
	# load_in_8bit=True # Load in 8-bit precision
	# )
	temperature = st.slider(
	"Temperature",
	0.0, 1.0, 0.7,
	help="Controls randomness in generation"
	)

	st.button("🔄 New Conversation", on_click=reset_conversation)
	with st.container():
	st.markdown(f"""
	<div class="sidebar-info">
	<h4>Current Model: {selected_model}</h4>
	<p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
	</div>
	""", unsafe_allow_html=True)

	feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
	st.sidebar.markdown(
	f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
	unsafe_allow_html=True
	)

	# Display conversation
	for message in st.session_state.messages:
	with st.chat_message(message["role"]):
	st.markdown(message["content"])

	# Main content
	if 'task_choice' not in st.session_state:
	col1, col2 = st.columns(2)
	with col1:
	if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
	st.session_state.task_choice = "Data Generation"
	with col2:
	if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
	st.session_state.task_choice = "Data Labeling"

	if "task_choice" in st.session_state:
	if st.session_state.task_choice == "Data Generation":
	st.header("📝 Data Generation")

	# 1. Domain selection
	domain_selection = st.selectbox("Domain", [
	"Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
	])

	# 2. Handle custom domain input
	custom_domain_valid = True # Assume valid until proven otherwise

	if domain_selection == "Custom":
	domain = st.text_input("Specify custom domain")
	if not domain.strip():
	st.error("Please specify a domain name.")
	custom_domain_valid = False
	else:
	domain = domain_selection

	# Classification type selection
	classification_type = st.selectbox(
	"Classification Type",
	["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
	)
	# Labels setup based on classification type
	#labels = []
	labels = []
	labels_valid = False
	errors = []

	def validate_binary_labels(labels):
	errors = []
	normalized = [label.strip().lower() for label in labels]

	if not labels[0].strip():
	errors.append("First class name is required.")
	if not labels[1].strip():
	errors.append("Second class name is required.")
	if normalized[0] == normalized[1] and all(normalized):
	errors.append("Class names must be different.")
	return errors

	if classification_type == "Sentiment Analysis":
	st.write("### Sentiment Analysis Labels (Fixed)")
	col1, col2, col3 = st.columns(3)
	with col1:
	st.text_input("First class", "Positive", disabled=True)
	with col2:
	st.text_input("Second class", "Negative", disabled=True)
	with col3:
	st.text_input("Third class", "Neutral", disabled=True)
	labels = ["Positive", "Negative", "Neutral"]

	elif classification_type == "Binary Classification":
	st.write("### Binary Classification Labels")
	col1, col2 = st.columns(2)
	with col1:
	label_1 = st.text_input("First class", "Positive")
	with col2:
	label_2 = st.text_input("Second class", "Negative")

	labels = [label_1, label_2]
	errors = validate_binary_labels(labels)

	if errors:
	st.error("\n".join(errors))
	else:
	st.success("Binary class names are valid and unique!")


	elif classification_type == "Multi-Class Classification":
	st.write("### Multi-Class Classification Labels")

	default_labels_by_domain = {
	"News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
	"AG News": ["World", "Sports", "Business", "Sci/Tech"],
	"Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
	"Food & Dining", "Local Experience", "Adventure Activities",
	"Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
	"Luxury Tourism"],
	"Restaurant reviews": ["Italian", "French", "American"],
	"E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
	"Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
	"Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
	"Books & Stationery","Toys & Games", "Sports & Fitness",
	"Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
	}

	num_classes = st.slider("Number of classes", 3, 15, 3)

	# Get defaults for selected domain, or empty list
	defaults = default_labels_by_domain.get(domain, [])

	labels = []
	errors = []
	cols = st.columns(3)

	for i in range(num_classes):
	with cols[i % 3]:
	default_value = defaults[i] if i < len(defaults) else ""
	label_input = st.text_input(f"Class {i+1}", default_value)
	normalized_label = label_input.strip().title()

	if not normalized_label:
	errors.append(f"Class {i+1} name is required.")
	else:
	labels.append(normalized_label)

	# Check for duplicates (case-insensitive)
	if len(labels) != len(set(labels)):
	errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")

	# Show validation results
	if errors:
	for error in errors:
	st.error(error)
	else:
	st.success("All Labels names are valid and unique!")
	labels_valid = not errors # Will be True only if there are no label errors

	##############
	#new 22/4/2025
	# add additional attributes
	add_attributes = st.checkbox("Add additional attributes (optional)")
	additional_attributes = []

	if add_attributes:
	num_attributes = st.slider("Number of attributes to add", 1, 5, 1)
	for i in range(num_attributes):
	st.markdown(f"#### Attribute {i+1}")
	attr_name = st.text_input(f"Name of attribute {i+1}", key=f"attr_name_{i}")
	attr_topics = st.text_input(f"Topics (comma-separated) for {attr_name}", key=f"attr_topics_{i}")
	if attr_name and attr_topics:
	topics_list = [topic.strip() for topic in attr_topics.split(",") if topic.strip()]
	additional_attributes.append({"attribute": attr_name, "topics": topics_list})

	################

	# Generation parameters
	col1, col2 = st.columns(2)
	with col1:
	min_words = st.number_input("Min words", 1, 100, 20)
	with col2:
	max_words = st.number_input("Max words", min_words, 100, 50)

	# Few-shot examples
	use_few_shot = st.toggle("Use few-shot examples")
	few_shot_examples = []
	if use_few_shot:
	num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
	for i in range(num_examples):
	with st.expander(f"Example {i+1}"):
	content = st.text_area(f"Content", key=f"few_shot_content_{i}")
	label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
	if content and label:
	few_shot_examples.append({"content": content, "label": label})

	num_to_generate = st.number_input("Number of examples", 1, 100, 10)
	#sytem role after
	# System role customization
	#default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
	# System role customization
	default_system_role = (
	f"You are a seasoned expert in {classification_type}, specializing in the {domain} domain. "
	f" Your primary responsibility is to generate high-quality, diverse, and unique text examples "
	f"tailored to this domain. Please ensure that each example adheres to the specified length "
	f"requirements, ranging from {min_words} to {max_words} words, and avoid any repetition in the generated content."
	)
	system_role = st.text_area("Modify System Role (optional)",
	value=default_system_role,
	key="system_role_input")
	st.session_state['system_role'] = system_role if system_role else default_system_role
	# Labels initialization
	#labels = []


	user_prompt = st.text_area("User Prompt (optional)")

	# Updated prompt template including system role
	prompt_template = PromptTemplate(
	input_variables=["system_role", "classification_type", "domain", "num_examples",
	"min_words", "max_words", "labels", "user_prompt", "few_shot_examples", "additional_attributes"],
	template=(
	"{system_role}\n"
	"- Use the following parameters:\n"
	"- Generate {num_examples} examples\n"
	"- Each example should be between {min_words} to {max_words} words long\n"
	"- Use these labels: {labels}.\n"
	"- Use the following additional attributes:\n"
	"- {additional_attributes}\n"
	"- Generate the examples in this format: 'Example text. Label: label'\n"
	"- Do not include word counts or any additional information\n"
	"- Always use your creativity and intelligence to generate unique and diverse text data\n"
	"- In sentiment analysis, ensure that the sentiment classification is clearly identified as Positive, Negative, or Neutral. Do not leave the sentiment ambiguous.\n"
	"- In binary sentiment analysis, classify text strictly as either Positive or Negative. Do not include or imply Neutral as an option.\n"
	"- Write unique examples every time.\n"
	"- DO NOT REPEAT your gnerated text. \n"
	"- For each Output, describe it once and move to the next.\n"
	"- List each Output only once, and avoid repeating details.\n"
	"- Additional instructions: {user_prompt}\n\n"
	"- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
	"- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"

	)
	)
	# template=(
	# "{system_role}\n"
	# "- Use the following parameters:\n"
	# "- Generate {num_examples} examples\n"
	# "- Each example should be between {min_words} to {max_words} words long\n"
	# "- Use these labels: {labels}.\n"
	# "- Use the following additional attributes:\n"
	# "{additional_attributes}\n"
	# #"- Format each example like this: 'Example text. Label: [label]. Attribute1: [topic1]. Attribute2: [topic2]'\n"
	# "- Generate the examples in this format: 'Example text. Label: label'\n"
	# "- Additional instructions: {user_prompt}\n"
	# "- Use these few-shot examples if provided:\n{few_shot_examples}\n"
	# "- Think step by step and ensure examples are unique and not repeated."
	# )
	# )
	##########new 22/4/2025
	formatted_attributes = "\n".join([
	f"- {attr['attribute']}: {', '.join(attr['topics'])}" for attr in additional_attributes
	])
	#######################

	# Generate system prompt
	system_prompt = prompt_template.format(
	system_role=st.session_state['system_role'],
	classification_type=classification_type,
	domain=domain,
	num_examples=num_to_generate,
	min_words=min_words,
	max_words=max_words,
	labels=", ".join(labels),
	user_prompt=user_prompt,
	few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else "",
	additional_attributes=formatted_attributes
	)


	# Store system prompt in session state
	st.session_state['system_prompt'] = system_prompt

	# Display system prompt
	st.write("System Prompt:")
	st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
	height=400, disabled=True)


	if st.button("🎯 Generate Examples"):
	#
	errors = []
	if domain_selection == "Custom" and not domain.strip():
	st.warning("Custom domain name is required.")
	elif len(labels) != len(set(labels)):
	st.warning("Class names must be unique.")
	elif any(not lbl.strip() for lbl in labels):
	st.warning("All class labels must be filled in.")
	#else:
	#st.success("Generating examples for domain: {domain}")

	#if not custom_domain_valid:
	#st.warning("Custom domain name is required.")
	#elif not labels_valid:
	#st.warning("Please fix the label errors before generating examples.")
	#else:
	# Proceed to generate examples
	#st.success(f"Generating examples for domain: {domain}")

	with st.spinner("Generating examples..."):
	try:
	stream = client.chat.completions.create(
	model=selected_model,
	messages=[{"role": "system", "content": st.session_state['system_prompt']}],
	temperature=temperature,
	stream=True,
	#max_tokens=80000,
	max_tokens=4000,
	top_p=0.9,
	# repetition_penalty=1.2,
	#frequency_penalty=0.5, # Discourages frequent words
	#presence_penalty=0.6,
	)
	#st.session_state['system_prompt'] = system_prompt
	#new 24 march
	st.session_state.messages.append({"role": "user", "content": system_prompt})
	# # ####################
	response = st.write_stream(stream)
	st.session_state.messages.append({"role": "assistant", "content": response})
	# Initialize session state variables if they don't exist
	if 'system_prompt' not in st.session_state:
	st.session_state.system_prompt = system_prompt

	if 'response' not in st.session_state:
	st.session_state.response = response

	if 'generated_examples' not in st.session_state:
	st.session_state.generated_examples = []

	if 'generated_examples_csv' not in st.session_state:
	st.session_state.generated_examples_csv = None

	if 'generated_examples_json' not in st.session_state:
	st.session_state.generated_examples_json = None

	# Parse response and generate examples list
	examples_list = []
	for line in response.split('\n'):
	if line.strip():
	parts = line.rsplit('Label:', 1)
	if len(parts) == 2:
	text = parts[0].strip()
	label = parts[1].strip()
	if text and label:
	examples_list.append({
	'text': text,
	'label': label,
	'system_prompt': st.session_state.system_prompt,
	'system_role': st.session_state.system_role,
	'task_type': 'Data Generation',
	'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	})

	# example_dict = {
	# 'text': text,
	# 'label': label,
	# 'system_prompt': st.session_state.system_prompt,
	# 'system_role': st.session_state.system_role,
	# 'task_type': 'Data Generation',
	# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	# }
	# for attr in additional_attributes:
	# example_dict[attr['attribute']] = random.choice(attr['topics'])

	# examples_list.append(example_dict)


	if examples_list:
	# Update session state with new data
	st.session_state.generated_examples = examples_list

	# Generate CSV and JSON data
	df = pd.DataFrame(examples_list)
	st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
	st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')

	# Vertical layout with centered "or" between buttons
	st.download_button(
	"📥 Download Generated Examples (CSV)",
	st.session_state.generated_examples_csv,
	"generated_examples.csv",
	"text/csv",
	key='download-csv-persistent'
	)

	# Add space and center the "or"
	st.markdown("""
	<div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
	""", unsafe_allow_html=True)

	st.download_button(
	"📥 Download Generated Examples (JSON)",
	st.session_state.generated_examples_json,
	"generated_examples.json",
	"application/json",
	key='download-json-persistent'
	)
	# # Display the labeled examples
	# st.markdown("##### 📋 Labeled Examples Preview")
	# st.dataframe(df, use_container_width=True)

	if st.button("Continue"):
	if follow_up == "Generate more examples":
	st.experimental_rerun()
	elif follow_up == "Data Labeling":
	st.session_state.task_choice = "Data Labeling"
	st.experimental_rerun()

	except Exception as e:
	st.error("An error occurred during generation.")
	st.error(f"Details: {e}")


	# Lableing Process
	elif st.session_state.task_choice == "Data Labeling":
	st.header("🏷️ Data Labeling")

	domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
	# 2. Handle custom domain input
	custom_domain_valid = True # Assume valid until proven otherwise

	if domain_selection == "Custom":
	domain = st.text_input("Specify custom domain")
	if not domain.strip():
	st.error("Please specify a domain name.")
	custom_domain_valid = False
	else:
	domain = domain_selection


	# Classification type selection
	classification_type = st.selectbox(
	"Classification Type",
	["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
	)
	#NNew edit
	# Labels setup based on classification type
	labels = []
	labels_valid = False
	errors = []

	if classification_type == "Sentiment Analysis":
	st.write("### Sentiment Analysis Labels (Fixed)")
	col1, col2, col3 = st.columns(3)
	with col1:
	label_1 = st.text_input("First class", "Positive", disabled=True)
	with col2:
	label_2 = st.text_input("Second class", "Negative", disabled=True)
	with col3:
	label_3 = st.text_input("Third class", "Neutral", disabled=True)
	labels = ["Positive", "Negative", "Neutral"]


	elif classification_type == "Binary Classification":
	st.write("### Binary Classification Labels")
	col1, col2 = st.columns(2)

	with col1:
	label_1 = st.text_input("First class", "Positive")
	with col2:
	label_2 = st.text_input("Second class", "Negative")

	errors = []
	labels = [label_1.strip(), label_2.strip()]


	# Strip and lower-case labels for validation
	label_1 = labels[0].strip()
	label_2 = labels[1].strip()

	# Check for empty class names
	if not label_1:
	errors.append("First class name is required.")
	if not label_2:
	errors.append("Second class name is required.")

	# Check for duplicates (case insensitive)
	if label_1.lower() == label_2.lower() and label_1 and label_2:
	errors.append("Class names must be different.")

	# Show errors or success
	if errors:
	for error in errors:
	st.error(error)
	else:
	st.success("Binary class names are valid and unique!")


	elif classification_type == "Multi-Class Classification":
	st.write("### Multi-Class Classification Labels")

	default_labels_by_domain = {
	"News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
	"AG News": ["World", "Sports", "Business", "Sci/Tech"],
	"Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
	"Food & Dining", "Local Experience", "Adventure Activities",
	"Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
	"Luxury Tourism"],
	"Restaurant reviews": ["Italian", "French", "American"],
	"E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
	"Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
	"Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
	"Books & Stationery","Toys & Games", "Sports & Fitness",
	"Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
	}



	# Ask user how many classes they want to define
	num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)

	# Use default labels based on selected domain, if available
	defaults = default_labels_by_domain.get(domain, [])

	labels = []
	errors = []
	cols = st.columns(3) # For nicely arranged label inputs

	for i in range(num_classes):
	with cols[i % 3]: # Distribute inputs across columns
	default_value = defaults[i] if i < len(defaults) else ""
	label_input = st.text_input(f"Label {i + 1}", default_value)
	normalized_label = label_input.strip().title()

	if not normalized_label:
	errors.append(f"Label {i + 1} is required.")
	else:
	labels.append(normalized_label)

	# Check for duplicates (case-insensitive)
	normalized_set = {label.lower() for label in labels}
	if len(labels) != len(normalized_set):
	errors.append("Label names must be unique (case-insensitive).")

	# Show validation results
	if errors:
	for error in errors:
	st.error(error)
	else:
	st.success("All label names are valid and unique!")

	labels_valid = not errors # True if no validation errors

	elif classification_type == "Named Entity Recognition (NER)":
	# # NER entity options
	# ner_entities = [
	# "PERSON - Names of people, fictional characters, historical figures",
	# "ORG - Companies, institutions, agencies, teams",
	# "LOC - Physical locations (mountains, oceans, etc.)",
	# "GPE - Countries, cities, states, political regions",
	# "DATE - Calendar dates, years, centuries",
	# "TIME - Times, durations",
	# "MONEY - Monetary values with currency"
	# ]
	# selected_entities = st.multiselect(
	# "Select entities to recognize",
	# ner_entities,
	# default=["PERSON - Names of people, fictional characters, historical figures",
	# "ORG - Companies, institutions, agencies, teams",
	# "LOC - Physical locations (mountains, oceans, etc.)",
	# "GPE - Countries, cities, states, political regions",
	# "DATE - Calendar dates, years, centuries",
	# "TIME - Times, durations",
	# "MONEY - Monetary values with currency"],
	# key="ner_entity_selection"
	# )
	#new 22/4/2025
	#if classification_type == "Named Entity Recognition (NER)":
	use_few_shot = True
	#new 22/4/2025
	few_shot_examples = [
	{"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
	{"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
	{"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
	{"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
	{"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
	{"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
	{"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
	{"content": "She bought the dress for $200.", "label": "MONEY: $200"},
	{"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
	{"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
	]
	###########

	st.write("### Named Entity Recognition (NER) Entities")

	# Predefined standard entities
	ner_entities = [
	"PERSON - Names of people, fictional characters, historical figures",
	"ORG - Companies, institutions, agencies, teams",
	"LOC - Physical locations (mountains, oceans, etc.)",
	"GPE - Countries, cities, states, political regions",
	"DATE - Calendar dates, years, centuries",
	"TIME - Times, durations",
	"MONEY - Monetary values with currency"
	]

	# User can add custom NER types
	custom_ner_entities = []
	if st.checkbox("Add custom NER entities?"):
	num_custom_ner = st.slider("Number of custom NER entities", 1, 10, 1)
	for i in range(num_custom_ner):
	st.markdown(f"#### Custom Entity {i+1}")
	custom_type = st.text_input(f"Entity type {i+1}", key=f"custom_ner_type_{i}")
	custom_description = st.text_input(f"Description for {custom_type}", key=f"custom_ner_desc_{i}")
	if custom_type and custom_description:
	custom_ner_entities.append(f"{custom_type.upper()} - {custom_description}")

	# Combine built-in and custom NERs
	all_ner_options = ner_entities + custom_ner_entities

	selected_entities = st.multiselect(
	"Select entities to recognize",
	all_ner_options,
	default=ner_entities
	)

	# Extract entity type names (before the dash)
	labels = [entity.split(" - ")[0].strip() for entity in selected_entities]

	if not labels:
	st.warning("Please select at least one entity type.")
	labels = ["PERSON"]

	##########

	# # Extract just the entity type (before the dash)
	# labels = [entity.split(" - ")[0] for entity in selected_entities]

	# if not labels:
	# st.warning("Please select at least one entity type")
	# labels = ["PERSON"] # Default if nothing selected





	#NNew edit
	# elif classification_type == "Multi-Class Classification":
	# st.write("### Multi-Class Classification Labels")

	# default_labels_by_domain = {
	# "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
	# "AG News": ["World", "Sports", "Business", "Sci/Tech"],
	# "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
	# "Food & Dining", "Local Experience", "Adventure Activities",
	# "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
	# "Luxury Tourism"],
	# "Restaurant reviews": ["Italian", "French", "American"]
	# }
	# num_classes = st.slider("Number of classes", 3, 10, 3)

	# # Get defaults for selected domain, or empty list
	# defaults = default_labels_by_domain.get(domain, [])

	# labels = []
	# errors = []
	# cols = st.columns(3)

	# for i in range(num_classes):
	# with cols[i % 3]:
	# default_value = defaults[i] if i < len(defaults) else ""
	# label_input = st.text_input(f"Class {i+1}", default_value)
	# normalized_label = label_input.strip().title()

	# if not normalized_label:
	# errors.append(f"Class {i+1} name is required.")
	# else:
	# labels.append(normalized_label)

	# # Check for duplicates (case-insensitive)
	# if len(labels) != len(set(labels)):
	# errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")

	# # Show validation results
	# if errors:
	# for error in errors:
	# st.error(error)
	# else:
	# st.success("All Labels names are valid and unique!")
	# labels_valid = not errors # Will be True only if there are no label errors




	# else:
	# num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
	# labels = []
	# cols = st.columns(3)
	# for i in range(num_classes):
	# with cols[i % 3]:
	# label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
	# labels.append(label)

	use_few_shot = st.toggle("Use few-shot examples for labeling")
	few_shot_examples = []
	if use_few_shot:
	num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
	for i in range(num_few_shot):
	with st.expander(f"Few-shot Example {i+1}"):
	content = st.text_area(f"Content", key=f"label_few_shot_content_{i}")
	label = st.selectbox(f"Label", labels, key=f"label_few_shot_label_{i}")
	if content and label:
	few_shot_examples.append(f"{content}\nLabel: {label}")

	num_examples = st.number_input("Number of examples to classify", 1, 100, 1)

	examples_to_classify = []
	if num_examples <= 10:
	for i in range(num_examples):
	example = st.text_area(f"Example {i+1}", key=f"example_{i}")
	if example:
	examples_to_classify.append(example)
	else:
	examples_text = st.text_area(
	"Enter examples (one per line)",
	height=300,
	help="Enter each example on a new line"
	)
	if examples_text:
	examples_to_classify = [ex.strip() for ex in examples_text.split('\n') if ex.strip()]
	if len(examples_to_classify) > num_examples:
	examples_to_classify = examples_to_classify[:num_examples]

	#New Wedyan
	#default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
	# System role customization
	default_system_role = (f"You are a highly skilled {classification_type} expert."
	f" Your task is to accurately classify the provided text examples within the {domain} domain."
	f" Ensure that all classifications are precise, context-aware, and aligned with domain-specific standards and best practices."
	)
	system_role = st.text_area("Modify System Role (optional)",
	value=default_system_role,
	key="system_role_input")
	st.session_state['system_role'] = system_role if system_role else default_system_role
	# Labels initialization
	#labels = []
	####

	user_prompt = st.text_area("User prompt (optional)", key="label_instructions")

	few_shot_text = "\n\n".join(few_shot_examples) if few_shot_examples else ""
	examples_text = "\n".join([f"{i+1}. {ex}" for i, ex in enumerate(examples_to_classify)])

	# Customize prompt template based on classification type
	if classification_type == "Named Entity Recognition (NER)":
	# label_prompt_template = PromptTemplate(
	# input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
	# template=(
	# "{system_role}\n"
	# #"- You are a professional Named Entity Recognition (NER) expert in {domain} domain. Your role is to identify and extract the following entity types: {labels}.\n"
	# "- For each text example provided, identify all entities of the requested types.\n"
	# "- Use the following entities: {labels}.\n"
	# "- Return each example followed by the entities you found in this format: 'Example text.\n \n Entities:\n [ENTITY_TYPE: entity text\n\n, ENTITY_TYPE: entity text\n\n, ...] or [No entities found]'\n"
	# "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n"
	# "- Be precise about entity boundaries - don't include unnecessary words.\n"
	# "- Do not provide any additional information or explanations.\n"
	# "- Additional instructions:\n {user_prompt}\n\n"
	# "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n"
	# "- Examples to analyze:\n{examples}\n\n"
	# "Output:\n"
	# )
	# )
	#new 22/4/2025
	# label_prompt_template = PromptTemplate(
	# input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
	# template=(
	# "{system_role}\n"
	# "- You are performing Named Entity Recognition (NER) in the domain of {domain}.\n"
	# "- Use the following entity types: {labels}.\n\n"
	# "### Reasoning Steps:\n"
	# "1. Read the example carefully.\n"
	# "2. For each named entity mentioned, determine its meaning and role in the sentence.\n"
	# "3. Think about the context: Is it a physical location (LOC)? A geopolitical region (GPE)? A person (PERSON)?\n"
	# "4. Based on the definition of each label, assign the most specific and correct label.\n\n"
	# "For example:\n"
	# "- 'Mount Everest' → LOC (it's a mountain)\n"
	# "- 'France' → GPE (it's a country)\n"
	# "- 'Microsoft' → ORG\n"
	# "- 'John Smith' → PERSON\n\n"
	# "- Return each example followed by the entities you found in this format:\n"
	# "'Example text.'\nEntities: [ENTITY_TYPE: entity text, ENTITY_TYPE: entity text, ...] or [No entities found]\n"
	# "- If no entities of the requested types are found, return 'No entities found'.\n"
	# "- Be precise about entity boundaries - don't include extra words.\n"
	# "- Do not explain or justify your answers.\n\n"
	# "Additional instructions:\n{user_prompt}\n\n"
	# "Few-shot examples:\n{few_shot_examples}\n\n"
	# "Examples to label:\n{examples}\n"
	# "Output:\n"
	# )
	#)
	# label_prompt_template = PromptTemplate(
	# input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
	# template=(
	# "{system_role}\n"
	# "- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
	# "- Use these entity types: {labels}.\n\n"
	# "### Output Format:\n"
	# # "Return each example followed by the entities you found in this format: 'Example text.\n Entities:\n [ENTITY_TYPE: entity text\n\"
	# "Return each example followed by the entities you found in this format: 'Example text.\n 'Entity types:\n "Then group the entities under each label like this:\n" "
	# #"Then Start with this line exactly: 'Entity types\n'\n"
	# #"Then group the entities under each label like this:\n"
	# "\n PERSON – Angela Merkel, John Smith\n\n"
	# "\ ORG – Google, United Nations\n\n"
	# "\n DATE – January 1st, 2023\n\n"
	# "\n ... and so on.\n\n"
	# "If entity {labels} not found, do not write it in your response\n"
	# "- Do NOT output them inline after the text.\n"
	# "- Do NOT repeat the sentence.\n"
	# "- If no entities are found for a type, skip it.\n"
	# "- Keep the format consistent.\n\n"
	# "User Instructions:\n{user_prompt}\n\n"
	# "Few-shot Examples:\n{few_shot_examples}\n\n"
	# "Examples to analyze:\n{examples}"
	# )
	# )


	label_prompt_template = PromptTemplate(
	input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
	template=(
	"{system_role}\n"
	"- You are an expert at Named Entity Recognition (NER) for domain: {domain}.\n"
	"- Use these entity types: {labels}.\n\n"
	"### Output Format:\n"
	"Return each example followed by the entities you found in this format:\n"
	"'Example text.\nEntity types:\n"
	"Then group the entities under each label like this:\n"
	"\nPERSON – Angela Merkel, John Smith\n"
	"ORG – Google, United Nations\n"
	"DATE – January 1st, 2023\n"
	"... and so on.\n\n"
	"Each new entities group should be in a new line.\n"
	"If entity type {labels} is not found, do not write it in your response.\n"
	"- Do NOT output them inline after the text.\n"
	"- Do NOT repeat the sentence.\n"
	"- If no entities are found for a type, skip it.\n"
	"- Keep the format consistent.\n\n"
	"User Instructions:\n{user_prompt}\n\n"
	"Few-shot Examples:\n{few_shot_examples}\n\n"
	"Examples to analyze:\n{examples}"
	)
	)

	#######
	else:
	label_prompt_template = PromptTemplate(

	input_variables=["system_role", "classification_type", "labels", "few_shot_examples", "examples","domain", "user_prompt"],
	template=(
	#"- Let'\s think step by step:"
	"{system_role}\n"
	# "- You are a professional {classification_type} expert in {domain} domain. Your role is to classify the following examples using these labels: {labels}.\n"
	"- Use the following instructions:\n"
	"- Use the following labels: {labels}.\n"
	"- Return the classified text followed by the label in this format: 'text. Label: [label]'\n"
	"- Do not provide any additional information or explanations\n"
	"- User prompt:\n {user_prompt}\n\n"
	"- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n"
	"- Examples to classify:\n{examples}\n\n"
	"- Think step by step then classify the examples"
	#"Output:\n"
	))

	# Check if few_shot_examples is already a formatted string
	# Check if few_shot_examples is already a formatted string
	if isinstance(few_shot_examples, str):
	formatted_few_shot = few_shot_examples
	# If it's a list of already formatted strings
	elif isinstance(few_shot_examples, list) and all(isinstance(ex, str) for ex in few_shot_examples):
	formatted_few_shot = "\n".join(few_shot_examples)
	# If it's a list of dictionaries with 'content' and 'label' keys
	elif isinstance(few_shot_examples, list) and all(isinstance(ex, dict) and 'content' in ex and 'label' in ex for ex in few_shot_examples):
	formatted_few_shot = "\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples])
	else:
	formatted_few_shot = ""
	# #new 22/4/2025
	# few_shot_examples = [
	# {"content": "Mount Everest is the tallest mountain in the world.", "label": "LOC: Mount Everest"},
	# {"content": "The President of the United States visited Paris last summer.", "label": "GPE: United States, GPE: Paris"},
	# {"content": "Amazon is expanding its offices in Berlin.", "label": "ORG: Amazon, GPE: Berlin"},
	# {"content": "J.K. Rowling wrote the Harry Potter books.", "label": "PERSON: J.K. Rowling"},
	# {"content": "Apple was founded in California in 1976.", "label": "ORG: Apple, GPE: California, DATE: 1976"},
	# {"content": "The Nile is the longest river in Africa.", "label": "LOC: Nile, GPE: Africa"},
	# {"content": "He arrived at 3 PM for the meeting.", "label": "TIME: 3 PM"},
	# {"content": "She bought the dress for $200.", "label": "MONEY: $200"},
	# {"content": "The event is scheduled for July 4th.", "label": "DATE: July 4th"},
	# {"content": "The World Health Organization is headquartered in Geneva.", "label": "ORG: World Health Organization, GPE: Geneva"}
	# ]
	# ###########
	# new 22/4/2025
	#formatted_few_shot = "\n".join([f"{ex['content']}\nEntities: [{ex['label']}]" for ex in few_shot_examples])
	formatted_few_shot = "\n\n".join([f"{ex['content']}\n\nEntity types\n{ex['label']}" for ex in few_shot_examples])

	###########
	system_prompt = label_prompt_template.format(
	system_role=st.session_state['system_role'],
	classification_type=classification_type,
	domain=domain,
	examples="\n".join(examples_to_classify),
	labels=", ".join(labels),
	user_prompt=user_prompt,
	few_shot_examples=formatted_few_shot
	)

	# Step 2: Store the system_prompt in st.session_state
	st.session_state['system_prompt'] = system_prompt
	#::contentReference[oaicite:0]{index=0}
	st.write("System Prompt:")
	#st.code(system_prompt)
	#st.code(st.session_state['system_prompt'])
	st.text_area("System Prompt", value=st.session_state['system_prompt'], height=300, max_chars=None, key=None, help=None, disabled=True)



	if st.button("🏷️ Label Data"):
	if examples_to_classify:
	with st.spinner("Labeling data..."):
	#Generate the system prompt based on classification type
	if classification_type == "Named Entity Recognition (NER)":
	system_prompt = label_prompt_template.format(
	system_role=st.session_state['system_role'],
	labels=", ".join(labels),
	domain = domain,
	few_shot_examples=few_shot_text,
	examples=examples_text,
	user_prompt=user_prompt
	#new
	#'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	)
	# if classification_type == "Named Entity Recognition (NER)":
	# # Step 1: Split the full response by example
	# raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
	# inputs = [ex.strip() for ex in examples_to_classify]

	# # Step 2: Match inputs with NER output blocks
	# labeled_examples = []
	# for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
	# labeled_examples.append({
	# 'text': text,
	# 'entities': f"Entity types\n{output_block.strip()}",
	# 'system_prompt': st.session_state.system_prompt,
	# 'system_role': st.session_state.system_role,
	# 'task_type': 'Named Entity Recognition (NER)',
	# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	# })

	# if classification_type == "Named Entity Recognition (NER)":
	# # Step 1: Split the full response by example
	# raw_outputs = [block.strip() for block in response.strip().split("Entity types") if block.strip()]
	# inputs = [ex.strip() for ex in examples_to_classify]

	# # Step 2: Match inputs with NER output blocks
	# labeled_examples = []
	# for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
	# labeled_examples.append({
	# 'text': text,
	# 'entities': f"Entity types\n{output_block.strip()}",
	# 'system_prompt': st.session_state.system_prompt,
	# 'system_role': st.session_state.system_role,
	# 'task_type': 'Named Entity Recognition (NER)',
	# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	# })


	# import re

	# if classification_type == "Named Entity Recognition (NER)":
	# # Use regex to split on "Entity types" while keeping it attached to each block
	# blocks = re.split(r"(Entity types)", response.strip())

	# # Recombine 'Entity types' with each block after splitting
	# raw_outputs = [
	# (blocks[i] + blocks[i+1]).strip()
	# for i in range(1, len(blocks) - 1, 2)
	# ]

	# inputs = [ex.strip() for ex in examples_to_classify]

	# labeled_examples = []
	# for i, (text, output_block) in enumerate(zip(inputs, raw_outputs)):
	# labeled_examples.append({
	# 'text': text,
	# 'entities': output_block,
	# 'system_prompt': st.session_state.system_prompt,
	# 'system_role': st.session_state.system_role,
	# 'task_type': 'Named Entity Recognition (NER)',
	# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	# })


	else:
	system_prompt = label_prompt_template.format(
	classification_type=classification_type,
	system_role=st.session_state['system_role'],
	domain = domain,
	labels=", ".join(labels),
	few_shot_examples=few_shot_text,
	examples=examples_text,
	user_prompt=user_prompt
	)
	try:
	stream = client.chat.completions.create(
	model=selected_model,
	messages=[{"role": "system", "content": system_prompt}],
	temperature=temperature,
	stream=True,
	#max_tokens=20000,
	max_tokens=4000,
	top_p = 0.9,

	)
	#new 24 March
	# Append user message
	st.session_state.messages.append({"role": "user", "content": system_prompt})
	#################
	response = st.write_stream(stream)
	st.session_state.messages.append({"role": "assistant", "content": response})
	# Display the labeled examples
	# # Optional: If you want to add it as a chat-style message log
	# preview_str = st.session_state.labeled_preview.to_markdown(index=False)
	# st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})


	# # Stream response and append assistant message
	# #14/4/2024
	# response = st.write_stream(stream)
	# st.session_state.messages.append({"role": "assistant", "content": response})

	# Initialize session state variables if they don't exist
	if 'system_prompt' not in st.session_state:
	st.session_state.system_prompt = system_prompt

	if 'response' not in st.session_state:
	st.session_state.response = response

	if 'generated_examples' not in st.session_state:
	st.session_state.generated_examples = []

	if 'generated_examples_csv' not in st.session_state:
	st.session_state.generated_examples_csv = None

	if 'generated_examples_json' not in st.session_state:
	st.session_state.generated_examples_json = None




	# Save labeled examples to CSV
	#new 14/4/2025
	#labeled_examples = []
	# if classification_type == "Named Entity Recognition (NER)":
	# labeled_examples = []
	# for line in response.split('\n'):
	# if line.strip():
	# parts = line.rsplit('Entities:', 1)
	# if len(parts) == 2:
	# text = parts[0].strip()
	# entities = parts[1].strip()
	# if text and entities:
	# labeled_examples.append({
	# 'text': text,
	# 'entities': entities,
	# 'system_prompt': st.session_state.system_prompt,
	# 'system_role': st.session_state.system_role,
	# 'task_type': 'Named Entity Recognition (NER)',
	# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	# })

	#new 22/4/2025
	labeled_examples = []
	if classification_type == "Named Entity Recognition (NER)":
	labeled_examples = [{
	'ner_output': response.strip(),
	'system_prompt': st.session_state.system_prompt,
	'system_role': st.session_state.system_role,
	'task_type': 'Named Entity Recognition (NER)',
	'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	}]

	######


	else:
	labeled_examples = []
	for line in response.split('\n'):
	if line.strip():
	parts = line.rsplit('Label:', 1)
	if len(parts) == 2:
	text = parts[0].strip()
	label = parts[1].strip()
	if text and label:
	labeled_examples.append({
	'text': text,
	'label': label,
	'system_prompt': st.session_state.system_prompt,
	'system_role': st.session_state.system_role,
	'task_type': 'Data Labeling',
	'Use few-shot example?': 'Yes' if use_few_shot else 'No',
	})
	# Save and provide download options
	if labeled_examples:
	# Update session state
	st.session_state.labeled_examples = labeled_examples

	# Convert to CSV and JSON
	df = pd.DataFrame(labeled_examples)
	#new 22/4/2025
	# CSV
	st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')

	# JSON
	st.session_state.labeled_examples_json = json.dumps({
	"metadata": {
	"domain": domain,
	"labels": labels,
	"used_few_shot": use_few_shot,
	"task_type": "Named Entity Recognition (NER)",
	"timestamp": datetime.now().isoformat()
	},
	"examples": labeled_examples
	}, indent=2).encode('utf-8')

	############
	# CSV
	# st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')

	# # JSON
	# st.session_state.labeled_examples_json = json.dumps({
	# "metadata": {
	# "domain": domain,
	# "labels": labels,
	# "used_few_shot": use_few_shot,
	# "task_type": "Named Entity Recognition (NER)",
	# "timestamp": datetime.now().isoformat()
	# },
	# "examples": labeled_examples
	# }, indent=2).encode('utf-8')

	########
	# st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
	# st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')

	# Download buttons
	st.download_button(
	"📥 Download Labeled Examples (CSV)",
	st.session_state.labeled_examples_csv,
	"labeled_examples.csv",
	"text/csv",
	key='download-labeled-csv'
	)

	st.markdown("""
	<div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
	""", unsafe_allow_html=True)

	st.download_button(
	"📥 Download Labeled Examples (JSON)",
	st.session_state.labeled_examples_json,
	"labeled_examples.json",
	"application/json",
	key='download-labeled-json'
	)
	# Display the labeled examples
	st.markdown("##### 📋 Labeled Examples Preview")
	st.dataframe(df, use_container_width=True)
	# Display section
	#st.markdown("### 📋 Labeled Examples Preview")
	#st.dataframe(st.session_state.labeled_preview, use_container_width=True)



	# if labeled_examples:
	# df = pd.DataFrame(labeled_examples)
	# csv = df.to_csv(index=False).encode('utf-8')
	# st.download_button(
	# "📥 Download Labeled Examples",
	# csv,
	# "labeled_examples.csv",
	# "text/csv",
	# key='download-labeled-csv'
	# )
	# # Add space and center the "or"
	# st.markdown("""
	# <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . . or</div>
	# """, unsafe_allow_html=True)

	# if labeled_examples:
	# df = pd.DataFrame(labeled_examples)
	# csv = df.to_csv(index=False).encode('utf-8')
	# st.download_button(
	# "📥 Download Labeled Examples",
	# csv,
	# "labeled_examples.json",
	# "text/json",
	# key='download-labeled-JSON'
	# )

	# Add follow-up interaction options
	#st.markdown("---")
	#follow_up = st.radio(
	#"What would you like to do next?",
	#["Label more data", "Data Generation"],
	# key="labeling_follow_up"
	# )

	if st.button("Continue"):
	if follow_up == "Label more data":
	st.session_state.examples_to_classify = []
	st.experimental_rerun()
	elif follow_up == "Data Generation":
	st.session_state.task_choice = "Data Labeling"
	st.experimental_rerun()

	except Exception as e:
	st.error("An error occurred during labeling.")
	st.error(f"Details: {e}")
	else:
	st.warning("Please enter at least one example to classify.")

	#st.session_state.messages.append({"role": "assistant", "content": response})




	# Footer
	st.markdown("---")
	st.markdown(
	"""
	<div style='text-align: center'>
	<p>Made with ❤️ by Wedyan AlSakran 2025</p>
	</div>
	""",
	unsafe_allow_html=True
	)