Spaces:

fschwartzer
/

streamlit_chatbot

Runtime error

App Files Files Community

streamlit_chatbot / app.py

fschwartzer

Update app.py

f2de8aa verified about 1 year ago

raw

history blame

7.51 kB

	import datetime
	import html

	import pandas as pd
	import sentencepiece as spm
	import streamlit as st
	from prophet import Prophet
	from transformers import BartForConditionalGeneration, TapexTokenizer, T5ForConditionalGeneration, T5Tokenizer

	# Page banner rendered as raw HTML (unsafe_allow_html=True): three coloured
	# dots (green, red, yellow) next to the "PROTAX" title, with the expansion
	# "PROphet & TApex EXplorer" centred underneath.
	st.markdown("""
	<div style='display: flex; flex-direction: column; align-items: center;'>
	<div style='display: flex; align-items: center;'>
	<div style='width: 20px; height: 20px; background-color: green; border-radius: 50%; margin-right: 2px;'></div>
	<div style='width: 20px; height: 20px; background-color: red; border-radius: 50%; margin-right: 2px;'></div>
	<div style='width: 20px; height: 20px; background-color: yellow; border-radius: 50%; margin-right: 10px;'></div>
	<span style='font-size: 40px; font-weight: bold;'>PROTAX</span>
	</div>
	<div style='text-align: center; width: 100%;'>
	<span style='font-size: 20px; font-weight: bold; color: #333;'>
	<strong>PRO</strong>phet & <strong>TA</strong>pex E<strong>X</strong>plorer</span>
	</div>
	</div>
	""", unsafe_allow_html=True)

	# File upload interface: accepts one CSV or XLSX file. The result is tested
	# for truthiness below to decide whether the rest of the app runs.
	uploaded_file = st.file_uploader("Carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])

	# Everything below runs only once a file has been uploaded.
	if uploaded_file:
	    # The time-series pass runs only while 'all_anomalies' is missing from the
	    # session state, i.e. once per session.
	    # NOTE(review): uploading a different file later in the same session
	    # therefore keeps showing the first file's anomalies - confirm intent.
	    if 'all_anomalies' not in st.session_state:
	        with st.spinner('Aplicando modelo de série temporal...'):
	            # Load the file into a DataFrame. The extension is compared
	            # case-insensitively so that e.g. "DATA.CSV" does not fall through
	            # both branches and leave `df` undefined.
	            file_name = uploaded_file.name.lower()
	            if file_name.endswith('.csv'):
	                df = pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
	            elif file_name.endswith('.xlsx'):
	                df = pd.read_excel(uploaded_file)

	            # Data preprocessing for Prophet: the values start at the third
	            # row and tenth column (the last column is dropped); missing
	            # cells become 0.
	            new_df = df.iloc[2:, 9:-1].fillna(0)
	            # The second row carries the column headers for that region.
	            new_df.columns = df.iloc[1, 9:-1]
	            # Strip a trailing " (<digits>)" suffix from each header.
	            new_df.columns = new_df.columns.str.replace(r" \(\d+\)", "", regex=True)
	# Portuguese month abbreviations mapped to two-digit month numbers.
	month_dict = {
	    'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
	    'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
	    'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12',
	}

	def convert_column_name(column_name):
	    """Convert a "<month abbrev>/<year>" header into "MM/YYYY".

	    Args:
	        column_name: Header taken from the uploaded table.

	    Returns:
	        "MM/YYYY" for headers containing a "/": the month is looked up in
	        ``month_dict`` ("00" when unknown) and the year keeps only its
	        digits. The label header 'Rótulos de Linha', non-string headers
	        and headers without a "/" are returned unchanged instead of
	        raising, so one stray column cannot abort the whole upload.
	    """
	    if column_name == 'Rótulos de Linha' or not isinstance(column_name, str):
	        return column_name

	    parts = column_name.split('/')
	    if len(parts) < 2:
	        # Not a "<month>/<year>" header; leave it for the caller to handle.
	        return column_name

	    month = parts[0].strip()
	    # Keeping only digits also discards surrounding whitespace or suffixes.
	    year = ''.join(filter(str.isdigit, parts[1]))
	    month_number = month_dict.get(month, '00')
	    return f"{month_number}/{year}"

	# Normalise every header to "MM/YYYY" and parse it; headers that are not
	# dates become NaT because of errors='coerce'.
	converted_labels = [convert_column_name(col) for col in new_df.columns]
	column_labels = list(pd.to_datetime(converted_labels, errors='coerce'))
	# Name the label column by position. Renaming through a {old: new} mapping
	# keyed on the first label (normally NaT at this point) would rename every
	# other unparseable column to 'Rotulo' as well and create duplicates.
	column_labels[0] = 'Rotulo'
	new_df.columns = column_labels
	df_clean = new_df.copy()

	# Columns whose label parsed to a real timestamp hold the monthly values.
	# Both the labels and the matching boolean mask are the same for every row,
	# so they are computed once instead of once per iteration.
	date_columns = [col for col in df_clean.columns if isinstance(col, pd.Timestamp)]
	is_date_column = [isinstance(col, pd.Timestamp) for col in df_clean.columns]

	# Per-row anomaly frames, concatenated once after the loop.
	anomaly_frames = []

	# Process each row in the DataFrame
	for _, row in df_clean.iterrows():
	    data = pd.DataFrame({
	        'ds': date_columns,
	        # Coerce to numbers: cells that arrive as text would otherwise make
	        # the `> 0` filter below raise; unparseable cells become NaN and
	        # are dropped by that filter.
	        'y': pd.to_numeric(row[is_date_column], errors='coerce').values,
	    })

	    # Keep positive observations only, in chronological order so that the
	    # positional assignment of `real` below lines up with the forecast rows.
	    data = data[data['y'] > 0].sort_values('ds').reset_index(drop=True)
	    if len(data) < 2:
	        print(f"Skipping group {row['Rotulo']} because there are less than 2 non-zero observations.")
	        continue

	    try:
	        model = Prophet(interval_width=0.95)
	        model.fit(data)
	    except ValueError as e:
	        print(f"Skipping group {row['Rotulo']} due to error: {e}")
	        continue

	    future = model.make_future_dataframe(periods=12, freq='M')
	    forecast = model.predict(future)

	    # Rows beyond the observed history have no real value; they are padded
	    # with None and therefore never satisfy either comparison below.
	    padding = [None] * (len(forecast) - len(data))
	    forecast['real'] = list(data['y']) + padding
	    outside_interval = (
	        (forecast['real'] < forecast['yhat_lower'])
	        | (forecast['real'] > forecast['yhat_upper'])
	    )

	    # Copy before adding a column so the write does not target a slice of
	    # `forecast`.
	    anomalies = forecast.loc[outside_interval, ['ds', 'real']].copy()
	    anomalies['Group'] = row['Rotulo']
	    if not anomalies.empty:
	        anomaly_frames.append(anomalies)

	# Always expose the three expected columns, even when nothing was flagged,
	# so later column lookups do not fail on an empty result.
	if anomaly_frames:
	    all_anomalies = pd.concat(anomaly_frames, ignore_index=True)
	else:
	    all_anomalies = pd.DataFrame(columns=['ds', 'real', 'Group'])

	# Store the result in session state
	all_anomalies = all_anomalies.rename(
	    columns={"ds": "datetime", "real": "monetary value", "Group": "group"}
	)
	all_anomalies['monetary value'] = all_anomalies['monetary value'].astype('float')
	# Keep anomalies of at least ten million. The threshold must be one numeric
	# literal: written as `10,000,000.00` it parses as a tuple inside the
	# brackets and is used as an index instead of a comparison value.
	all_anomalies = all_anomalies[all_anomalies['monetary value'] >= 10_000_000.00].copy()
	# Sort while the values are still numeric; sorting the formatted strings
	# would order them lexicographically.
	all_anomalies = all_anomalies.sort_values(by=['monetary value'], ascending=False)
	all_anomalies['monetary value'] = all_anomalies['monetary value'].apply(lambda x: f"{x:.2f}")
	# Every column is stored as text for the table question-answering step.
	all_anomalies = all_anomalies.fillna('').astype(str)
	st.session_state['all_anomalies'] = all_anomalies

	@st.cache_resource(show_spinner=False)
	def _load_models():
	    """Load the translation and TAPEX checkpoints and return them as a tuple.

	    Streamlit re-executes the script on every widget interaction; caching
	    the loaded objects as a resource avoids reloading all four checkpoints
	    on each rerun.

	    Returns:
	        (pt_en_translator, en_pt_translator, tokenizer, tapex_model,
	        tapex_tokenizer)
	    """
	    # Load translation models
	    pt_en = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5")
	    en_pt = T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5")
	    # NOTE(review): one tokenizer (from the pt-en checkpoint) serves both
	    # translation directions - confirm the en-pt checkpoint shares its vocabulary.
	    t5_tokenizer = T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")

	    # Load TAPEX model
	    tapex = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
	    tapex_tok = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
	    return pt_en, en_pt, t5_tokenizer, tapex, tapex_tok

	pt_en_translator, en_pt_translator, tokenizer, tapex_model, tapex_tokenizer = _load_models()

	def translate(text, model, tokenizer, source_lang="pt", target_lang="en"):
	    """Translate ``text`` with a seq2seq ``model`` and its ``tokenizer``.

	    Args:
	        text: Text to translate.
	        model: Object exposing ``generate(input_ids)``.
	        tokenizer: Object exposing ``encode`` and ``decode``.
	        source_lang: Language code of ``text`` ("pt" or "en").
	        target_lang: Language code to translate into ("pt" or "en").

	    Returns:
	        The decoded first generated sequence, special tokens removed.
	    """
	    # The language arguments were previously ignored. They now select the
	    # task prefix shown in the unicamp-dl translation model cards
	    # ("translate Portuguese to English: ..."). Unknown codes leave the
	    # text unprefixed, which is the earlier behaviour.
	    language_names = {"pt": "Portuguese", "en": "English"}
	    source_name = language_names.get(source_lang)
	    target_name = language_names.get(target_lang)
	    if source_name and target_name:
	        text = f"translate {source_name} to {target_name}: {text}"

	    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
	    # NOTE(review): generate() runs with the model's default length limit;
	    # long inputs may come back truncated - confirm this is acceptable.
	    outputs = model.generate(input_ids)
	    return tokenizer.decode(outputs[0], skip_special_tokens=True)

	def response(user_question, table_data):
	    """Answer a Portuguese question about ``table_data`` in Portuguese.

	    The question is translated to English, put to the TAPEX model together
	    with the table, and the first decoded answer is translated back.

	    Args:
	        user_question: Question text as typed by the user.
	        table_data: Table handed to the TAPEX tokenizer.

	    Returns:
	        The answer translated back to Portuguese.
	    """
	    english_question = translate(
	        user_question, pt_en_translator, tokenizer,
	        source_lang="pt", target_lang="en",
	    )

	    # One query against one table, returned as PyTorch tensors.
	    model_inputs = tapex_tokenizer(
	        table=table_data,
	        query=[english_question],
	        padding=True,
	        return_tensors="pt",
	        truncation=True,
	    )
	    generated = tapex_model.generate(**model_inputs)
	    decoded_answers = tapex_tokenizer.batch_decode(generated, skip_special_tokens=True)
	    english_answer = decoded_answers[0]

	    return translate(
	        english_answer, en_pt_translator, tokenizer,
	        source_lang="en", target_lang="pt",
	    )

	# Streamlit interface: preview the first rows of the stored anomaly table.
	st.dataframe(st.session_state['all_anomalies'].head())

	# Chat history: a list of (sender, message) pairs kept across reruns.
	if 'history' not in st.session_state:
	    st.session_state['history'] = []

	user_question = st.text_input("Escreva sua questão aqui:", "")
	clear_clicked = st.button("Limpar")

	if clear_clicked:
	    # The text box still holds the previous question on this rerun, so
	    # clearing takes precedence over answering it again.
	    st.session_state['history'] = []
	elif user_question:
	    bot_response = response(user_question, st.session_state['all_anomalies'])
	    st.session_state['history'].append(('👤', user_question))
	    st.session_state['history'].append(('🤖', bot_response))

	# The history loop is the only place messages are drawn; rendering the
	# newest exchange separately as well would show it twice.
	for sender, message in st.session_state['history']:
	    if sender == '👤':
	        st.markdown(f"👤 {message}")
	    elif sender == '🤖':
	        # The answer is derived from uploaded data and goes into raw HTML,
	        # so it is escaped before interpolation.
	        st.markdown(
	            f"<div style='text-align: right'>🤖 {html.escape(message)}</div>",
	            unsafe_allow_html=True,
	        )
	# Fallback branch: shows a warning asking the user to upload a CSV or XLSX
	# file. NOTE(review): indentation was lost in this copy; this `else`
	# presumably pairs with the `if uploaded_file:` check - confirm.
	else:
	st.warning("Por favor, carregue um arquivo CSV ou XLSX para começar.")