Spaces:

Chemically-motivated
/

OSINT_Tool

Paused

App Files Files Community

OSINT_Tool / app.py

Canstralian

Update app.py

bbbca4f verified 10 months ago

raw

history blame

5.53 kB

	import streamlit as st
	import requests
	import re
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch
	import pandas as pd
	from datasets import Dataset
	from huggingface_hub import hf_api

	# Page heading plus a short blurb describing what the app offers.
	st.title("OSINT Tool 🏢")
	_intro_text = """
	This tool performs Open Source Intelligence (OSINT) analysis on GitHub repositories and fetches titles from URLs.
	It also allows uploading datasets (CSV format) for fine-tuning models like DistilBERT.
	"""
	st.markdown(_intro_text)

	# Sidebar navigation: the selected label is stored in `app_mode` and decides
	# which section of the page is rendered further down the script.
	_mode_choices = [
	    "GitHub Repository Analysis",
	    "URL Title Fetcher",
	    "Dataset Upload & Fine-Tuning",
	]
	st.sidebar.title("Navigation")
	app_mode = st.sidebar.radio("Choose the mode", _mode_choices)

	# GitHub Repository Analysis
	# Queries https://api.github.com/repos/{owner}/{repo} and displays the name,
	# owner login, star count, fork count, language and description it returns.
	if app_mode == "GitHub Repository Analysis":
	    st.header("GitHub Repository Analysis")
	    # Strip stray whitespace so a padded value does not end up in the URL path.
	    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface").strip()
	    repo_name = st.text_input("Enter GitHub Repository Name", "transformers").strip()

	    if st.button("Analyze Repository"):
	        if not (repo_owner and repo_name):
	            st.warning("Please enter both repository owner and name.")
	        elif not (
	            re.fullmatch(r"[A-Za-z0-9_.-]+", repo_owner)
	            and re.fullmatch(r"[A-Za-z0-9_.-]+", repo_name)
	        ):
	            # Both values are interpolated into the request path; rejecting
	            # characters such as "/", "?" or "#" keeps the request on the
	            # /repos/{owner}/{repo} endpoint.
	            st.warning(
	                "Repository owner and name may only contain letters, digits, '.', '_' and '-'."
	            )
	        else:
	            try:
	                # A timeout keeps a stalled connection from hanging the app.
	                response = requests.get(
	                    f"https://api.github.com/repos/{repo_owner}/{repo_name}",
	                    timeout=10,
	                )

	                if response.status_code == 200:
	                    data = response.json()
	                    st.subheader("Repository Details")
	                    st.write(f"Name: {data['name']}")
	                    st.write(f"Owner: {data['owner']['login']}")
	                    st.write(f"Stars: {data['stargazers_count']}")
	                    st.write(f"Forks: {data['forks_count']}")
	                    st.write(f"Language: {data['language']}")
	                    st.write(f"Description: {data['description']}")
	                else:
	                    # Prefer the "message" field of the error body, but fall
	                    # back to a generic text when the body is not a JSON
	                    # object (e.g. an HTML error page from a proxy).
	                    fallback = 'Something went wrong with the request'
	                    try:
	                        message = response.json().get('message', fallback)
	                    except (ValueError, AttributeError):
	                        message = fallback
	                    st.error(f"Error: {message}")
	            except Exception as e:
	                # UI boundary: report network/parsing failures in the page
	                # instead of letting the script crash.
	                st.error(f"Error occurred: {e}")

	# URL Title Fetcher
	elif app_mode == "URL Title Fetcher":
	st.header("URL Title Fetcher")
	url = st.text_input("Enter URL", "https://www.huggingface.co")

	if st.button("Fetch Title"):
	if url:
	try:
	response = requests.get(url)
	if response.status_code == 200:
	# Try to extract the title from the HTML
	match = re.search('<title>(.*?)</title>', response.text)
	if match:
	title = match.group(1)
	st.write(f"Page Title: {title}")
	else:
	st.warning("Title tag not found in the page")
	else:
	st.error(f"Failed to retrieve the page. Status code: {response.status_code}")
	except Exception as e:
	st.error(f"Error occurred: {e}")
	else:
	st.warning("Please enter a valid URL.")

	# Dataset Upload & Fine-Tuning
	elif app_mode == "Dataset Upload & Fine-Tuning":
	st.header("Dataset Upload & Fine-Tuning")

	uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")

	if uploaded_file is not None:
	# Load the CSV into a pandas DataFrame
	df = pd.read_csv(uploaded_file)

	# Display dataset preview
	st.subheader("Dataset Preview")
	st.write(df.head())

	# Convert CSV to Hugging Face dataset format
	dataset = Dataset.from_pandas(df)

	model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

	if st.button("Fine-tune Model"):
	if model_name:
	try:
	model = AutoModelForSequenceClassification.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Prepare the dataset
	def preprocess_function(examples):
	return tokenizer(examples['text'], truncation=True, padding=True)

	tokenized_datasets = dataset.map(preprocess_function, batched=True)

	# Fine-tuning setup (using Hugging Face Trainer for a complete setup)
	from transformers import Trainer, TrainingArguments

	training_args = TrainingArguments(
	output_dir="./results",
	evaluation_strategy="epoch",
	learning_rate=2e-5,
	per_device_train_batch_size=16,
	per_device_eval_batch_size=16,
	num_train_epochs=3,
	weight_decay=0.01,
	)

	trainer = Trainer(
	model=model,
	args=training_args,
	train_dataset=tokenized_datasets,
	eval_dataset=tokenized_datasets,
	)

	# Train the model
	trainer.train()

	st.success("Fine-tuning completed successfully!")
	except Exception as e:
	st.error(f"Error during fine-tuning: {e}")
	else:
	st.warning("Please select a model for fine-tuning.")

	else:
	st.warning("Please upload a dataset.")