import dataclasses
import html
import re
from urllib.parse import quote

import pandas as pd
import requests
import streamlit as st
import torch
from datasets import Dataset
from huggingface_hub import hf_api
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Page header: title plus a short description of what the app offers.
st.title("OSINT Tool 🏢")
st.markdown("""
This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
""")

# Sidebar navigation: the selected label decides which section of the page
# is rendered further down in the script.
st.sidebar.title("Navigation")
app_mode = st.sidebar.radio(
    "Choose the mode",
    ["GitHub Repository Analysis", "URL Title Fetcher", "Dataset Upload & Fine-Tuning"],
)

# Mode: look up public metadata of a GitHub repository through the REST API
# and display a few headline fields.
if app_mode == "GitHub Repository Analysis":
    st.header("GitHub Repository Analysis")
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface")
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers")

    if st.button("Analyze Repository"):
        owner = repo_owner.strip()
        name = repo_name.strip()

        if owner and name:
            # Percent-encode both path segments so characters such as "/",
            # "?" or ".." typed by the user cannot change which API route is
            # requested.
            api_url = (
                "https://api.github.com/repos/"
                f"{quote(owner, safe='')}/{quote(name, safe='')}"
            )
            try:
                # A timeout keeps the app from hanging on a stalled connection.
                response = requests.get(api_url, timeout=10)
                # Raises ValueError when the body is not JSON (handled below).
                data = response.json()

                if response.status_code == 200:
                    st.subheader("Repository Details")
                    st.write(f"**Name**: {data['name']}")
                    st.write(f"**Owner**: {data['owner']['login']}")
                    st.write(f"**Stars**: {data['stargazers_count']}")
                    st.write(f"**Forks**: {data['forks_count']}")
                    # Both fields may be null in the API response; show a
                    # placeholder instead of the literal "None".
                    st.write(f"**Language**: {data.get('language') or 'N/A'}")
                    st.write(f"**Description**: {data.get('description') or 'N/A'}")
                else:
                    st.error(f"Error: {data.get('message', 'Something went wrong with the request')}")
            except (requests.RequestException, ValueError, KeyError) as e:
                # Network failure, non-JSON body, or an unexpected payload shape.
                st.error(f"Error occurred: {e}")
        else:
            st.warning("Please enter both repository owner and name.")

# Compiled once per script run. Tolerates attributes on the opening tag, any
# letter case, and titles that span several lines.
_TITLE_PATTERN = re.compile(
    r"<title\b[^>]*>(.*?)</title\s*>",
    re.IGNORECASE | re.DOTALL,
)


def _extract_page_title(page_html):
    """Return the text of the first ``<title>`` element, or ``None``.

    Args:
        page_html: HTML source of the page as a string.

    Returns:
        The title with HTML entities decoded and runs of whitespace collapsed
        to single spaces, or ``None`` when no ``<title>`` element is found.
    """
    match = _TITLE_PATTERN.search(page_html)
    if match is None:
        return None
    return " ".join(html.unescape(match.group(1)).split())


# Mode: download a page and show its <title>.
# The selected mode is a single value, so testing it with an independent
# `if` is equivalent to continuing an if/elif chain.
if app_mode == "URL Title Fetcher":
    st.header("URL Title Fetcher")
    url = st.text_input("Enter URL", "https://www.huggingface.co")

    if st.button("Fetch Title"):
        target_url = url.strip()

        # NOTE(security): the URL is user-supplied and is fetched from the
        # machine running this app. Only http(s) is accepted here; when the
        # app is deployed next to internal services, also restrict the
        # allowed hosts.
        if not target_url.lower().startswith(("http://", "https://")):
            st.warning("Please enter a valid URL.")
        else:
            try:
                # A timeout keeps the app from hanging on a stalled connection.
                response = requests.get(target_url, timeout=10)
            except requests.RequestException as e:
                st.error(f"Error occurred: {e}")
            else:
                if response.status_code == 200:
                    title = _extract_page_title(response.text)
                    if title is None:
                        st.warning("Title tag not found in the page")
                    else:
                        st.write(f"**Page Title**: {title}")
                else:
                    st.error(f"Failed to retrieve the page. Status code: {response.status_code}")

# Below this many usable rows the model is evaluated on the training rows,
# because a 20% hold-out would contain too few examples to be informative.
_MIN_ROWS_FOR_HOLDOUT = 10


def _prepare_classification_frame(frame):
    """Return a clean ``text``/``label`` frame and the list of label classes.

    Args:
        frame: DataFrame parsed from the uploaded CSV.

    Returns:
        A ``(prepared, classes)`` tuple. ``prepared`` has a string ``text``
        column and an integer ``label`` column holding class ids ``0..n-1``;
        ``classes`` lists the original label values, where the position of a
        value equals its class id.

    Raises:
        ValueError: If the required columns are missing, or no row has both
            a text and a label value.
    """
    label_column = next(
        (name for name in ("label", "labels") if name in frame.columns),
        None,
    )
    if "text" not in frame.columns or label_column is None:
        raise ValueError(
            "The CSV must contain a 'text' column and a 'label' column; "
            f"found columns: {list(frame.columns)}"
        )

    usable = frame[["text", label_column]].dropna()
    if usable.empty:
        raise ValueError("The CSV has no rows with both 'text' and 'label' values.")

    # Map arbitrary label values (strings, non-contiguous ints) to 0..n-1,
    # which is what a classification head expects as targets.
    class_ids, classes = pd.factorize(usable[label_column], sort=True)
    prepared = pd.DataFrame(
        {
            "text": usable["text"].astype(str).to_numpy(),
            "label": class_ids,
        }
    )
    return prepared, list(classes)


# Mode: upload a CSV and fine-tune a sequence-classification model on it.
# The selected mode is a single value, so testing it with an independent
# `if` is equivalent to continuing an if/elif chain.
if app_mode == "Dataset Upload & Fine-Tuning":
    st.header("Dataset Upload & Fine-Tuning")

    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")

    if uploaded_file is None:
        st.warning("Please upload a dataset.")
    else:
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as e:
            # Report an unreadable upload in the page instead of a traceback.
            df = None
            st.error(f"Could not read the uploaded CSV: {e}")

        if df is not None:
            st.subheader("Dataset Preview")
            st.write(df.head())

            model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

            if st.button("Fine-tune Model"):
                if model_name:
                    try:
                        from transformers import (
                            DataCollatorWithPadding,
                            Trainer,
                            TrainingArguments,
                        )

                        prepared, classes = _prepare_classification_frame(df)
                        dataset = Dataset.from_pandas(prepared, preserve_index=False)

                        tokenizer = AutoTokenizer.from_pretrained(model_name)
                        # Size the classification head to the number of
                        # classes found in the data (at least two).
                        model = AutoModelForSequenceClassification.from_pretrained(
                            model_name,
                            num_labels=max(len(classes), 2),
                        )

                        def preprocess_function(examples):
                            # No padding here: padding inside a batched map
                            # yields different lengths per map batch. The
                            # data collator pads each training batch instead.
                            return tokenizer(examples["text"], truncation=True)

                        tokenized_datasets = dataset.map(preprocess_function, batched=True)

                        # Hold out 20% for evaluation when there is enough
                        # data; otherwise fall back to the training rows.
                        if len(tokenized_datasets) >= _MIN_ROWS_FOR_HOLDOUT:
                            splits = tokenized_datasets.train_test_split(test_size=0.2, seed=42)
                            train_split = splits["train"]
                            eval_split = splits["test"]
                        else:
                            train_split = eval_split = tokenized_datasets

                        # Newer transformers releases call this argument
                        # `eval_strategy`; older ones `evaluation_strategy`.
                        # Use whichever the installed version declares.
                        argument_names = {
                            field.name for field in dataclasses.fields(TrainingArguments)
                        }
                        if "eval_strategy" in argument_names:
                            eval_strategy_key = "eval_strategy"
                        else:
                            eval_strategy_key = "evaluation_strategy"

                        training_args = TrainingArguments(
                            output_dir="./results",
                            learning_rate=2e-5,
                            per_device_train_batch_size=16,
                            per_device_eval_batch_size=16,
                            num_train_epochs=3,
                            weight_decay=0.01,
                            **{eval_strategy_key: "epoch"},
                        )

                        trainer = Trainer(
                            model=model,
                            args=training_args,
                            train_dataset=train_split,
                            eval_dataset=eval_split,
                            data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
                        )

                        with st.spinner("Fine-tuning in progress..."):
                            trainer.train()

                        st.success("Fine-tuning completed successfully!")
                    except Exception as e:
                        # Top-level UI boundary: surface any failure from
                        # model download, tokenization or training in the page.
                        st.error(f"Error during fine-tuning: {e}")
                else:
                    st.warning("Please select a model for fine-tuning.")