Update app.py
app.py
CHANGED
@@ -1,119 +1,75 @@
 import gradio as gr
 import requests
 from bs4 import BeautifulSoup
 import pandas as pd
 from transformers import pipeline
-import yfinance as yf
-import plotly.graph_objects as go
-from datetime import datetime, timedelta

-#
-

-# Encode special characters in a text string
-def encode_special_characters(text):
-    encoded_text = ''
-    special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'}
-    for char in text.lower():
-        encoded_text += special_characters.get(char, char)
-    return encoded_text

-
-
-
-
-
-    try:
-        response = requests.get(url, verify=False)
-        response.raise_for_status()
-    except requests.RequestException as e:
-        print(f"Error fetching news: {e}")
-        return pd.DataFrame()
-
     soup = BeautifulSoup(response.text, 'html.parser')
     articles = soup.find_all('article')
-    news_data = []
-
-    for article in articles[:num_articles]:
-        link = article.find('a')['href'].replace("./articles/", "https://news.google.com/articles/")
-        text_parts = article.get_text(separator='\n').split('\n')
-
-        news_data.append({
-            'Title': text_parts[2] if len(text_parts) > 2 else 'Missing',
-            'Source': text_parts[0] if len(text_parts) > 0 else 'Missing',
-            'Time': text_parts[3] if len(text_parts) > 3 else 'Missing',
-            'Author': text_parts[4].split('By ')[-1] if len(text_parts) > 4 else 'Missing',
-            'Link': link
-        })
-
-    return pd.DataFrame(news_data)

-
-def analyze_sentiment(text):
-    result = sentiment_model(text)[0]
-    return result['label'], result['score']

-
-
-
-
-

-def news_and_analysis(topic, stock_symbol):
-
-
-
-
-
-
-
-
-    news_df['Sentiment'], news_df['Sentiment_Score'] = zip(*news_df['Title'].apply(analyze_sentiment))
-
-    # Fetch stock data (last 30 days)
-    end_date = datetime.now()
-    start_date = end_date - timedelta(days=30)
-    stock_data = fetch_stock_data(stock_symbol, start_date, end_date)
-
-    # Create sentiment plot
-    sentiment_fig = go.Figure(data=[go.Bar(
-        x=news_df['Time'],
-        y=news_df['Sentiment_Score'],
-        marker_color=news_df['Sentiment'].map({'positive': 'green', 'neutral': 'gray', 'negative': 'red'})
-    )])
-    sentiment_fig.update_layout(title='News Sentiment Over Time', xaxis_title='Time', yaxis_title='Sentiment Score')
-
-    # Create stock price plot
-    stock_fig = go.Figure(data=[go.Candlestick(
-        x=stock_data.index,
-        open=stock_data['Open'],
-        high=stock_data['High'],
-        low=stock_data['Low'],
-        close=stock_data['Close']
-    )])
-    stock_fig.update_layout(title=f'{stock_symbol} Stock Price', xaxis_title='Date', yaxis_title='Price')

-    return news_df, sentiment_fig, stock_fig

-# Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Financial News Sentiment Analysis and Market Impact")
-
-    with gr.Row():
-        topic = gr.Textbox(label="Enter a financial topic or company name")
-        stock_symbol = gr.Textbox(label="Enter the stock symbol (e.g., RELIANCE.NS for Reliance Industries)")
-
-    analyze_btn = gr.Button(value="Analyze")
-
-    news_output = gr.DataFrame(label="News and Sentiment Analysis")
-    sentiment_plot = gr.Plot(label="Sentiment Analysis")
-    stock_plot = gr.Plot(label="Stock Price Movement")
-
-    analyze_btn.click(
-        news_and_analysis,
-        inputs=[topic, stock_symbol],
-        outputs=[news_output, sentiment_plot, stock_plot]
-    )


 demo.launch()
 import gradio as gr
+import selenium
 import requests
 from bs4 import BeautifulSoup
 import pandas as pd
+
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+import pandas as pd
+import time
 from transformers import pipeline
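Of the added imports, `import selenium`, `Keys`, and the second `import pandas as pd` are never used below, and `time` matters only if the commented-out Selenium waits come back. A trimmed sketch of what the code below actually needs:

import gradio as gr
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver      # only for the (currently idle) Chrome driver
from transformers import pipeline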

+# Search Query
+def news_and_analysis(query):

+    # Encode special characters in a text string
+    def encode_special_characters(text):
+        encoded_text = ''
+        special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'}  # Add more special characters as needed
+        for char in text.lower():
+            encoded_text += special_characters.get(char, char)
+        return encoded_text
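The nested encoder covers only four characters and lowercases the whole query as a side effect. The standard library already does this; a minimal alternative sketch (not what the commit uses):

from urllib.parse import quote_plus

def encode_special_characters(text):
    # Percent-encodes every reserved character and maps spaces to '+',
    # which Google News accepts; unlike the loop above it keeps the query's case.
    return quote_plus(text)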
+    query2 = encode_special_characters(query)
+    url = f"https://news.google.com/search?q={query2}&hl=en-US&gl=in&ceid=US%3Aen&num=3"
+
+    response = requests.get(url, verify=False)
     soup = BeautifulSoup(response.text, 'html.parser')
+
     articles = soup.find_all('article')
+    links = [article.find('a')['href'] for article in articles]
+    links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]
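This request drops the try/except and `raise_for_status()` that the deleted version had, so an HTTP error page would be parsed as if it were results, and `verify=False` additionally emits an InsecureRequestWarning on every call. A sketch restoring the old guard (the `timeout=10` is an addition, present in neither version):

try:
    response = requests.get(url, verify=False, timeout=10)
    response.raise_for_status()  # turn 4xx/5xx into an exception instead of parsing an error page
except requests.RequestException as e:
    print(f"Error fetching news: {e}")
    return pd.DataFrame()  # an empty frame keeps the Gradio Dataframe output type stable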

+    news_text = [article.get_text(separator='\n') for article in articles]
+    news_text_split = [text.split('\n') for text in news_text]

+    news_df = pd.DataFrame({
+        'Title': [text[2] for text in news_text_split],
+        'Source': [text[0] for text in news_text_split],
+        'Time': [text[3] if len(text) > 3 else 'Missing' for text in news_text_split],
+        'Author': [text[4].split('By ')[-1] if len(text) > 4 else 'Missing' for text in news_text_split],
+        'Link': links
+    })

+    news_df = news_df.loc[0:5, :]
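'Title' and 'Source' index into each split without the length guards that 'Time' and 'Author' get, so a single oddly structured article card raises IndexError and fails the whole request. A sketch extending the same 'Missing' fallback to every column (`field` is a hypothetical helper; `news_text_split` and `links` come from the lines above):

def field(parts, idx, default='Missing'):
    # Return parts[idx] when it exists, otherwise the placeholder.
    return parts[idx] if len(parts) > idx else default

news_df = pd.DataFrame({
    'Title': [field(t, 2) for t in news_text_split],
    'Source': [field(t, 0) for t in news_text_split],
    'Time': [field(t, 3) for t in news_text_split],
    'Author': [field(t, 4).split('By ')[-1] for t in news_text_split],
    'Link': links,
})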
+    options = webdriver.ChromeOptions()
+    options.add_argument('--headless')
+    options.add_argument('--no-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+    options.use_chromium = True
+    driver = webdriver.Chrome(options=options)
+
+    classification = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")
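The Chrome driver built here is referenced only by the commented-out block in the loop below, so every request pays the browser start-up cost without using it, and nothing ever calls quit(). If the headline fetch comes back, a try/finally keeps the browser process from leaking; this is a sketch assembled from those commented-out lines, not code from the commit:

driver = webdriver.Chrome(options=options)
try:
    for i in range(len(news_df)):
        driver.get(news_df.loc[i, 'Link'])
        time.sleep(10)  # crude fixed wait; WebDriverWait would be more robust
        news_df.loc[i, 'Headline'] = driver.find_element('xpath', '//h1').text
finally:
    driver.quit()  # release the browser even if a page fails to load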

+    news_df['Sentiment'] = ''
+    for i in range(0, len(news_df)):
+        # driver.get(news_df.loc[i, 'Link'])
+        # time.sleep(10)
+        # headline = driver.find_element('xpath', '//h1').text
+        # news_df.loc[i, 'Headline'] = headline
+        title = news_df.loc[i, 'Title']
+        news_df.loc[i, 'Sentiment'] = str(classification(title))
+        print(news_df)
+
+    return news_df

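`classification(title)` returns a list with one dict per input, e.g. `[{'label': 'POS', 'score': 0.98}]`, and `str(...)` stores that raw repr in the cell; the deleted version kept label and score as separate values. A sketch doing the same inside this loop (`Sentiment_Score` is the old column name, reused here):

result = classification(title)[0]              # {'label': ..., 'score': ...}
news_df.loc[i, 'Sentiment'] = result['label']
news_df.loc[i, 'Sentiment_Score'] = result['score']

Building the pipeline once at module level, rather than inside news_and_analysis, would also avoid reloading the model on every click, and the `print(news_df)` likely belongs after the loop rather than once per row.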
 with gr.Blocks() as demo:

+
+    topic = gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
+
+    btn = gr.Button(value="Submit")
+    btn.click(news_and_analysis, inputs=topic, outputs=gr.Dataframe())

 demo.launch()
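`outputs=gr.Dataframe()` builds the output table inline in the event binding; it renders, but carries no label and is easy to miss when reading the layout. A sketch declaring it alongside the other components (the label text is illustrative):

with gr.Blocks() as demo:
    topic = gr.Textbox(label="Topic for which you want Google news and sentiment analysis")
    btn = gr.Button(value="Submit")
    # Declared in the layout, so it can carry a label.
    output = gr.Dataframe(label="News and sentiment")
    btn.click(news_and_analysis, inputs=topic, outputs=output)

demo.launch()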