Spaces:

Divyansh12
/

analytics-vidhya-search

Sleeping

App Files Community

Divyansh12 committed on Oct 6, 2024

Commit

6ceca00

verified ·

1 Parent(s): e340556

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -29

app.py CHANGED Viewed

@@ -5,11 +5,10 @@ import streamlit as st
 from sentence_transformers import SentenceTransformer
 from scrapegraphai.graphs import SmartScraperMultiGraph
 from langchain_groq import ChatGroq
 import subprocess
-# Install Playwright if not already installed
 subprocess.run(["playwright", "install"])
 # Apply nest_asyncio to allow nested event loops
 nest_asyncio.apply()
@@ -43,25 +42,6 @@ graph_config = {
 base_url = "https://courses.analyticsvidhya.com/collections"
 urls = [f"{base_url}?page={i}" for i in range(1, 5)]  # Adjusting to scrape only the first 4 pages
-def format_courses(courses):
-    """Format the scraped course data into a human-readable format."""
-    formatted_output = []
-    # Check if courses is a list of dictionaries
-    if isinstance(courses, list) and all(isinstance(course, dict) for course in courses):
-        for course in courses:
-            title = course.get('title', 'No Title Provided')
-            description = course.get('description', 'No Description Provided')
-            link = course.get('link', 'No Link Provided')
-            formatted_output.append(f"**Title:** {title}\n**Description:** {description}\n**Link:** [View Course]({link})\n")
-    elif isinstance(courses, list):
-        # If courses are simply strings, format them directly
-        return "\n".join(courses)
-    else:
-        return "No courses found."
-    return "\n".join(formatted_output)
 # Run the scraper when the button is clicked
 if st.button("Scrape Courses"):
     try:
@@ -74,20 +54,14 @@ if st.button("Scrape Courses"):
         # Run the scraper
         result = smart_scraper_graph.run()
         # Save the result as a JSON file
         with open("courses.json", "w") as outfile:
             json.dump(result, outfile, indent=4)
-        # Print the raw result to understand its structure
-        st.write("Raw Result:", result)
-        # Format the result for display
-        human_readable_output = format_courses(result)
         # Display the results in Streamlit
         st.success("Scraping completed successfully!")
-        st.markdown(human_readable_output)  # Display formatted output in markdown
     except Exception as e:
         st.error(f"An error occurred: {e}")

 from sentence_transformers import SentenceTransformer
 from scrapegraphai.graphs import SmartScraperMultiGraph
 from langchain_groq import ChatGroq
 import subprocess
 subprocess.run(["playwright", "install"])
 # Apply nest_asyncio to allow nested event loops
 nest_asyncio.apply()
 base_url = "https://courses.analyticsvidhya.com/collections"
 urls = [f"{base_url}?page={i}" for i in range(1, 5)]  # Adjusting to scrape only the first 4 pages
 # Run the scraper when the button is clicked
 if st.button("Scrape Courses"):
     try:
         # Run the scraper
         result = smart_scraper_graph.run()
         # Save the result as a JSON file
         with open("courses.json", "w") as outfile:
             json.dump(result, outfile, indent=4)
         # Display the results in Streamlit
         st.success("Scraping completed successfully!")
+        st.json(result)  # Display the result as a JSON object
     except Exception as e:
         st.error(f"An error occurred: {e}")