Divyansh12 commited on
Commit
e340556
·
verified ·
1 Parent(s): beec7dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -5,10 +5,11 @@ import streamlit as st
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
8
-
9
  import subprocess
10
 
 
11
  subprocess.run(["playwright", "install"])
 
12
  # Apply nest_asyncio to allow nested event loops
13
  nest_asyncio.apply()
14
 
@@ -42,6 +43,25 @@ graph_config = {
42
  base_url = "https://courses.analyticsvidhya.com/collections"
43
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Run the scraper when the button is clicked
46
  if st.button("Scrape Courses"):
47
  try:
@@ -54,14 +74,20 @@ if st.button("Scrape Courses"):
54
 
55
  # Run the scraper
56
  result = smart_scraper_graph.run()
57
-
58
  # Save the result as a JSON file
59
  with open("courses.json", "w") as outfile:
60
  json.dump(result, outfile, indent=4)
61
 
 
 
 
 
 
 
62
  # Display the results in Streamlit
63
  st.success("Scraping completed successfully!")
64
- st.json(result) # Display the result as a JSON object
65
 
66
  except Exception as e:
67
  st.error(f"An error occurred: {e}")
 
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
 
8
  import subprocess
9
 
10
+ # Install Playwright if not already installed
11
  subprocess.run(["playwright", "install"])
12
+
13
  # Apply nest_asyncio to allow nested event loops
14
  nest_asyncio.apply()
15
 
 
43
  base_url = "https://courses.analyticsvidhya.com/collections"
44
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
45
 
46
def format_courses(courses):
    """Format scraped course data into a human-readable Markdown string.

    Args:
        courses: The raw scraper result. Expected to be either a list of
            dicts (with optional 'title', 'description', 'link' keys) or a
            list of plain strings. Any other shape is treated as "nothing".

    Returns:
        A Markdown-formatted string for display via st.markdown, or the
        message "No courses found." when there is nothing to show.
    """
    # Guard first: an empty list previously slipped into the dict branch
    # (all(...) is vacuously True on []) and returned "", rendering a
    # blank UI. Non-list input also gets the explicit message.
    if not isinstance(courses, list) or not courses:
        return "No courses found."

    if all(isinstance(course, dict) for course in courses):
        formatted_output = []
        for course in courses:
            title = course.get('title', 'No Title Provided')
            description = course.get('description', 'No Description Provided')
            link = course.get('link', 'No Link Provided')
            formatted_output.append(
                f"**Title:** {title}\n**Description:** {description}\n**Link:** [View Course]({link})\n"
            )
        return "\n".join(formatted_output)

    # Fallback: join items as strings. str() coercion avoids the TypeError
    # the original raised when a mixed list contained non-string items.
    return "\n".join(str(course) for course in courses)
64
+
65
  # Run the scraper when the button is clicked
66
  if st.button("Scrape Courses"):
67
  try:
 
74
 
75
  # Run the scraper
76
  result = smart_scraper_graph.run()
77
+
78
  # Save the result as a JSON file
79
  with open("courses.json", "w") as outfile:
80
  json.dump(result, outfile, indent=4)
81
 
82
+ # Print the raw result to understand its structure
83
+ st.write("Raw Result:", result)
84
+
85
+ # Format the result for display
86
+ human_readable_output = format_courses(result)
87
+
88
  # Display the results in Streamlit
89
  st.success("Scraping completed successfully!")
90
+ st.markdown(human_readable_output) # Display formatted output in markdown
91
 
92
  except Exception as e:
93
  st.error(f"An error occurred: {e}")