Divyansh12 commited on
Commit
e340556
·
verified ·
1 Parent(s): beec7dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -5,10 +5,11 @@ import streamlit as st
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
8
-
9
  import subprocess
10
 
 
11
  subprocess.run(["playwright", "install"])
 
12
  # Apply nest_asyncio to allow nested event loops
13
  nest_asyncio.apply()
14
 
@@ -42,6 +43,25 @@ graph_config = {
42
  base_url = "https://courses.analyticsvidhya.com/collections"
43
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Run the scraper when the button is clicked
46
  if st.button("Scrape Courses"):
47
  try:
@@ -54,14 +74,20 @@ if st.button("Scrape Courses"):
54
 
55
  # Run the scraper
56
  result = smart_scraper_graph.run()
57
-
58
  # Save the result as a JSON file
59
  with open("courses.json", "w") as outfile:
60
  json.dump(result, outfile, indent=4)
61
 
 
 
 
 
 
 
62
  # Display the results in Streamlit
63
  st.success("Scraping completed successfully!")
64
- st.json(result) # Display the result as a JSON object
65
 
66
  except Exception as e:
67
  st.error(f"An error occurred: {e}")
 
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
 
8
  import subprocess
9
 
10
+ # Install Playwright if not already installed
11
  subprocess.run(["playwright", "install"])
12
+
13
  # Apply nest_asyncio to allow nested event loops
14
  nest_asyncio.apply()
15
 
 
43
  base_url = "https://courses.analyticsvidhya.com/collections"
44
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
45
 
46
def format_courses(courses):
    """Format scraped course data into a human-readable Markdown string.

    Args:
        courses: The raw scraper result. Expected to be either a list of
            dicts (with optional 'title', 'description', 'link' keys) or a
            list of plain strings. Any other shape is treated as "nothing".

    Returns:
        A Markdown-formatted string for display via st.markdown, or the
        message "No courses found." when there is nothing to show.
    """
    # Guard first: an empty list previously slipped into the dict branch
    # (all(...) is vacuously True on []) and returned "", rendering a
    # blank UI. Non-list input also gets the explicit message.
    if not isinstance(courses, list) or not courses:
        return "No courses found."

    if all(isinstance(course, dict) for course in courses):
        formatted_output = []
        for course in courses:
            title = course.get('title', 'No Title Provided')
            description = course.get('description', 'No Description Provided')
            link = course.get('link', 'No Link Provided')
            formatted_output.append(
                f"**Title:** {title}\n**Description:** {description}\n**Link:** [View Course]({link})\n"
            )
        return "\n".join(formatted_output)

    # Fallback: join items as strings. str() coercion avoids the TypeError
    # the original raised when a mixed list contained non-string items.
    return "\n".join(str(course) for course in courses)
64
+
65
  # Run the scraper when the button is clicked
66
  if st.button("Scrape Courses"):
67
  try:
 
74
 
75
  # Run the scraper
76
  result = smart_scraper_graph.run()
77
+
78
  # Save the result as a JSON file
79
  with open("courses.json", "w") as outfile:
80
  json.dump(result, outfile, indent=4)
81
 
82
+ # Print the raw result to understand its structure
83
+ st.write("Raw Result:", result)
84
+
85
+ # Format the result for display
86
+ human_readable_output = format_courses(result)
87
+
88
  # Display the results in Streamlit
89
  st.success("Scraping completed successfully!")
90
+ st.markdown(human_readable_output) # Display formatted output in markdown
91
 
92
  except Exception as e:
93
  st.error(f"An error occurred: {e}")