Divyansh12 committed on
Commit
6ceca00
·
verified ·
1 Parent(s): e340556

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -29
app.py CHANGED
@@ -5,11 +5,10 @@ import streamlit as st
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
 
8
  import subprocess
9
 
10
- # Install Playwright if not already installed
11
  subprocess.run(["playwright", "install"])
12
-
13
  # Apply nest_asyncio to allow nested event loops
14
  nest_asyncio.apply()
15
 
@@ -43,25 +42,6 @@ graph_config = {
43
  base_url = "https://courses.analyticsvidhya.com/collections"
44
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
45
 
46
- def format_courses(courses):
47
- """Format the scraped course data into a human-readable format."""
48
- formatted_output = []
49
-
50
- # Check if courses is a list of dictionaries
51
- if isinstance(courses, list) and all(isinstance(course, dict) for course in courses):
52
- for course in courses:
53
- title = course.get('title', 'No Title Provided')
54
- description = course.get('description', 'No Description Provided')
55
- link = course.get('link', 'No Link Provided')
56
- formatted_output.append(f"**Title:** {title}\n**Description:** {description}\n**Link:** [View Course]({link})\n")
57
- elif isinstance(courses, list):
58
- # If courses are simply strings, format them directly
59
- return "\n".join(courses)
60
- else:
61
- return "No courses found."
62
-
63
- return "\n".join(formatted_output)
64
-
65
  # Run the scraper when the button is clicked
66
  if st.button("Scrape Courses"):
67
  try:
@@ -74,20 +54,14 @@ if st.button("Scrape Courses"):
74
 
75
  # Run the scraper
76
  result = smart_scraper_graph.run()
77
-
78
  # Save the result as a JSON file
79
  with open("courses.json", "w") as outfile:
80
  json.dump(result, outfile, indent=4)
81
 
82
- # Print the raw result to understand its structure
83
- st.write("Raw Result:", result)
84
-
85
- # Format the result for display
86
- human_readable_output = format_courses(result)
87
-
88
  # Display the results in Streamlit
89
  st.success("Scraping completed successfully!")
90
- st.markdown(human_readable_output) # Display formatted output in markdown
91
 
92
  except Exception as e:
93
  st.error(f"An error occurred: {e}")
 
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
8
+
9
  import subprocess
10
 
 
11
  subprocess.run(["playwright", "install"])
 
12
  # Apply nest_asyncio to allow nested event loops
13
  nest_asyncio.apply()
14
 
 
42
  base_url = "https://courses.analyticsvidhya.com/collections"
43
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Run the scraper when the button is clicked
46
  if st.button("Scrape Courses"):
47
  try:
 
54
 
55
  # Run the scraper
56
  result = smart_scraper_graph.run()
57
+
58
  # Save the result as a JSON file
59
  with open("courses.json", "w") as outfile:
60
  json.dump(result, outfile, indent=4)
61
 
 
 
 
 
 
 
62
  # Display the results in Streamlit
63
  st.success("Scraping completed successfully!")
64
+ st.json(result) # Display the result as a JSON object
65
 
66
  except Exception as e:
67
  st.error(f"An error occurred: {e}")