Divyansh12 committed on
Commit
343b21c
·
verified ·
1 Parent(s): e8b3820

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -40
app.py CHANGED
@@ -5,17 +5,10 @@ import streamlit as st
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
8
- import os
9
- from dotenv import load_dotenv
10
- import subprocess
11
-
12
- # Load environment variables
13
- load_dotenv()
14
- api_key = os.getenv("GROQ_API_KEY")
15
 
16
- # Install playwright
17
- subprocess.run(["playwright", "install"], check=True)
18
 
 
19
  # Apply nest_asyncio to allow nested event loops
20
  nest_asyncio.apply()
21
 
@@ -26,9 +19,7 @@ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
26
  st.title("Course Scraper from Analytics Vidhya")
27
 
28
  # API Key Input
29
- if not api_key:
30
- st.warning("GROQ_API_KEY not found. Please set it in your environment or enter it below.")
31
- api_key = st.text_input("Enter your API Key:", type="password")
32
 
33
  # Prompt Input
34
  user_prompt = st.text_input("Enter your prompt for scraping:")
@@ -51,41 +42,27 @@ graph_config = {
51
  base_url = "https://courses.analyticsvidhya.com/collections"
52
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
53
 
54
- # Define the async scraping function
55
- async def scrape_courses():
56
  try:
57
- # Create the SmartScraperMultiGraph instance
58
  smart_scraper_graph = SmartScraperMultiGraph(
59
  prompt=user_prompt, # Use the user-defined prompt
60
  source=urls,
61
  config=graph_config
62
  )
63
- # Run the scraper asynchronously
64
- result = await smart_scraper_graph.run()
65
- return result
66
- except Exception as e:
67
- st.error(f"An error occurred during scraping: {e}")
68
- return None
69
 
70
- # Run the scraper when the button is clicked
71
- if st.button("Scrape Courses"):
72
- if not user_prompt:
73
- st.error("Please enter a prompt for scraping.")
74
- elif not api_key:
75
- st.error("Please enter a valid API key.")
76
- else:
77
- with st.spinner("Scraping in progress..."):
78
- # Use asyncio to run the scraper
79
- loop = asyncio.new_event_loop()
80
- asyncio.set_event_loop(loop)
81
- result = loop.run_until_complete(scrape_courses())
82
 
83
- if result:
84
- # Save the result as a JSON file
85
- with open("courses.json", "w") as outfile:
86
- json.dump(result, outfile, indent=4)
87
 
88
- # Display the results in Streamlit
89
- st.success("Scraping completed successfully!")
90
- st.json(result) # Display the result as a JSON object
91
 
 
5
  from sentence_transformers import SentenceTransformer
6
  from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
 
 
 
 
 
 
 
8
 
9
+ import subprocess
 
10
 
11
+ subprocess.run(["playwright", "install"])
12
  # Apply nest_asyncio to allow nested event loops
13
  nest_asyncio.apply()
14
 
 
19
  st.title("Course Scraper from Analytics Vidhya")
20
 
21
  # API Key Input
22
+ api_key = st.text_input("Enter your API Key:", type="password")
 
 
23
 
24
  # Prompt Input
25
  user_prompt = st.text_input("Enter your prompt for scraping:")
 
42
  base_url = "https://courses.analyticsvidhya.com/collections"
43
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
44
 
45
+ # Run the scraper when the button is clicked
46
+ if st.button("Scrape Courses"):
47
  try:
48
+ # Create the SmartScraperMultiGraph instance
49
  smart_scraper_graph = SmartScraperMultiGraph(
50
  prompt=user_prompt, # Use the user-defined prompt
51
  source=urls,
52
  config=graph_config
53
  )
 
 
 
 
 
 
54
 
55
+ # Run the scraper
56
+ result = smart_scraper_graph.run()
57
+
58
+ # Save the result as a JSON file
59
+ with open("courses.json", "w") as outfile:
60
+ json.dump(result, outfile, indent=4)
 
 
 
 
 
 
61
 
62
+ # Display the results in Streamlit
63
+ st.success("Scraping completed successfully!")
64
+ st.json(result) # Display the result as a JSON object
 
65
 
66
+ except Exception as e:
67
+ st.error(f"An error occurred: {e}")
 
68