Divyansh12 commited on
Commit
e8b3820
·
verified ·
1 Parent(s): 3a8f778

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -18
app.py CHANGED
@@ -7,12 +7,15 @@ from scrapegraphai.graphs import SmartScraperMultiGraph
7
  from langchain_groq import ChatGroq
8
  import os
9
  from dotenv import load_dotenv
 
 
 
10
  load_dotenv()
11
- api_key=os.getenv("GROQ_API_KEY")
12
 
13
- import subprocess
 
14
 
15
- subprocess.run(["playwright", "install"])
16
  # Apply nest_asyncio to allow nested event loops
17
  nest_asyncio.apply()
18
 
@@ -23,7 +26,9 @@ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
23
  st.title("Course Scraper from Analytics Vidhya")
24
 
25
  # API Key Input
26
- #api_key = st.text_input("Enter your API Key:", type="password")
 
 
27
 
28
  # Prompt Input
29
  user_prompt = st.text_input("Enter your prompt for scraping:")
@@ -46,26 +51,41 @@ graph_config = {
46
  base_url = "https://courses.analyticsvidhya.com/collections"
47
  urls = [f"{base_url}?page={i}" for i in range(1, 5)] # Adjusting to scrape only the first 4 pages
48
 
49
- # Run the scraper when the button is clicked
50
- if st.button("Scrape Courses"):
51
  try:
52
- # Create the SmartScraperGraph instance
53
  smart_scraper_graph = SmartScraperMultiGraph(
54
  prompt=user_prompt, # Use the user-defined prompt
55
  source=urls,
56
  config=graph_config
57
  )
 
 
 
 
 
 
58
 
59
- # Run the scraper
60
- result = smart_scraper_graph.run()
61
-
62
- # Save the result as a JSON file
63
- with open("courses.json", "w") as outfile:
64
- json.dump(result, outfile, indent=4)
 
 
 
 
 
 
65
 
66
- # Display the results in Streamlit
67
- st.success("Scraping completed successfully!")
68
- st.json(result) # Display the result as a JSON object
 
 
 
 
 
69
 
70
- except Exception as e:
71
- st.error(f"An error occurred: {e}")
 
7
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
import subprocess

# Load environment variables (expects GROQ_API_KEY in .env or the environment)
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")

# Install the Playwright browser binaries needed by the scraper.
# FIX: Streamlit re-executes this whole script on every user interaction,
# so an unguarded subprocess.run() here re-runs the (slow) install on each
# rerun. A process-level env-var sentinel makes it run once per process.
if not os.environ.get("_PLAYWRIGHT_INSTALL_DONE"):
    subprocess.run(["playwright", "install"], check=True)
    os.environ["_PLAYWRIGHT_INSTALL_DONE"] = "1"

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()
21
 
 
26
# Page title
st.title("Course Scraper from Analytics Vidhya")

# API Key Input: when the environment did not supply GROQ_API_KEY,
# fall back to asking the user for it via a masked text field.
if not api_key:
    st.warning("GROQ_API_KEY not found. Please set it in your environment or enter it below.")
    api_key = st.text_input("Enter your API Key:", type="password")

# Prompt Input
user_prompt = st.text_input("Enter your prompt for scraping:")
 
51
base_url = "https://courses.analyticsvidhya.com/collections"
urls = [f"{base_url}?page={i}" for i in range(1, 5)]  # Adjusting to scrape only the first 4 pages


# Define the async scraping function
async def scrape_courses():
    """Run the multi-page course scraper.

    Returns:
        The scraper's result on success, or ``None`` after reporting the
        error through the Streamlit UI.
    """
    try:
        # Create the SmartScraperMultiGraph instance
        smart_scraper_graph = SmartScraperMultiGraph(
            prompt=user_prompt,  # Use the user-defined prompt
            source=urls,
            config=graph_config,
        )
        # BUG FIX: SmartScraperMultiGraph.run() is synchronous and returns
        # the result directly (the pre-change code called it without await);
        # awaiting its return value would raise TypeError. Call it directly.
        return smart_scraper_graph.run()
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        st.error(f"An error occurred during scraping: {e}")
        return None
69
 
70
# Run the scraper when the button is clicked
if st.button("Scrape Courses"):
    if not user_prompt:
        st.error("Please enter a prompt for scraping.")
    elif not api_key:
        st.error("Please enter a valid API key.")
    else:
        with st.spinner("Scraping in progress..."):
            # Drive the async scraper on a dedicated event loop.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                result = loop.run_until_complete(scrape_courses())
            finally:
                # BUG FIX: the original never closed the loop, leaking one
                # event loop per button click.
                loop.close()

        # scrape_courses() returns None on failure (it already showed the
        # error); an empty-but-successful result should still be reported,
        # so test identity against None rather than truthiness.
        if result is not None:
            # Save the result as a JSON file
            with open("courses.json", "w") as outfile:
                json.dump(result, outfile, indent=4)

            # Display the results in Streamlit
            st.success("Scraping completed successfully!")
            st.json(result)  # Display the result as a JSON object
91