Divyansh12 committed on
Commit
46650c7
·
verified ·
1 Parent(s): 0a1692e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that scrapes the Analytics Vidhya course catalogue.

The user supplies a Groq API key; on button click the first four
collection pages are scraped with ScrapeGraphAI's SmartScraperMultiGraph,
the result is written to ``courses.json`` and rendered in the page.
"""

import asyncio
import json

import nest_asyncio
import streamlit as st
from langchain_groq import ChatGroq
from scrapegraphai.graphs import SmartScraperMultiGraph
from sentence_transformers import SentenceTransformer

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()


@st.cache_resource
def load_embedding_model():
    """Return the sentence-transformer used for embeddings.

    Streamlit re-executes this script on every widget interaction; caching
    the model as a resource keeps it from being re-loaded on each rerun.
    """
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


# Load the sentence transformer model (cached across reruns)
model = load_embedding_model()

# Streamlit Application
st.title("Course Scraper from Analytics Vidhya")

# API Key Input; strip stray whitespace that often comes along with a pasted key
api_key = st.text_input("Enter your API Key:", type="password").strip()

# Scraping Configuration
graph_config = {
    "llm": {
        "model": "groq/llama3-70b-8192",
        "api_key": api_key,
        # NOTE(review): a temperature of 1 makes extraction output less
        # repeatable; confirm this is intentional for a scraping task.
        "temperature": 1,
    },
    "embeddings": {
        "model_instance": model,
    },
    "verbose": True,
    "headless": True,  # Set to True to run in headless mode
}

# Generate URLs for the first 4 pages (1 to 4)
base_url = "https://courses.analyticsvidhya.com/collections"
urls = [f"{base_url}?page={i}" for i in range(1, 5)]

# Run the scraper when the button is clicked
if st.button("Scrape Courses"):
    # Without a key the LLM call can only fail; tell the user up front
    # instead of surfacing an opaque provider error.
    if not api_key:
        st.warning("Please enter your API key before scraping.")
        st.stop()

    try:
        # Create the SmartScraperMultiGraph instance (one prompt, many URLs)
        smart_scraper_graph = SmartScraperMultiGraph(
            prompt="tell me about all the courses",
            source=urls,
            config=graph_config,
        )

        # Run the scraper
        result = smart_scraper_graph.run()

        # Save the result as a JSON file; explicit encoding keeps the output
        # independent of the platform default.
        with open("courses.json", "w", encoding="utf-8") as outfile:
            json.dump(result, outfile, indent=4)

        # Display the results in Streamlit
        st.success("Scraping completed successfully!")
        st.json(result)  # Display the result as a JSON object

    except Exception as e:
        # Top-level UI boundary: report any failure to the user rather than
        # letting the script crash with a traceback.
        st.error(f"An error occurred: {e}")