Spaces:

Divyansh12
/

analytics-vidhya-search

Sleeping

App Files Files Community

Divyansh12 committed on Oct 6, 2024

Commit

46650c7

verified ·

1 Parent(s): 0a1692e

Create app.py

Browse files

Files changed (1) hide show

app.py +61 -0

app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import asyncio
+import nest_asyncio
+import json
+import streamlit as st
+from sentence_transformers import SentenceTransformer
+from scrapegraphai.graphs import SmartScraperMultiGraph
+from langchain_groq import ChatGroq
+# Apply nest_asyncio to allow nested event loops
+nest_asyncio.apply()
+# Load the sentence transformer model
+model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Streamlit Application
+st.title("Course Scraper from Analytics Vidhya")
+# API Key Input
+api_key = st.text_input("Enter your API Key:", type="password")
+# Scraping Configuration
+graph_config = {
+    "llm": {
+        "model": "groq/llama3-70b-8192",
+        "api_key": api_key,
+        "temperature": 1
+    },
+    "embeddings": {
+        "model_instance": model
+    },
+    'verbose': True,
+    "headless": True  # Set to True to run in headless mode
+}
+# Generate URLs for the first 4 pages (1 to 4)
+base_url = "https://courses.analyticsvidhya.com/collections"
+urls = [f"{base_url}?page={i}" for i in range(1, 5)]  # Adjusting to scrape only the first 4 pages
+# Run the scraper when the button is clicked
+if st.button("Scrape Courses"):
+    try:
+        # Create the SmartScraperGraph instance
+        smart_scraper_graph = SmartScraperMultiGraph(
+            prompt="tell me about all the courses",
+            source=urls,
+            config=graph_config
+        )
+        # Run the scraper
+        result = smart_scraper_graph.run()
+        # Save the result as a JSON file
+        with open("courses.json", "w") as outfile:
+            json.dump(result, outfile, indent=4)
+        # Display the results in Streamlit
+        st.success("Scraping completed successfully!")
+        st.json(result)  # Display the result as a JSON object
+    except Exception as e:
+        st.error(f"An error occurred: {e}")