Spaces:

Jofthomas
/

reddit_MCP

Sleeping

App Files Community

Jofthomas committed on Sep 9

Commit

ab03104

1 Parent(s): a6c6ed7

change

Browse files

Files changed (1) hide show

app.py +56 -34

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from __future__ import annotations
 import os
 from typing import List, Optional, Literal
 import httpx
@@ -58,38 +60,53 @@ def _fetch_subreddit_new(subreddit: str, limit: int) -> list[dict]:
             children = payload.get("data", {}).get("children", [])
             print(f"Reddit fetch source: JSON API ({len(children)} items)")
             return [child.get("data", {}) for child in children]
-    except Exception:
-        # RSS fallback
-        feed_url = f"https://www.reddit.com/r/{subreddit}/new/.rss"
-        feed = feedparser.parse(feed_url)
-        posts: list[dict] = []
-        for entry in feed.entries[:limit]:
-            # Attempt to extract id and score if present (RSS is limited)
-            link = entry.get("link") or ""
-            title = entry.get("title") or ""
-            # created: use published_parsed if available
-            created_utc = 0.0
-            if getattr(entry, "published_parsed", None):
-                try:
-                    import calendar
-                    created_utc = float(calendar.timegm(entry.published_parsed))
-                except Exception:
-                    created_utc = 0.0
-            posts.append(
-                {
-                    "title": title,
-                    "selftext": "",
-                    "score": 0,
-                    "created_utc": created_utc,
-                    "id": entry.get("id") or "",
-                    "permalink": "",
-                    "url": link,
-                    "link_flair_text": None,
-                }
-            )
-        print(f"Reddit fetch source: RSS fallback ({len(posts)} items)")
-        return posts
 def _get_mistral_client() -> Mistral:
@@ -171,9 +188,12 @@ def scan_mistralai_pain_points(limit: int = 50, min_score: int = 0) -> List[Pain
     for post in raw_posts:
         title = post.get("title", "").strip()
         selftext = post.get("selftext", "") or ""
-        score = int(post.get("score", 0) or 0)
-        if score < min_score:
             continue
         try:
@@ -184,6 +204,7 @@ def scan_mistralai_pain_points(limit: int = 50, min_score: int = 0) -> List[Pain
             continue
         if not should:
             continue
         try:
@@ -210,6 +231,7 @@ def scan_mistralai_pain_points(limit: int = 50, min_score: int = 0) -> List[Pain
                 flair=post.get("link_flair_text"),
             )
         )
     print(f"Extraction complete: {len(pain_points)} pain points")
     return pain_points

 from __future__ import annotations
 import os
+import re
+import html
 from typing import List, Optional, Literal
 import httpx
             children = payload.get("data", {}).get("children", [])
             print(f"Reddit fetch source: JSON API ({len(children)} items)")
             return [child.get("data", {}) for child in children]
+    except Exception as e:
+        print(f"Reddit JSON fetch failed: {e}; trying api.reddit.com")
+        try:
+            api_url = f"https://api.reddit.com/r/{subreddit}/new?limit={limit}"
+            with httpx.Client(timeout=httpx.Timeout(15.0), headers=headers) as client:
+                response = client.get(api_url, follow_redirects=True)
+                response.raise_for_status()
+                payload = response.json()
+                children = payload.get("data", {}).get("children", [])
+                print(f"Reddit fetch source: API domain ({len(children)} items)")
+                return [child.get("data", {}) for child in children]
+        except Exception as e2:
+            # RSS fallback
+            print(f"Reddit API fetch failed: {e2}; switching to RSS fallback")
+            feed_url = f"https://www.reddit.com/r/{subreddit}/new/.rss"
+            feed = feedparser.parse(feed_url)
+            posts: list[dict] = []
+            for entry in feed.entries[:limit]:
+                link = entry.get("link") or ""
+                title = entry.get("title") or ""
+                created_utc = 0.0
+                if getattr(entry, "published_parsed", None):
+                    try:
+                        import calendar
+                        created_utc = float(calendar.timegm(entry.published_parsed))
+                    except Exception:
+                        created_utc = 0.0
+                # Extract a crude text body from RSS summary/content for better AI signal
+                raw_summary = getattr(entry, "summary", "") or getattr(entry, "description", "") or ""
+                if raw_summary:
+                    text = html.unescape(re.sub(r"<[^>]+>", " ", raw_summary)).strip()
+                else:
+                    text = ""
+                posts.append(
+                    {
+                        "title": title,
+                        "selftext": text,
+                        "score": None,
+                        "created_utc": created_utc,
+                        "id": entry.get("id") or "",
+                        "permalink": "",
+                        "url": link,
+                        "link_flair_text": None,
+                    }
+                )
+            print(f"Reddit fetch source: RSS fallback ({len(posts)} items)")
+            return posts
 def _get_mistral_client() -> Mistral:
     for post in raw_posts:
         title = post.get("title", "").strip()
         selftext = post.get("selftext", "") or ""
+        raw_score = post.get("score")
+        score = int(raw_score) if raw_score is not None else 0
+        # Only filter by score when a real score is available
+        if raw_score is not None and score < min_score:
+            print(f"Skip by score: '{title[:80]}' score={score} < min_score={min_score}")
             continue
         try:
             continue
         if not should:
+            print(f"Classifier NO: '{title[:80]}'")
             continue
         try:
                 flair=post.get("link_flair_text"),
             )
         )
+        print(f"Added: '{ai_title[:80]}'")
     print(f"Extraction complete: {len(pain_points)} pain points")
     return pain_points