from __future__ import annotations

import logging
import re

from config import CFG, _SESS
from web_helpers import retry, fetch_blocked_site

# yt_dlp is optional; when it is missing we fall back to the generic blocked-site fetcher.
try:
    import yt_dlp
    _HAS = True
except ImportError:
    _HAS = False

# Preferred caption languages, tried in order.
_LANGS = ["en", "en-US"]


@retry
def fetch_youtube(url: str) -> str:
    """Return caption text for a YouTube URL, falling back to title/description."""
    if not _HAS:
        return fetch_blocked_site(url)[:CFG.text_cap]

    try:
        ydl_opts = {"quiet": True, "no_warnings": True,
                    "writesubtitles": True, "writeautomaticsub": True,
                    "skip_download": True}
        with yt_dlp.YoutubeDL(ydl_opts) as y:
            info = y.extract_info(url, download=False)

        # Prefer manual subtitles over automatic captions, in _LANGS order.
        subs = info.get("subtitles", {}) or {}
        auto = info.get("automatic_captions", {}) or {}
        tracks = next((subs.get(l) or auto.get(l) for l in _LANGS
                       if subs.get(l) or auto.get(l)), None)
        if not tracks:
            # No preferred language available; take whichever track comes first.
            tracks = next(iter(subs.values()), []) or next(iter(auto.values()), [])

        if tracks:
            cap_url = tracks[0]["url"]
            if "fmt=" not in cap_url:
                cap_url += "&fmt=json3"
            r = _SESS.get(cap_url, timeout=(CFG.connect_to, CFG.read_to))
            r.raise_for_status()
            if cap_url.endswith(".vtt"):
                # WebVTT: drop cue timings and keep only the caption text lines.
                text = " ".join(l for l in r.text.splitlines()
                                if l and "-->" not in l and not re.match(r"\d{2}:\d{2}", l))
            else:
                # json3: concatenate every text segment of every caption event.
                text = " ".join(seg["utf8"] for ev in r.json()["events"]
                                for seg in ev.get("segs", []))
            if text:
                return text[:CFG.text_cap]

        # No usable captions; fall back to the video's title and description.
        meta = (info.get("title", "") + "\n\n" + info.get("description", "")).strip()
        return "[Retrieved from yt-dlp] " + meta[:CFG.text_cap]
    except Exception as e:
        logging.error("YouTube fetch failed %s: %s", url, e)
        return fetch_blocked_site(url)[:CFG.text_cap]
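

# Minimal usage sketch: fetch captions for one video when the module is run directly.
# Assumes config.CFG and config._SESS are fully configured by the importing project;
# the URL below is only an illustrative placeholder.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    sample_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # placeholder video URL
    print(fetch_youtube(sample_url)[:500])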