File size: 1,889 Bytes
5ab87e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from __future__ import annotations
import logging, re
from config import CFG, _SESS
from web_helpers import retry, fetch_blocked_site

# yt-dlp is an optional dependency: remember whether the import worked so
# the rest of the module can branch on _HAS instead of re-trying the import.
try:
    import yt_dlp
except ImportError:
    _HAS = False
else:
    _HAS = True

# Caption language codes that are tried first, in this order.
_LANGS = ["en", "en-US"]

# Matches the leading "MM:SS" of a bare timestamp line in a VTT body.
_VTT_CUE_TIME = re.compile(r"\d{2}:\d{2}")


def _pick_caption_track(info: dict) -> dict | None:
    """Return the caption track to download from *info*, or None if none is usable."""
    subs = info.get("subtitles") or {}
    auto = info.get("automatic_captions") or {}
    # Preferred languages first (manual subtitles before automatic captions),
    # then whichever language is listed first.
    tracks = next((subs.get(lang) or auto.get(lang) for lang in _LANGS
                   if subs.get(lang) or auto.get(lang)), None)
    if not tracks:
        tracks = next(iter(subs.values()), []) or next(iter(auto.values()), [])

    usable = [t for t in tracks or [] if t.get("url")]
    # Prefer the formats this module knows how to parse over "whatever is first".
    for wanted in ("json3", "vtt"):
        for track in usable:
            if (track.get("ext") or "").lower() == wanted:
                return track
    return usable[0] if usable else None


def _is_vtt(track: dict, cap_url: str) -> bool:
    """Tell whether *track* is WebVTT, from its ``ext`` field or else its URL."""
    ext = (track.get("ext") or "").lower()
    if ext:
        return ext == "vtt"
    path = cap_url.split("?", 1)[0].lower()
    return path.endswith(".vtt") or "fmt=vtt" in cap_url


def _vtt_to_text(body: str) -> str:
    """Join the cue text of a WebVTT document into one space-separated string."""
    lines = body.splitlines()
    if lines and lines[0].lstrip("\ufeff").startswith("WEBVTT"):
        # The header block runs up to the first blank line; it is not speech.
        first_blank = next((i for i, ln in enumerate(lines) if not ln.strip()),
                           len(lines))
        lines = lines[first_blank:]
    return " ".join(ln for ln in lines
                    if ln.strip() and "-->" not in ln
                    and not _VTT_CUE_TIME.match(ln))


def _json3_to_text(payload: dict) -> str:
    """Join every ``utf8`` segment of a json3 caption payload with spaces."""
    return " ".join(seg["utf8"]
                    for ev in payload.get("events") or []
                    for seg in ev.get("segs") or []
                    if seg.get("utf8"))


@retry
def fetch_youtube(url: str) -> str:
    """Return caption text (or title/description) for the video at *url*.

    Order of attempts:

    1. If yt-dlp is not installed, delegate to ``fetch_blocked_site``.
    2. Ask yt-dlp for the video info (no media download), pick a caption
       track, download it through ``_SESS`` and flatten it to plain text.
    3. If there is no usable track or the captions are empty, return the
       title and description prefixed with ``"[Retrieved from yt-dlp] "``.
    4. On any exception, log it and delegate to ``fetch_blocked_site``.

    Results are truncated to ``CFG.text_cap`` characters (in case 3 the
    prefix is added after truncation, as before).

    NOTE(review): every ``Exception`` is handled here, so ``@retry`` only
    sees errors raised by ``fetch_blocked_site`` itself -- confirm that is
    the intended interaction.
    """
    if not _HAS:
        return fetch_blocked_site(url)[:CFG.text_cap]

    try:
        ydl_opts = {"quiet": True, "no_warnings": True,
                    "writesubtitles": True, "writeautomaticsub": True,
                    "skip_download": True}
        with yt_dlp.YoutubeDL(ydl_opts) as y:
            info = y.extract_info(url, download=False)

        track = _pick_caption_track(info)
        if track:
            cap_url = track["url"]
            # Decide the format BEFORE touching the URL: appending a query
            # parameter first would hide a ".vtt" suffix from the check.
            vtt = _is_vtt(track, cap_url)
            if not vtt and "fmt=" not in cap_url:
                separator = "&" if "?" in cap_url else "?"
                cap_url += separator + "fmt=json3"
            r = _SESS.get(cap_url, timeout=(CFG.connect_to, CFG.read_to))
            r.raise_for_status()
            text = _vtt_to_text(r.text) if vtt else _json3_to_text(r.json())
            if text:
                return text[:CFG.text_cap]

        # ``or ""`` because a key can be present with a None value.
        title = info.get("title") or ""
        description = info.get("description") or ""
        meta = (title + "\n\n" + description).strip()
        return "[Retrieved from yt-dlp] " + meta[:CFG.text_cap]
    except Exception as e:
        logging.error("YouTube fetch failed %s: %s", url, e)
        return fetch_blocked_site(url)[:CFG.text_cap]