13ze committed on
Commit
ed9451a
verified
1 Parent(s): ca19b43

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from bs4 import BeautifulSoup
3
+ from markdownify import MarkdownConverter
4
+
5
+
6
def md(soup, **options):
    """Convert an already-parsed BeautifulSoup node to Markdown.

    Args:
        soup: The parsed node (or whole document) to convert.
        **options: Keyword options forwarded unchanged to the
            ``MarkdownConverter`` constructor.

    Returns:
        Whatever ``MarkdownConverter.convert_soup`` returns for ``soup``.
    """
    converter = MarkdownConverter(**options)
    return converter.convert_soup(soup)
8
+
9
+
10
def main_fn(html: str, check: list[str]) -> str:
    """Convert raw HTML into a Markdown document headed by the page title.

    The HTML is parsed with ``html.parser``, ``<script>`` and ``<style>``
    elements are removed, and the first of ``<main>``, ``<body>`` or the whole
    parsed document that exists is converted to Markdown.

    Args:
        html: Raw HTML text. ``None`` is treated as an empty string.
        check: Tag names selected in the UI checkbox group. Currently unused:
            nothing is stripped, matching the "no effect" note in the UI label.

    Returns:
        The page title, a ``======`` underline, a blank line, and the
        generated Markdown.
    """
    soup = BeautifulSoup(html or "", features="html.parser")

    # Remove <script> and <style> so their raw contents never leak into the output.
    for element in soup.find_all(["script", "style"]):
        element.decompose()

    # html.parser does not synthesize <body>, so a pasted fragment may have
    # neither <main> nor <body>; fall back to the whole document instead of
    # handing None to the converter.
    root = soup.find("main")
    if root is None:
        root = soup.find("body")
    if root is None:
        root = soup

    strip_tags = []  # `check` could later be used here to filter tags.
    markdown = md(root, strip=strip_tags)

    title_tag = soup.find("title")
    title = title_tag.get_text(strip=True) if title_tag is not None else ""
    if not title:
        # Missing or empty <title>: use the default ("Untitled") heading.
        title = "Sem título"

    return f"{title}\n======\n\n{markdown}"
32
+
33
+
34
# Gradio UI: an HTML text area and a tag checklist as inputs, one text area
# with the generated Markdown as output.
demo = gr.Interface(
    fn=main_fn,
    title="HTML para Markdown",
    description=(
        "\n"
        '<div style="width: fit-content; margin: 0 auto;">\n'
        "Cole aqui seu HTML bruto e o app vai converter para Markdown.\n"
        "</div>"
    ),
    inputs=[
        gr.TextArea(label="HTML", placeholder="Cole seu código HTML aqui...", lines=20),
        gr.CheckboxGroup(
            # The selection is handed to main_fn as `check`, which does not
            # use it yet; the label tells the user it has no effect.
            label="Ignorar tags (sem efeito - tudo será extraído)",
            choices=["a", "img", "noscript"],
            value=[],
        ),
    ],
    outputs=[
        gr.TextArea(label="Markdown gerado", show_copy_button=True),
    ],
    allow_flagging="never",
)

# Launched at import time, as in the original script; the server is bound to
# 0.0.0.0 rather than the default host.
demo.launch(server_name="0.0.0.0")