Spaces:
Runtime error
Runtime error
Commit
·
c749499
1
Parent(s):
eb4710d
Add token count per row
Browse files- tapas-styles.css +12 -2
- tapas_visualizer.py +10 -2
tapas-styles.css
CHANGED
|
@@ -11,12 +11,17 @@
|
|
| 11 |
letter-spacing:2px; /* Give some extra separation between chars */
|
| 12 |
}
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
.non-token{
|
| 15 |
/* White space and other things the tokenizer ignores*/
|
| 16 |
white-space: pre;
|
| 17 |
letter-spacing:4px;
|
| 18 |
-
border-top:1px solid #A0A0A0; /* A gentle border on top and bottom makes tabs more ovious*/
|
| 19 |
-
border-bottom:1px solid #A0A0A0
|
| 20 |
line-height: 1rem;
|
| 21 |
height: calc(100% - 2px);
|
| 22 |
}
|
|
@@ -35,4 +40,9 @@
|
|
| 35 |
.odd-token{
|
| 36 |
background:#A0A0A0;
|
| 37 |
border: 1px solid #A0A0A0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
|
|
|
| 11 |
letter-spacing:2px; /* Give some extra separation between chars */
|
| 12 |
}
|
| 13 |
|
| 14 |
+
th, td {
|
| 15 |
+
padding: 10px;
|
| 16 |
+
border: 1px solid;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
.non-token{
|
| 20 |
/* White space and other things the tokenizer ignores*/
|
| 21 |
white-space: pre;
|
| 22 |
letter-spacing:4px;
|
| 23 |
+
/* border-top:1px solid #A0A0A0; /* A gentle border on top and bottom makes tabs more obvious*/
|
| 24 |
+
/*border-bottom:1px solid #A0A0A0;*/
|
| 25 |
line-height: 1rem;
|
| 26 |
height: calc(100% - 2px);
|
| 27 |
}
|
|
|
|
| 40 |
.odd-token{
|
| 41 |
background:#A0A0A0;
|
| 42 |
border: 1px solid #A0A0A0;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.count{
|
| 46 |
+
font-family: "Tahoma" "Arial";
|
| 47 |
+
font-size: 1.2em;
|
| 48 |
}
|
tapas_visualizer.py
CHANGED
|
@@ -117,10 +117,14 @@ class TapasVisualizer:
|
|
| 117 |
|
| 118 |
# token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
|
| 119 |
header_row_html = ""
|
|
|
|
| 120 |
for col_id, col in enumerate(table.columns, start=1):
|
| 121 |
-
|
|
|
|
| 122 |
cell_html = "".join(span_htmls)
|
| 123 |
header_row_html += f"<th>{cell_html}</th>"
|
|
|
|
|
|
|
| 124 |
header_row_html = f'<tr>{header_row_html}</tr>'
|
| 125 |
|
| 126 |
table_vals = table.values
|
|
@@ -129,10 +133,14 @@ class TapasVisualizer:
|
|
| 129 |
|
| 130 |
for row_id, row in enumerate(table_vals, start=1):
|
| 131 |
row_html = ""
|
|
|
|
| 132 |
for col_id, cell in enumerate(row, start=1):
|
| 133 |
-
|
|
|
|
| 134 |
cell_html = "".join(span_htmls)
|
| 135 |
row_html += f"<td>{cell_html}</td>"
|
|
|
|
|
|
|
| 136 |
table_html += f'<tr>{row_html}</tr>'
|
| 137 |
|
| 138 |
table_html = f'<table>{table_html}</table>'
|
|
|
|
| 117 |
|
| 118 |
# token_df = pd.DataFrame(token_data, columns=['id', 'token', 'segment_id', 'column_id', 'row_id'])
|
| 119 |
header_row_html = ""
|
| 120 |
+
header_row_token_cnt = 0
|
| 121 |
for col_id, col in enumerate(table.columns, start=1):
|
| 122 |
+
cur_cell_tokens = cell_tokens[0, col_id]
|
| 123 |
+
span_htmls = self.text_to_html(col, cur_cell_tokens)
|
| 124 |
cell_html = "".join(span_htmls)
|
| 125 |
header_row_html += f"<th>{cell_html}</th>"
|
| 126 |
+
header_row_token_cnt += len(cur_cell_tokens)
|
| 127 |
+
header_row_html += f'<th style="border: none;">{self.style_span(header_row_token_cnt, ["non-token", "count"])}</th>'
|
| 128 |
header_row_html = f'<tr>{header_row_html}</tr>'
|
| 129 |
|
| 130 |
table_vals = table.values
|
|
|
|
| 133 |
|
| 134 |
for row_id, row in enumerate(table_vals, start=1):
|
| 135 |
row_html = ""
|
| 136 |
+
row_token_cnt = 0
|
| 137 |
for col_id, cell in enumerate(row, start=1):
|
| 138 |
+
cur_cell_tokens = cell_tokens[(row_id, col_id)]
|
| 139 |
+
span_htmls = self.text_to_html(cell, cur_cell_tokens)
|
| 140 |
cell_html = "".join(span_htmls)
|
| 141 |
row_html += f"<td>{cell_html}</td>"
|
| 142 |
+
row_token_cnt += len(cur_cell_tokens)
|
| 143 |
+
row_html += f'<td style="border: none;">{self.style_span(row_token_cnt, ["non-token", "count"])}</td>'
|
| 144 |
table_html += f'<tr>{row_html}</tr>'
|
| 145 |
|
| 146 |
table_html = f'<table>{table_html}</table>'
|