mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
remove temp internal implementation of html export
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
a472f49d5d
commit
c0e272c3eb
@ -9,67 +9,6 @@ from docling.datamodel.document import ConversionResult, Page
|
|||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _export_table_to_html(table: Table):
|
|
||||||
|
|
||||||
# TODO: this is flagged as internal, because we will move it
|
|
||||||
# to the docling-core package.
|
|
||||||
|
|
||||||
def _get_tablecell_span(cell: TableCell, ix):
|
|
||||||
if cell.spans is None:
|
|
||||||
span = set()
|
|
||||||
else:
|
|
||||||
span = set([s[ix] for s in cell.spans])
|
|
||||||
if len(span) == 0:
|
|
||||||
return 1, None, None
|
|
||||||
return len(span), min(span), max(span)
|
|
||||||
|
|
||||||
body = ""
|
|
||||||
nrows = table.num_rows
|
|
||||||
ncols = table.num_cols
|
|
||||||
|
|
||||||
if table.data is None:
|
|
||||||
return ""
|
|
||||||
for i in range(nrows):
|
|
||||||
body += "<tr>"
|
|
||||||
for j in range(ncols):
|
|
||||||
cell: TableCell = table.data[i][j]
|
|
||||||
|
|
||||||
rowspan, rowstart, rowend = _get_tablecell_span(cell, 0)
|
|
||||||
colspan, colstart, colend = _get_tablecell_span(cell, 1)
|
|
||||||
|
|
||||||
if rowstart is not None and rowstart != i:
|
|
||||||
continue
|
|
||||||
if colstart is not None and colstart != j:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if rowstart is None:
|
|
||||||
rowstart = i
|
|
||||||
if colstart is None:
|
|
||||||
colstart = j
|
|
||||||
|
|
||||||
content = cell.text.strip()
|
|
||||||
label = cell.obj_type
|
|
||||||
label_class = "body"
|
|
||||||
celltag = "td"
|
|
||||||
if label in ["row_header", "row_multi_header", "row_title"]:
|
|
||||||
label_class = "header"
|
|
||||||
elif label in ["col_header", "col_multi_header"]:
|
|
||||||
label_class = "header"
|
|
||||||
celltag = "th"
|
|
||||||
|
|
||||||
opening_tag = f"{celltag}"
|
|
||||||
if rowspan > 1:
|
|
||||||
opening_tag += f' rowspan="{rowspan}"'
|
|
||||||
if colspan > 1:
|
|
||||||
opening_tag += f' colspan="{colspan}"'
|
|
||||||
|
|
||||||
body += f"<{opening_tag}>{content}</{celltag}>"
|
|
||||||
body += "</tr>"
|
|
||||||
body = f"<table>{body}</table>"
|
|
||||||
|
|
||||||
return body
|
|
||||||
|
|
||||||
|
|
||||||
def generate_multimodal_pages(
|
def generate_multimodal_pages(
|
||||||
doc_result: ConversionResult,
|
doc_result: ConversionResult,
|
||||||
) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]:
|
) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]:
|
||||||
|
Loading…
Reference in New Issue
Block a user