From c0e272c3eb837ab1b642dfc13c1a4f1e7d8f2001 Mon Sep 17 00:00:00 2001 From: Michele Dolfi Date: Tue, 17 Sep 2024 15:38:52 +0200 Subject: [PATCH] remove temp internal implementation of html export Signed-off-by: Michele Dolfi --- docling/utils/export.py | 61 ----------------------------------------- 1 file changed, 61 deletions(-) diff --git a/docling/utils/export.py b/docling/utils/export.py index 2bba44f0..e9e56930 100644 --- a/docling/utils/export.py +++ b/docling/utils/export.py @@ -9,67 +9,6 @@ from docling.datamodel.document import ConversionResult, Page _log = logging.getLogger(__name__) -def _export_table_to_html(table: Table): - - # TODO: this is flagged as internal, because we will move it - # to the docling-core package. - - def _get_tablecell_span(cell: TableCell, ix): - if cell.spans is None: - span = set() - else: - span = set([s[ix] for s in cell.spans]) - if len(span) == 0: - return 1, None, None - return len(span), min(span), max(span) - - body = "" - nrows = table.num_rows - ncols = table.num_cols - - if table.data is None: - return "" - for i in range(nrows): - body += "" - for j in range(ncols): - cell: TableCell = table.data[i][j] - - rowspan, rowstart, rowend = _get_tablecell_span(cell, 0) - colspan, colstart, colend = _get_tablecell_span(cell, 1) - - if rowstart is not None and rowstart != i: - continue - if colstart is not None and colstart != j: - continue - - if rowstart is None: - rowstart = i - if colstart is None: - colstart = j - - content = cell.text.strip() - label = cell.obj_type - label_class = "body" - celltag = "td" - if label in ["row_header", "row_multi_header", "row_title"]: - label_class = "header" - elif label in ["col_header", "col_multi_header"]: - label_class = "header" - celltag = "th" - - opening_tag = f"{celltag}" - if rowspan > 1: - opening_tag += f' rowspan="{rowspan}"' - if colspan > 1: - opening_tag += f' colspan="{colspan}"' - - body += f"<{opening_tag}>{content}" - body += "" - body = f"{body}
" - - return body - - def generate_multimodal_pages( doc_result: ConversionResult, ) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]: