mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: updating default parameters to get better performance with docling-parse (#2208)
* updated the code

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* updated the parameters

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
a9f41b088e
commit
b49d1ad4f1
@@ -30,13 +30,21 @@ class DoclingParseV4PageBackend(PdfPageBackend):
|
|||||||
page_no: int,
|
page_no: int,
|
||||||
create_words: bool = True,
|
create_words: bool = True,
|
||||||
create_textlines: bool = True,
|
create_textlines: bool = True,
|
||||||
|
keep_chars: bool = False,
|
||||||
|
keep_lines: bool = False,
|
||||||
|
keep_images: bool = True,
|
||||||
):
|
):
|
||||||
self._ppage = page_obj
|
self._ppage = page_obj
|
||||||
self._dp_doc = dp_doc
|
self._dp_doc = dp_doc
|
||||||
self._page_no = page_no
|
self._page_no = page_no
|
||||||
|
|
||||||
self._create_words = create_words
|
self._create_words = create_words
|
||||||
self._create_textlines = create_textlines
|
self._create_textlines = create_textlines
|
||||||
|
|
||||||
|
self._keep_chars = keep_chars
|
||||||
|
self._keep_lines = keep_lines
|
||||||
|
self._keep_images = keep_images
|
||||||
|
|
||||||
self._dpage: Optional[SegmentedPdfPage] = None
|
self._dpage: Optional[SegmentedPdfPage] = None
|
||||||
self._unloaded = False
|
self._unloaded = False
|
||||||
self.valid = (self._ppage is not None) and (self._dp_doc is not None)
|
self.valid = (self._ppage is not None) and (self._dp_doc is not None)
|
||||||
@@ -47,9 +55,9 @@ class DoclingParseV4PageBackend(PdfPageBackend):
|
|||||||
|
|
||||||
seg_page = self._dp_doc.get_page(
|
seg_page = self._dp_doc.get_page(
|
||||||
self._page_no + 1,
|
self._page_no + 1,
|
||||||
keep_chars=True,
|
keep_chars=self._keep_chars,
|
||||||
keep_lines=True,
|
keep_lines=self._keep_lines,
|
||||||
keep_bitmaps=True,
|
keep_bitmaps=self._keep_images,
|
||||||
create_words=self._create_words,
|
create_words=self._create_words,
|
||||||
create_textlines=self._create_textlines,
|
create_textlines=self._create_textlines,
|
||||||
enforce_same_font=True,
|
enforce_same_font=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user