mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
Added support for force_backend_text parameter
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
5c62f88175
commit
5dffd10eae
@ -12,6 +12,7 @@ from docling_core.types.doc import ( # DocItemLabel,; DoclingDocument,; GroupLa
|
|||||||
DocItem,
|
DocItem,
|
||||||
ImageRef,
|
ImageRef,
|
||||||
PictureItem,
|
PictureItem,
|
||||||
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.doc.document import DocTagsDocument
|
from docling_core.types.doc.document import DocTagsDocument
|
||||||
from docling_core.types.doc.tokens import DocumentToken, TableToken
|
from docling_core.types.doc.tokens import DocumentToken, TableToken
|
||||||
@ -87,6 +88,19 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
|
|
||||||
return page
|
return page
|
||||||
|
|
||||||
|
def extract_text_from_backend(self, page: Page, bbox: "BoundingBox | None") -> str:
    """Return the text that the page backend reports inside ``bbox``.

    The box is forwarded to ``page._backend.get_text_in_rect`` unchanged:
    no scaling or coordinate conversion happens here, so the caller is
    responsible for passing it in whatever coordinates the backend expects.

    Args:
        page: Page whose ``size`` and ``_backend`` attributes are consulted.
        bbox: Region to read text from, or ``None``.

    Returns:
        The backend's text for the region, or ``""`` when ``bbox``,
        ``page.size`` or ``page._backend`` is missing/falsy.
    """
    # The ``bbox`` annotation is quoted so that the ``X | None`` union is not
    # evaluated at definition time (that form needs Python 3.10+ at runtime).
    # Checks run in the same order as before: bbox, then size, then backend.
    if not (bbox and page.size and page._backend):
        return ""
    return page._backend.get_text_in_rect(bbox)
|
||||||
|
|
||||||
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
|
def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
|
||||||
with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
|
with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
|
||||||
|
|
||||||
@ -112,9 +126,27 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
doctags_list_c, image_list_c
|
doctags_list_c, image_list_c
|
||||||
)
|
)
|
||||||
conv_res.document.load_from_doctags(doctags_doc)
|
conv_res.document.load_from_doctags(doctags_doc)
|
||||||
# USE THIS TO FORCE BACKEND TEXT
|
|
||||||
# if self.force_backend_text:
|
# If forced backend text, replace model predicted text with backend one
|
||||||
# text_content = extract_text_from_backend(page, bbox)
|
if page.size:
|
||||||
|
if self.force_backend_text:
|
||||||
|
scale = self.pipeline_options.images_scale
|
||||||
|
for element, _level in conv_res.document.iterate_items():
|
||||||
|
if (
|
||||||
|
not isinstance(element, TextItem)
|
||||||
|
or len(element.prov) == 0
|
||||||
|
):
|
||||||
|
continue
|
||||||
|
crop_bbox = (
|
||||||
|
element.prov[0]
|
||||||
|
.bbox.scaled(scale=scale)
|
||||||
|
.to_top_left_origin(
|
||||||
|
page_height=page.size.height * scale
|
||||||
|
)
|
||||||
|
)
|
||||||
|
txt = self.extract_text_from_backend(page, crop_bbox)
|
||||||
|
element.text = txt
|
||||||
|
element.orig = txt
|
||||||
elif (
|
elif (
|
||||||
self.pipeline_options.vlm_options.response_format
|
self.pipeline_options.vlm_options.response_format
|
||||||
== ResponseFormat.MARKDOWN
|
== ResponseFormat.MARKDOWN
|
||||||
|
Loading…
Reference in New Issue
Block a user