mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-30 14:04:27 +00:00
preparing to migrate to new doctags deserializer
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
46dc2e621f
commit
8e54299eac
@ -9,6 +9,7 @@ from pathlib import Path
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from docling_core.types import DoclingDocument
|
from docling_core.types import DoclingDocument
|
||||||
|
from docling_core.types.doc.document import DocTagsDocument
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
BoundingBox,
|
BoundingBox,
|
||||||
DocItem,
|
DocItem,
|
||||||
@ -108,6 +109,11 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
== ResponseFormat.DOCTAGS
|
== ResponseFormat.DOCTAGS
|
||||||
):
|
):
|
||||||
conv_res.document = self._turn_tags_into_doc(conv_res.pages)
|
conv_res.document = self._turn_tags_into_doc(conv_res.pages)
|
||||||
|
# doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
|
||||||
|
# conv_res.document.load_from_doctags(doctags_doc)
|
||||||
|
# USE THIS TO FORCE BACKEND TEXT
|
||||||
|
# if self.force_backend_text:
|
||||||
|
# text_content = extract_text_from_backend(page, bbox)
|
||||||
elif (
|
elif (
|
||||||
self.pipeline_options.vlm_options.response_format
|
self.pipeline_options.vlm_options.response_format
|
||||||
== ResponseFormat.MARKDOWN
|
== ResponseFormat.MARKDOWN
|
||||||
|
Loading…
Reference in New Issue
Block a user