From 1b968e49841a073ab8b52c58629f8e413411803f Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Fri, 10 Jan 2025 10:50:35 +0100 Subject: [PATCH] Fixes to preserve page image and demo export to html Signed-off-by: Maksym Lysak --- docling/models/smol_docling_model.py | 2 -- docling/pipeline/base_pipeline.py | 11 +++++------ docling/pipeline/vlm_pipeline.py | 2 +- docs/examples/minimal_smol_docling.py | 25 ++++++++++++++++++------- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docling/models/smol_docling_model.py b/docling/models/smol_docling_model.py index 72113a96..80c54f43 100644 --- a/docling/models/smol_docling_model.py +++ b/docling/models/smol_docling_model.py @@ -23,7 +23,6 @@ from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOpt from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.utils.accelerator_utils import decide_device -from docling.utils.layout_postprocessor import LayoutPostprocessor from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) @@ -44,7 +43,6 @@ class SmolDoclingModel(BasePageModel): ) self.param_quantized = False - # self.your_vlm_predictor(..., device) = None # TODO self.processor = AutoProcessor.from_pretrained(artifacts_path) if not self.param_quantized: self.vlm_model = Idefics3ForConditionalGeneration.from_pretrained( diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 1bf48ef0..734fb118 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -200,13 +200,12 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name. return conv_res def _unload(self, conv_res: ConversionResult) -> ConversionResult: - for page in conv_res.pages: - if page._backend is not None: - page._backend.unload() - - if conv_res.input._backend: - conv_res.input._backend.unload() + # for page in conv_res.pages: + # if page._backend is not None: + # page._backend.unload() + # if conv_res.input._backend: + # conv_res.input._backend.unload() return conv_res def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus: diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 6de5385d..e846ada0 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -100,7 +100,7 @@ class VlmPipeline(PaginatedPipeline): if page.predictions.doctags is not None: document_tags += page.predictions.doctags.tag_string - conv_res.document = self._turn_tags_into_doc(document_tags, None) + conv_res.document = self._turn_tags_into_doc(document_tags, page.image) """ image_bytes = BytesIO() if page.image: diff --git a/docs/examples/minimal_smol_docling.py b/docs/examples/minimal_smol_docling.py index e8ad8b02..bea0a746 100644 --- a/docs/examples/minimal_smol_docling.py +++ b/docs/examples/minimal_smol_docling.py @@ -1,3 +1,5 @@ +from pathlib import Path + from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions @@ -9,19 +11,21 @@ from docling.pipeline.vlm_pipeline import VlmPipeline source = "tests/data/2305.03393v1-pg9.pdf" pipeline_options = PdfPipelineOptions() +pipeline_options.generate_page_images = True pipeline_options.artifacts_path = "model_artifacts" +from docling_core.types.doc import DocItemLabel, ImageRefMode +from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS + converter = DocumentConverter( format_options={ InputFormat.PDF: PdfFormatOption( pipeline_cls=VlmPipeline, pipeline_options=pipeline_options, - backend=DoclingParseDocumentBackend, ), InputFormat.IMAGE: PdfFormatOption( pipeline_cls=VlmPipeline, pipeline_options=pipeline_options, - backend=DoclingParseDocumentBackend, ), } ) @@ -33,11 +37,11 @@ print("") result = converter.convert(source) -print("------------") -print("result:") -print("------------") -print("") -print(result) +# print("------------") +# print("result:") +# print("------------") +# print("") +# print(result) print("------------") print("MD:") @@ -45,6 +49,13 @@ print("------------") print("") print(result.document.export_to_markdown()) +Path("scratch").mkdir(parents=True, exist_ok=True) +result.document.save_as_html( + filename=Path("scratch/smol_export.html"), + image_mode=ImageRefMode.REFERENCED, + labels=[*DEFAULT_EXPORT_LABELS, DocItemLabel.FOOTNOTE], +) + print("") print("============") print("done!")