"""Tests for PDF conversion via ``DocumentConverter`` and for prompt formulation.

The first half converts a single-page PDF from a path and from an in-memory
stream and checks the result with ``verify_conversion_result_v2``.  The second
half exercises ``formulate_prompt`` on a minimal ``BaseVlmPageModel`` subclass
for each ``TransformersPromptStyle`` used below.
"""

from io import BytesIO
from pathlib import Path
from unittest.mock import Mock

import pytest

from docling.datamodel.accelerator_options import AcceleratorDevice
from docling.datamodel.base_models import DocumentStream, InputFormat
from docling.datamodel.pipeline_options_vlm_model import (
    InferenceFramework,
    InlineVlmOptions,
    ResponseFormat,
    TransformersPromptStyle,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.models.base_model import BaseVlmPageModel

from .test_data_gen_flag import GEN_TEST_DATA
from .verify_utils import verify_conversion_result_v2

GENERATE = GEN_TEST_DATA


def get_pdf_path() -> Path:
    """Return the (relative) path of the PDF used by the conversion tests."""
    return Path("./tests/data/pdf/2305.03393v1-pg9.pdf")


@pytest.fixture
def converter():
    """Build a ``DocumentConverter`` configured for PDF input.

    The pipeline runs on CPU with OCR disabled, table structure (with cell
    matching) enabled, and parsed-page generation enabled.
    """
    # Imported lazily, as in the original fixture, so the module import stays light.
    from docling.datamodel.pipeline_options import PdfPipelineOptions

    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = False
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
    pipeline_options.accelerator_options.device = AcceleratorDevice.CPU
    pipeline_options.generate_parsed_pages = True

    # Local name differs from the fixture name to avoid shadowing the function.
    doc_converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_options=pipeline_options,
                # Reuse whatever backend a default PdfFormatOption selects.
                backend=PdfFormatOption().backend,
            )
        }
    )
    return doc_converter


def test_convert_path(converter: DocumentConverter):
    """Convert the test PDF from a filesystem path and verify the result."""
    pdf_path = get_pdf_path()
    print(f"converting {pdf_path}")

    # Avoid heavy torch-dependent models by not instantiating layout models here in coverage run
    doc_result = converter.convert(pdf_path)
    verify_conversion_result_v2(
        input_path=pdf_path, doc_result=doc_result, generate=GENERATE
    )


def test_convert_stream(converter: DocumentConverter):
    """Convert the test PDF from an in-memory stream and verify the result."""
    pdf_path = get_pdf_path()
    print(f"converting {pdf_path}")

    # read_bytes() opens and closes the file itself; the previous
    # ``pdf_path.open("rb").read()`` left the file handle unclosed.
    buf = BytesIO(pdf_path.read_bytes())
    stream = DocumentStream(name=pdf_path.name, stream=buf)

    doc_result = converter.convert(stream)
    verify_conversion_result_v2(
        input_path=pdf_path, doc_result=doc_result, generate=GENERATE
    )


class _DummyVlm(BaseVlmPageModel):
    """Minimal ``BaseVlmPageModel`` subclass for testing ``formulate_prompt``.

    Only ``vlm_options`` and a mocked ``processor`` are set up; the base class
    initializer is not called.  ``formulate_prompt`` itself is not defined here
    and therefore comes from the base class.
    """

    def __init__(self, prompt_style: TransformersPromptStyle, repo_id: str = ""):  # type: ignore[no-untyped-def]
        """Create the dummy model.

        Args:
            prompt_style: Prompt style stored in the VLM options.
            repo_id: Repository id stored in the VLM options; an empty string
                falls back to ``"dummy/repo"``.
        """
        self.vlm_options = InlineVlmOptions(
            repo_id=repo_id or "dummy/repo",
            prompt="test prompt",
            inference_framework=InferenceFramework.TRANSFORMERS,
            response_format=ResponseFormat.PLAINTEXT,
            transformers_prompt_style=prompt_style,
        )
        # Mock so tests can stub and inspect ``apply_chat_template``.
        self.processor = Mock()

    def __call__(self, conv_res, page_batch):  # type: ignore[no-untyped-def]
        """Stub: return no pages."""
        return []

    def process_images(self, image_batch, prompt):  # type: ignore[no-untyped-def]
        """Stub: return no predictions."""
        return []


def test_formulate_prompt_raw():
    """RAW style returns the user prompt unchanged."""
    model = _DummyVlm(TransformersPromptStyle.RAW)
    assert model.formulate_prompt("hello") == "hello"


def test_formulate_prompt_none():
    """NONE style returns an empty prompt regardless of the input text."""
    model = _DummyVlm(TransformersPromptStyle.NONE)
    assert model.formulate_prompt("ignored") == ""


def test_formulate_prompt_phi4_special_case():
    """RAW style returns the user prompt even for a granite-docling repo id."""
    model = _DummyVlm(
        TransformersPromptStyle.RAW, repo_id="ibm-granite/granite-docling-258M"
    )
    # NOTE(review): despite the test name, the repo id used is granite-docling,
    # not phi4.  Any repo-specific special path should apply only when the
    # style is not RAW; with RAW the user text must come back unchanged.
    assert model.formulate_prompt("describe image") == "describe image"


def test_formulate_prompt_chat_uses_processor_template():
    """CHAT style returns whatever ``processor.apply_chat_template`` produces."""
    model = _DummyVlm(TransformersPromptStyle.CHAT)
    model.processor.apply_chat_template.return_value = "templated"
    out = model.formulate_prompt("summarize")
    assert out == "templated"
    model.processor.apply_chat_template.assert_called()


def test_formulate_prompt_unknown_style_raises():
    """An unrecognized prompt style makes ``formulate_prompt`` raise RuntimeError."""
    # Create an InlineVlmOptions with an invalid enum by patching attribute directly
    model = _DummyVlm(TransformersPromptStyle.RAW)
    model.vlm_options.transformers_prompt_style = "__invalid__"  # type: ignore[assignment]
    with pytest.raises(RuntimeError):
        model.formulate_prompt("x")


def test_vlm_prompt_style_none_and_chat_variants():
    """Re-check the NONE and CHAT styles with different prompt texts."""
    # NONE always empty
    m_none = _DummyVlm(TransformersPromptStyle.NONE)
    assert m_none.formulate_prompt("anything") == ""

    # CHAT path ensures processor used even with complex prompt
    m_chat = _DummyVlm(TransformersPromptStyle.CHAT)
    m_chat.processor.apply_chat_template.return_value = "ok"
    out = m_chat.formulate_prompt("details please")
    assert out == "ok"