mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Introduced SmolDoclingOptions to configure model parameters (such as the query and the artifacts path) from client code; see the example in minimal_smol_docling. This also makes provision for other potential all-in-one VLM models.
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
88b9ac6706
commit
f2751e11f9
@ -254,6 +254,14 @@ granite_picture_description = PictureDescriptionVlmOptions(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SmolDoclingOptions(BaseModel):
    # Client-configurable settings for the SmolDocling VLM model. An instance is
    # carried on the pipeline options as `vlm_options` and passed to
    # SmolDoclingModel, which copies the values below into its own parameters.

    # Location of the model artifacts.
    # NOTE(review): SmolDoclingModel receives its artifacts path as a separate
    # constructor argument; confirm where this field is actually consumed.
    artifacts_path: str = ""

    # Query string; SmolDoclingModel stores it as its question parameter.
    question: str = "Perform Layout Analysis."

    # Both values are forwarded to BitsAndBytesConfig when SmolDoclingModel
    # builds its quantization config.
    load_in_8bit: bool = True
    llm_int8_threshold: float = 6.0

    # Copied into SmolDoclingModel.param_quantized, which the model branches on
    # after creating its processor.
    quantized: bool = False
|
||||||
|
|
||||||
|
|
||||||
# Define an enum for the backend options
|
# Define an enum for the backend options
|
||||||
class PdfBackend(str, Enum):
|
class PdfBackend(str, Enum):
|
||||||
"""Enum of valid PDF backends."""
|
"""Enum of valid PDF backends."""
|
||||||
@ -313,6 +321,8 @@ class PdfPipelineOptions(PipelineOptions):
|
|||||||
Field(discriminator="kind"),
|
Field(discriminator="kind"),
|
||||||
] = smolvlm_picture_description
|
] = smolvlm_picture_description
|
||||||
|
|
||||||
|
vlm_options: Union[SmolDoclingOptions,] = Field(SmolDoclingOptions())
|
||||||
|
|
||||||
images_scale: float = 1.0
|
images_scale: float = 1.0
|
||||||
generate_page_images: bool = False
|
generate_page_images: bool = False
|
||||||
generate_picture_images: bool = False
|
generate_picture_images: bool = False
|
||||||
|
@ -13,7 +13,11 @@ from transformers import ( # type: ignore
|
|||||||
|
|
||||||
from docling.datamodel.base_models import DocTagsPrediction, Page
|
from docling.datamodel.base_models import DocTagsPrediction, Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOptions
|
from docling.datamodel.pipeline_options import (
|
||||||
|
AcceleratorDevice,
|
||||||
|
AcceleratorOptions,
|
||||||
|
SmolDoclingOptions,
|
||||||
|
)
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
@ -24,17 +28,23 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
class SmolDoclingModel(BasePageModel):
|
class SmolDoclingModel(BasePageModel):
|
||||||
|
|
||||||
def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
|
def __init__(
|
||||||
|
self,
|
||||||
|
artifacts_path: Path,
|
||||||
|
accelerator_options: AcceleratorOptions,
|
||||||
|
vlm_options: SmolDoclingOptions,
|
||||||
|
):
|
||||||
device = decide_device(accelerator_options.device)
|
device = decide_device(accelerator_options.device)
|
||||||
self.device = device
|
self.device = device
|
||||||
_log.info("Available device for SmolDocling: {}".format(device))
|
_log.info("Available device for SmolDocling: {}".format(device))
|
||||||
|
|
||||||
# PARAMETERS:
|
# PARAMETERS:
|
||||||
self.param_question = "Perform Layout Analysis."
|
self.param_question = vlm_options.question # "Perform Layout Analysis."
|
||||||
self.param_quantization_config = BitsAndBytesConfig(
|
self.param_quantization_config = BitsAndBytesConfig(
|
||||||
load_in_8bit=True, llm_int8_threshold=6.0
|
load_in_8bit=vlm_options.load_in_8bit, # True,
|
||||||
|
llm_int8_threshold=vlm_options.llm_int8_threshold, # 6.0
|
||||||
)
|
)
|
||||||
self.param_quantized = False
|
self.param_quantized = vlm_options.quantized # False
|
||||||
|
|
||||||
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
self.processor = AutoProcessor.from_pretrained(artifacts_path)
|
||||||
if not self.param_quantized:
|
if not self.param_quantized:
|
||||||
|
@ -36,8 +36,6 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class VlmPipeline(PaginatedPipeline):
|
class VlmPipeline(PaginatedPipeline):
|
||||||
# _smol_vlm_path = "SmolDocling-0.0.2"
|
|
||||||
_smol_vlm_path = "SmolDocling_2.7_DT_0.7"
|
|
||||||
|
|
||||||
def __init__(self, pipeline_options: PdfPipelineOptions):
|
def __init__(self, pipeline_options: PdfPipelineOptions):
|
||||||
super().__init__(pipeline_options)
|
super().__init__(pipeline_options)
|
||||||
@ -60,8 +58,9 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
|
|
||||||
self.build_pipe = [
|
self.build_pipe = [
|
||||||
SmolDoclingModel(
|
SmolDoclingModel(
|
||||||
artifacts_path=self.artifacts_path / VlmPipeline._smol_vlm_path,
|
artifacts_path=self.artifacts_path,
|
||||||
accelerator_options=pipeline_options.accelerator_options,
|
accelerator_options=pipeline_options.accelerator_options,
|
||||||
|
vlm_options=self.pipeline_options.vlm_options,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -8,24 +8,31 @@ import yaml
|
|||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions, SmolDoclingOptions
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
from docling.pipeline.vlm_pipeline import VlmPipeline
|
from docling.pipeline.vlm_pipeline import VlmPipeline
|
||||||
|
|
||||||
sources = [
|
sources = [
|
||||||
# "https://arxiv.org/pdf/2408.09869"
|
# "https://arxiv.org/pdf/2408.09869",
|
||||||
# "tests/data/2305.03393v1-pg9-img.png",
|
# "tests/data/2305.03393v1-pg9-img.png",
|
||||||
"tests/data/2305.03393v1-pg9.pdf",
|
"tests/data/2305.03393v1-pg9.pdf",
|
||||||
# "demo_data/page.png",
|
|
||||||
# "demo_data/original_tables.pdf",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.generate_page_images = True
|
pipeline_options.generate_page_images = True
|
||||||
pipeline_options.force_backend_text = (
|
# If force_backend_text = True, text from backend will be used instead of generated text
|
||||||
False # If True, text from backend will be used instead of generated text
|
pipeline_options.force_backend_text = False
|
||||||
|
pipeline_options.artifacts_path = "model_artifacts/SmolDocling_2.7_DT_0.7"
|
||||||
|
|
||||||
|
vlm_options = SmolDoclingOptions(
|
||||||
|
artifacts_path="model_artifacts/SmolDocling_2.7_DT_0.7",
|
||||||
|
question="Perform Layout Analysis.",
|
||||||
|
load_in_8bit=True,
|
||||||
|
llm_int8_threshold=6.0,
|
||||||
|
quantized=False,
|
||||||
)
|
)
|
||||||
pipeline_options.artifacts_path = "model_artifacts"
|
|
||||||
|
pipeline_options.vlm_options = vlm_options
|
||||||
|
|
||||||
from docling_core.types.doc import DocItemLabel, ImageRefMode
|
from docling_core.types.doc import DocItemLabel, ImageRefMode
|
||||||
from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS
|
from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS
|
||||||
|
Loading…
Reference in New Issue
Block a user