mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Moved keep_backend = True to vlm pipeline
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
853544ba11
commit
0c60ef199a
@ -116,10 +116,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
||||
|
||||
def __init__(self, pipeline_options: PipelineOptions):
|
||||
super().__init__(pipeline_options)
|
||||
self.keep_backend = (
|
||||
True # For now, need to be able to query for page size post prediction
|
||||
)
|
||||
# self.keep_backend = False
|
||||
self.keep_backend = False
|
||||
|
||||
def _apply_on_pages(
|
||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
||||
|
@ -1,6 +1,7 @@
|
||||
import itertools
|
||||
import logging
|
||||
import re
|
||||
import warnings
|
||||
|
||||
# from io import BytesIO
|
||||
from pathlib import Path
|
||||
@ -28,7 +29,8 @@ from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, VlmPipelineOptions
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.smol_docling_model import SmolDoclingModel
|
||||
from docling.pipeline.base_pipeline import PaginatedPipeline
|
||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||
@ -38,9 +40,29 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
class VlmPipeline(PaginatedPipeline):
|
||||
|
||||
def __init__(self, pipeline_options: PdfPipelineOptions):
|
||||
def __init__(self, pipeline_options: VlmPipelineOptions):
|
||||
super().__init__(pipeline_options)
|
||||
self.pipeline_options: PdfPipelineOptions
|
||||
self.keep_backend = True
|
||||
|
||||
warnings.warn(
|
||||
"This API is currently experimental and may change in upcoming versions without notice.",
|
||||
category=UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
self.pipeline_options: VlmPipelineOptions
|
||||
|
||||
artifacts_path: Optional[Path] = None
|
||||
if pipeline_options.artifacts_path is not None:
|
||||
artifacts_path = Path(pipeline_options.artifacts_path).expanduser()
|
||||
elif settings.artifacts_path is not None:
|
||||
artifacts_path = Path(settings.artifacts_path).expanduser()
|
||||
|
||||
if artifacts_path is not None and not artifacts_path.is_dir():
|
||||
raise RuntimeError(
|
||||
f"The value of {artifacts_path=} is not valid. "
|
||||
"When defined, it must point to a folder containing all models required by the pipeline."
|
||||
)
|
||||
|
||||
# force_backend_text = False - use text that is coming from SmolDocling
|
||||
# force_backend_text = True - get text from backend using bounding boxes predicted by SmolDocling
|
||||
|
Loading…
Reference in New Issue
Block a user