mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
add supported_devices
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
5d21153948
commit
7f6df727e3
@ -22,6 +22,7 @@ from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBacke
|
|||||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
FormatToExtensions,
|
FormatToExtensions,
|
||||||
@ -30,8 +31,6 @@ from docling.datamodel.base_models import (
|
|||||||
)
|
)
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
EasyOcrOptions,
|
EasyOcrOptions,
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
PaginatedPipelineOptions,
|
PaginatedPipelineOptions,
|
||||||
|
@ -1,6 +1,4 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
|
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
|
||||||
@ -10,13 +8,11 @@ from pydantic import (
|
|||||||
BaseModel,
|
BaseModel,
|
||||||
ConfigDict,
|
ConfigDict,
|
||||||
Field,
|
Field,
|
||||||
field_validator,
|
|
||||||
model_validator,
|
|
||||||
)
|
)
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
# Import the following for backwards compatibility
|
# Import the following for backwards compatibility
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.pipeline_options_vlm_model import (
|
from docling.datamodel.pipeline_options_vlm_model import (
|
||||||
ApiVlmOptions,
|
ApiVlmOptions,
|
||||||
InferenceFramework,
|
InferenceFramework,
|
||||||
@ -34,64 +30,6 @@ from docling.datamodel.vlm_model_spec import (
|
|||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class AcceleratorDevice(str, Enum):
|
|
||||||
"""Devices to run model inference"""
|
|
||||||
|
|
||||||
AUTO = "auto"
|
|
||||||
CPU = "cpu"
|
|
||||||
CUDA = "cuda"
|
|
||||||
MPS = "mps"
|
|
||||||
|
|
||||||
|
|
||||||
class AcceleratorOptions(BaseSettings):
|
|
||||||
model_config = SettingsConfigDict(
|
|
||||||
env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
|
|
||||||
)
|
|
||||||
|
|
||||||
num_threads: int = 4
|
|
||||||
device: Union[str, AcceleratorDevice] = "auto"
|
|
||||||
cuda_use_flash_attention2: bool = False
|
|
||||||
|
|
||||||
@field_validator("device")
|
|
||||||
def validate_device(cls, value):
|
|
||||||
# "auto", "cpu", "cuda", "mps", or "cuda:N"
|
|
||||||
if value in {d.value for d in AcceleratorDevice} or re.match(
|
|
||||||
r"^cuda(:\d+)?$", value
|
|
||||||
):
|
|
||||||
return value
|
|
||||||
raise ValueError(
|
|
||||||
"Invalid device option. Use 'auto', 'cpu', 'mps', 'cuda', or 'cuda:N'."
|
|
||||||
)
|
|
||||||
|
|
||||||
@model_validator(mode="before")
|
|
||||||
@classmethod
|
|
||||||
def check_alternative_envvars(cls, data: Any) -> Any:
|
|
||||||
r"""
|
|
||||||
Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
|
|
||||||
The alternative envvar is used only if it is valid and the regular envvar is not set.
|
|
||||||
|
|
||||||
Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
|
|
||||||
the same functionality. In case the alias envvar is set and the user tries to override the
|
|
||||||
parameter in settings initialization, Pydantic treats the parameter provided in __init__()
|
|
||||||
as an extra input instead of simply overwriting the evvar value for that parameter.
|
|
||||||
"""
|
|
||||||
if isinstance(data, dict):
|
|
||||||
input_num_threads = data.get("num_threads")
|
|
||||||
# Check if to set the num_threads from the alternative envvar
|
|
||||||
if input_num_threads is None:
|
|
||||||
docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
|
|
||||||
omp_num_threads = os.getenv("OMP_NUM_THREADS")
|
|
||||||
if docling_num_threads is None and omp_num_threads is not None:
|
|
||||||
try:
|
|
||||||
data["num_threads"] = int(omp_num_threads)
|
|
||||||
except ValueError:
|
|
||||||
_log.error(
|
|
||||||
"Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
|
|
||||||
omp_num_threads,
|
|
||||||
)
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
class BaseOptions(BaseModel):
|
class BaseOptions(BaseModel):
|
||||||
"""Base class for options."""
|
"""Base class for options."""
|
||||||
|
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, Literal
|
from typing import Any, Dict, List, Literal
|
||||||
|
|
||||||
from pydantic import AnyUrl, BaseModel
|
from pydantic import AnyUrl, BaseModel
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||||
|
|
||||||
|
|
||||||
class BaseVlmOptions(BaseModel):
|
class BaseVlmOptions(BaseModel):
|
||||||
kind: str
|
kind: str
|
||||||
@ -35,6 +37,12 @@ class InlineVlmOptions(BaseVlmOptions):
|
|||||||
inference_framework: InferenceFramework
|
inference_framework: InferenceFramework
|
||||||
response_format: ResponseFormat
|
response_format: ResponseFormat
|
||||||
|
|
||||||
|
supported_devices: List[AcceleratorDevice] = [
|
||||||
|
AcceleratorDevice.CPU,
|
||||||
|
AcceleratorDevice.CUDA,
|
||||||
|
AcceleratorDevice.MPS,
|
||||||
|
]
|
||||||
|
|
||||||
scale: float = 2.0
|
scale: float = 2.0
|
||||||
|
|
||||||
temperature: float = 0.0
|
temperature: float = 0.0
|
||||||
|
@ -5,6 +5,7 @@ from pydantic import (
|
|||||||
AnyUrl,
|
AnyUrl,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||||
from docling.datamodel.pipeline_options_vlm_model import (
|
from docling.datamodel.pipeline_options_vlm_model import (
|
||||||
ApiVlmOptions,
|
ApiVlmOptions,
|
||||||
InferenceFramework,
|
InferenceFramework,
|
||||||
@ -21,6 +22,7 @@ SMOLDOCLING_MLX = InlineVlmOptions(
|
|||||||
prompt="Convert this page to docling.",
|
prompt="Convert this page to docling.",
|
||||||
response_format=ResponseFormat.DOCTAGS,
|
response_format=ResponseFormat.DOCTAGS,
|
||||||
inference_framework=InferenceFramework.MLX,
|
inference_framework=InferenceFramework.MLX,
|
||||||
|
supported_devices=[AcceleratorDevice.MPS],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -30,6 +32,11 @@ SMOLDOCLING_TRANSFORMERS = InlineVlmOptions(
|
|||||||
prompt="Convert this page to docling.",
|
prompt="Convert this page to docling.",
|
||||||
response_format=ResponseFormat.DOCTAGS,
|
response_format=ResponseFormat.DOCTAGS,
|
||||||
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
||||||
|
supported_devices=[
|
||||||
|
AcceleratorDevice.CPU,
|
||||||
|
AcceleratorDevice.CUDA,
|
||||||
|
AcceleratorDevice.MPS,
|
||||||
|
],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -40,6 +47,7 @@ GRANITE_VISION_TRANSFORMERS = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare MarkDown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare MarkDown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
||||||
|
supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -60,6 +68,7 @@ PIXTRAL_12B_TRANSFORMERS = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
inference_framework=InferenceFramework.TRANSFORMERS_VISION2SEQ,
|
||||||
|
supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -69,6 +78,7 @@ PIXTRAL_12B_MLX = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.MLX,
|
inference_framework=InferenceFramework.MLX,
|
||||||
|
supported_devices=[AcceleratorDevice.MPS],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -77,8 +87,10 @@ PIXTRAL_12B_MLX = InlineVlmOptions(
|
|||||||
PHI4_TRANSFORMERS = InlineVlmOptions(
|
PHI4_TRANSFORMERS = InlineVlmOptions(
|
||||||
repo_id="microsoft/Phi-4-multimodal-instruct",
|
repo_id="microsoft/Phi-4-multimodal-instruct",
|
||||||
prompt="Convert this page to MarkDown. Do not miss any text and only output the bare markdown",
|
prompt="Convert this page to MarkDown. Do not miss any text and only output the bare markdown",
|
||||||
|
trust_remote_code=True,
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.TRANSFORMERS_CAUSALLM,
|
inference_framework=InferenceFramework.TRANSFORMERS_CAUSALLM,
|
||||||
|
supported_devices=[AcceleratorDevice.CPU, AcceleratorDevice.CUDA],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -89,6 +101,7 @@ QWEN25_VL_3B_MLX = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.MLX,
|
inference_framework=InferenceFramework.MLX,
|
||||||
|
supported_devices=[AcceleratorDevice.MPS],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -99,6 +112,7 @@ GEMMA3_12B_MLX = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.MLX,
|
inference_framework=InferenceFramework.MLX,
|
||||||
|
supported_devices=[AcceleratorDevice.MPS],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
@ -108,6 +122,7 @@ GEMMA3_27B_MLX = InlineVlmOptions(
|
|||||||
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
prompt="Convert this page to markdown. Do not miss any text and only output the bare markdown!",
|
||||||
response_format=ResponseFormat.MARKDOWN,
|
response_format=ResponseFormat.MARKDOWN,
|
||||||
inference_framework=InferenceFramework.MLX,
|
inference_framework=InferenceFramework.MLX,
|
||||||
|
supported_devices=[AcceleratorDevice.MPS],
|
||||||
scale=2.0,
|
scale=2.0,
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
|
@ -11,9 +11,10 @@ from PIL import Image, ImageDraw
|
|||||||
from rtree import index
|
from rtree import index
|
||||||
from scipy.ndimage import binary_dilation, find_objects, label
|
from scipy.ndimage import binary_dilation, find_objects, label
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
|
from docling.datamodel.pipeline_options import OcrOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
from docling.models.base_model import BaseModelWithOptions, BasePageModel
|
||||||
|
|
||||||
|
@ -16,8 +16,8 @@ from docling_core.types.doc.labels import CodeLanguageLabel
|
|||||||
from PIL import Image, ImageOps
|
from PIL import Image, ImageOps
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import ItemAndImageEnrichmentElement
|
from docling.datamodel.base_models import ItemAndImageEnrichmentElement
|
||||||
from docling.datamodel.pipeline_options import AcceleratorOptions
|
|
||||||
from docling.models.base_model import BaseItemAndImageEnrichmentModel
|
from docling.models.base_model import BaseItemAndImageEnrichmentModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ from docling_core.types.doc import (
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.models.base_model import BaseEnrichmentModel
|
from docling.models.base_model import BaseEnrichmentModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
|
||||||
|
@ -9,11 +9,10 @@ import numpy
|
|||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
EasyOcrOptions,
|
EasyOcrOptions,
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
)
|
)
|
||||||
|
@ -10,9 +10,9 @@ from docling_core.types.doc import DocItemLabel
|
|||||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import AcceleratorOptions
|
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
@ -8,10 +8,10 @@ from typing import Optional, Type
|
|||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
OcrMacOptions,
|
OcrMacOptions,
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
)
|
)
|
||||||
|
@ -5,8 +5,8 @@ from typing import Optional, Type, Union
|
|||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
PictureDescriptionApiOptions,
|
PictureDescriptionApiOptions,
|
||||||
PictureDescriptionBaseOptions,
|
PictureDescriptionBaseOptions,
|
||||||
)
|
)
|
||||||
|
@ -13,8 +13,8 @@ from docling_core.types.doc.document import ( # TODO: move import to docling_co
|
|||||||
)
|
)
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
PictureDescriptionBaseOptions,
|
PictureDescriptionBaseOptions,
|
||||||
)
|
)
|
||||||
from docling.models.base_model import (
|
from docling.models.base_model import (
|
||||||
|
@ -4,8 +4,8 @@ from typing import Optional, Type, Union
|
|||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
PictureDescriptionBaseOptions,
|
PictureDescriptionBaseOptions,
|
||||||
PictureDescriptionVlmOptions,
|
PictureDescriptionVlmOptions,
|
||||||
)
|
)
|
||||||
|
@ -7,11 +7,10 @@ import numpy
|
|||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
RapidOcrOptions,
|
RapidOcrOptions,
|
||||||
)
|
)
|
||||||
|
@ -13,11 +13,10 @@ from docling_core.types.doc.page import (
|
|||||||
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
from docling.datamodel.base_models import Page, Table, TableStructurePrediction
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
TableFormerMode,
|
TableFormerMode,
|
||||||
TableStructureOptions,
|
TableStructureOptions,
|
||||||
)
|
)
|
||||||
|
@ -13,10 +13,10 @@ import pandas as pd
|
|||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import TextCell
|
from docling_core.types.doc.page import TextCell
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
TesseractCliOcrOptions,
|
TesseractCliOcrOptions,
|
||||||
)
|
)
|
||||||
|
@ -7,10 +7,10 @@ from typing import Iterable, Optional, Type
|
|||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.page import TextCell
|
from docling_core.types.doc.page import TextCell
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorOptions,
|
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
TesseractOcrOptions,
|
TesseractOcrOptions,
|
||||||
)
|
)
|
||||||
|
@ -4,11 +4,11 @@ from collections.abc import Iterable
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page, VlmPrediction
|
from docling.datamodel.accelerator_options import (
|
||||||
from docling.datamodel.document import ConversionResult
|
|
||||||
from docling.datamodel.pipeline_options import (
|
|
||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
)
|
)
|
||||||
|
from docling.datamodel.base_models import Page, VlmPrediction
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
||||||
@ -39,8 +39,10 @@ class HuggingFaceVlmModel_AutoModelForCausalLM(BasePageModel):
|
|||||||
GenerationConfig,
|
GenerationConfig,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.device = decide_device(accelerator_options.device)
|
self.device = decide_device(
|
||||||
self.device = HuggingFaceVlmModel.map_device_to_cpu_if_mlx(self.device)
|
accelerator_options.device,
|
||||||
|
supported_devices=vlm_options.supported_devices,
|
||||||
|
)
|
||||||
_log.debug(f"Available device for VLM: {self.device}")
|
_log.debug(f"Available device for VLM: {self.device}")
|
||||||
|
|
||||||
self.use_cache = vlm_options.use_kv_cache
|
self.use_cache = vlm_options.use_kv_cache
|
||||||
|
@ -4,11 +4,11 @@ from collections.abc import Iterable
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page, VlmPrediction
|
from docling.datamodel.accelerator_options import (
|
||||||
from docling.datamodel.document import ConversionResult
|
|
||||||
from docling.datamodel.pipeline_options import (
|
|
||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
)
|
)
|
||||||
|
from docling.datamodel.base_models import Page, VlmPrediction
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
||||||
@ -38,9 +38,11 @@ class HuggingFaceVlmModel_AutoModelForVision2Seq(BasePageModel):
|
|||||||
BitsAndBytesConfig,
|
BitsAndBytesConfig,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.device = decide_device(accelerator_options.device)
|
self.device = decide_device(
|
||||||
self.device = HuggingFaceVlmModel.map_device_to_cpu_if_mlx(self.device)
|
accelerator_options.device,
|
||||||
_log.debug(f"Available device for HuggingFace VLM: {self.device}")
|
supported_devices=vlm_options.supported_devices,
|
||||||
|
)
|
||||||
|
_log.debug(f"Available device for VLM: {self.device}")
|
||||||
|
|
||||||
self.use_cache = vlm_options.use_kv_cache
|
self.use_cache = vlm_options.use_kv_cache
|
||||||
self.max_new_tokens = vlm_options.max_new_tokens
|
self.max_new_tokens = vlm_options.max_new_tokens
|
||||||
|
@ -4,11 +4,11 @@ from collections.abc import Iterable
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
|
from docling.datamodel.accelerator_options import (
|
||||||
from docling.datamodel.document import ConversionResult
|
|
||||||
from docling.datamodel.pipeline_options import (
|
|
||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
)
|
)
|
||||||
|
from docling.datamodel.base_models import Page, VlmPrediction, VlmPredictionToken
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
from docling.datamodel.pipeline_options_vlm_model import InlineVlmOptions
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
||||||
|
@ -1,13 +1,16 @@
|
|||||||
import logging
|
import logging
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import AcceleratorDevice
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def decide_device(accelerator_device: str) -> str:
|
def decide_device(
|
||||||
|
accelerator_device: str, supported_devices: Optional[List[AcceleratorDevice]] = None
|
||||||
|
) -> str:
|
||||||
r"""
|
r"""
|
||||||
Resolve the device based on the acceleration options and the available devices in the system.
|
Resolve the device based on the acceleration options and the available devices in the system.
|
||||||
|
|
||||||
@ -20,6 +23,18 @@ def decide_device(accelerator_device: str) -> str:
|
|||||||
has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
|
has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
|
||||||
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
|
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
|
||||||
|
|
||||||
|
if supported_devices is not None:
|
||||||
|
if has_cuda and AcceleratorDevice.CUDA not in supported_devices:
|
||||||
|
_log.info(
|
||||||
|
f"Removing CUDA from available devices because it is not in {supported_devices=}"
|
||||||
|
)
|
||||||
|
has_cuda = False
|
||||||
|
if has_mps and AcceleratorDevice.MPS not in supported_devices:
|
||||||
|
_log.info(
|
||||||
|
f"Removing MPS from available devices because it is not in {supported_devices=}"
|
||||||
|
)
|
||||||
|
has_mps = False
|
||||||
|
|
||||||
if accelerator_device == AcceleratorDevice.AUTO.value: # Handle 'auto'
|
if accelerator_device == AcceleratorDevice.AUTO.value: # Handle 'auto'
|
||||||
if has_cuda:
|
if has_cuda:
|
||||||
device = "cuda:0"
|
device = "cuda:0"
|
||||||
|
@ -3,10 +3,9 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
)
|
)
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
)
|
)
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import AcceleratorDevice, PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
|
||||||
from .test_data_gen_flag import GEN_TEST_DATA
|
from .test_data_gen_flag import GEN_TEST_DATA
|
||||||
|
@ -3,10 +3,10 @@ from pathlib import Path
|
|||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
EasyOcrOptions,
|
EasyOcrOptions,
|
||||||
OcrMacOptions,
|
OcrMacOptions,
|
||||||
OcrOptions,
|
OcrOptions,
|
||||||
|
@ -7,11 +7,10 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
|||||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||||
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
|
||||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||||
|
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
|
||||||
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
|
from docling.datamodel.base_models import ConversionStatus, InputFormat, QualityGrade
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
AcceleratorDevice,
|
|
||||||
AcceleratorOptions,
|
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
TableFormerMode,
|
TableFormerMode,
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user