mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
add GlmOptions
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
786b89efd9
commit
94b5e1532d
@ -14,23 +14,24 @@ from docling_core.types import Ref
|
|||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.experimental.document import DoclingDocument
|
from docling_core.types.experimental.document import DoclingDocument
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from docling.datamodel.base_models import Cluster
|
from docling.datamodel.base_models import Cluster
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
|
|
||||||
|
|
||||||
class GlmModel:
|
class GlmOptions(BaseModel):
|
||||||
def __init__(self, config):
|
create_legacy_output: bool = True
|
||||||
self.config = config
|
model_names: str = "" # e.g. "language;term;reference"
|
||||||
self.create_legacy_output = config.get("create_legacy_output", True)
|
|
||||||
|
|
||||||
|
class GlmModel:
|
||||||
|
def __init__(self, options: GlmOptions):
|
||||||
|
self.options = options
|
||||||
|
self.create_legacy_output = self.options.create_legacy_output
|
||||||
|
|
||||||
self.model_names = self.config.get(
|
|
||||||
"model_names", ""
|
|
||||||
) # "language;term;reference"
|
|
||||||
load_pretrained_nlp_models()
|
load_pretrained_nlp_models()
|
||||||
# model = init_nlp_model(model_names="language;term;reference")
|
self.model = init_nlp_model(model_names=self.options.model_names)
|
||||||
model = init_nlp_model(model_names=self.model_names)
|
|
||||||
self.model = model
|
|
||||||
|
|
||||||
def __call__(
|
def __call__(
|
||||||
self, conv_res: ConversionResult
|
self, conv_res: ConversionResult
|
||||||
|
@ -13,7 +13,7 @@ from docling.datamodel.pipeline_options import (
|
|||||||
TesseractOcrOptions,
|
TesseractOcrOptions,
|
||||||
)
|
)
|
||||||
from docling.models.base_ocr_model import BaseOcrModel
|
from docling.models.base_ocr_model import BaseOcrModel
|
||||||
from docling.models.ds_glm_model import GlmModel
|
from docling.models.ds_glm_model import GlmModel, GlmOptions
|
||||||
from docling.models.easyocr_model import EasyOcrModel
|
from docling.models.easyocr_model import EasyOcrModel
|
||||||
from docling.models.layout_model import LayoutModel
|
from docling.models.layout_model import LayoutModel
|
||||||
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
|
||||||
@ -42,7 +42,9 @@ class StandardPdfModelPipeline(PaginatedModelPipeline):
|
|||||||
|
|
||||||
self.artifacts_path = Path(artifacts_path)
|
self.artifacts_path = Path(artifacts_path)
|
||||||
self.glm_model = GlmModel(
|
self.glm_model = GlmModel(
|
||||||
config={"create_legacy_output": pipeline_options.create_legacy_output}
|
options=GlmOptions(
|
||||||
|
create_legacy_output=pipeline_options.create_legacy_output
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if ocr_model := self.get_ocr_model() is None:
|
if ocr_model := self.get_ocr_model() is None:
|
||||||
|
Loading…
Reference in New Issue
Block a user